{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 28, "global_step": 560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 3e-05, "loss": 3.5167, "step": 6 }, { "epoch": 0.21, "learning_rate": 3e-05, "loss": 2.7034, "step": 12 }, { "epoch": 0.32, "learning_rate": 3e-05, "loss": 2.5409, "step": 18 }, { "epoch": 0.43, "learning_rate": 3e-05, "loss": 2.4931, "step": 24 }, { "epoch": 0.5, "eval_accuracy": 0.5748751060189339, "eval_loss": 2.390355348587036, "eval_runtime": 25.7729, "eval_samples_per_second": 34.261, "eval_steps_per_second": 2.173, "step": 28 }, { "epoch": 0.5, "eval_bleu": 2.6290615444017718, "eval_exact_match": 0.0, "eval_prefix_exact_match": 0.0, "step": 28 }, { "epoch": 0.54, "learning_rate": 3e-05, "loss": 2.4944, "step": 30 }, { "epoch": 0.64, "learning_rate": 3e-05, "loss": 2.4832, "step": 36 }, { "epoch": 0.75, "learning_rate": 3e-05, "loss": 2.4336, "step": 42 }, { "epoch": 0.86, "learning_rate": 3e-05, "loss": 2.5101, "step": 48 }, { "epoch": 0.96, "learning_rate": 3e-05, "loss": 2.4699, "step": 54 }, { "epoch": 1.0, "eval_accuracy": 0.6049526162776446, "eval_loss": 2.101423740386963, "eval_runtime": 23.9781, "eval_samples_per_second": 36.825, "eval_steps_per_second": 2.335, "step": 56 }, { "epoch": 1.0, "eval_bleu": 2.731416805620766, "eval_exact_match": 0.0, "eval_prefix_exact_match": 0.0, "step": 56 }, { "epoch": 1.07, "learning_rate": 3e-05, "loss": 2.2172, "step": 60 }, { "epoch": 1.18, "learning_rate": 3e-05, "loss": 1.9462, "step": 66 }, { "epoch": 1.29, "learning_rate": 3e-05, "loss": 1.8014, "step": 72 }, { "epoch": 1.39, "learning_rate": 3e-05, "loss": 1.8689, "step": 78 }, { "epoch": 1.5, "learning_rate": 3e-05, "loss": 1.9006, "step": 84 }, { "epoch": 1.5, "eval_accuracy": 0.6392953969397911, "eval_loss": 1.8079485893249512, "eval_runtime": 24.4889, "eval_samples_per_second": 36.057, "eval_steps_per_second": 2.287, "step": 84 }, { "epoch": 1.5, "eval_bleu": 3.957789284907739, "eval_exact_match": 0.0, "eval_prefix_exact_match": 0.0, "step": 84 }, { "epoch": 1.61, "learning_rate": 3e-05, "loss": 1.8975, "step": 90 }, { "epoch": 1.71, "learning_rate": 3e-05, "loss": 1.8936, "step": 96 }, { "epoch": 1.82, "learning_rate": 3e-05, "loss": 1.8677, "step": 102 }, { "epoch": 1.93, "learning_rate": 3e-05, "loss": 1.9317, "step": 108 }, { "epoch": 2.0, "eval_accuracy": 0.6722017129718149, "eval_loss": 1.5510306358337402, "eval_runtime": 24.2083, "eval_samples_per_second": 36.475, "eval_steps_per_second": 2.313, "step": 112 }, { "epoch": 2.0, "eval_bleu": 4.403977979732999, "eval_exact_match": 0.0, "eval_prefix_exact_match": 0.0, "step": 112 }, { "epoch": 2.04, "learning_rate": 3e-05, "loss": 1.7801, "step": 114 }, { "epoch": 2.14, "learning_rate": 3e-05, "loss": 1.3602, "step": 120 }, { "epoch": 2.25, "learning_rate": 3e-05, "loss": 1.3667, "step": 126 }, { "epoch": 2.36, "learning_rate": 3e-05, "loss": 1.3497, "step": 132 }, { "epoch": 2.46, "learning_rate": 3e-05, "loss": 1.3984, "step": 138 }, { "epoch": 2.5, "eval_accuracy": 0.707495599907831, "eval_loss": 1.284958839416504, "eval_runtime": 24.875, "eval_samples_per_second": 35.498, "eval_steps_per_second": 2.251, "step": 140 }, { "epoch": 2.5, "eval_bleu": 7.1028643857654, "eval_exact_match": 0.0, "eval_prefix_exact_match": 0.0, "step": 140 }, { "epoch": 2.57, "learning_rate": 3e-05, "loss": 1.3143, "step": 144 }, { "epoch": 2.68, "learning_rate": 3e-05, "loss": 1.3887, "step": 150 }, { "epoch": 2.79, "learning_rate": 3e-05, "loss": 1.3563, "step": 156 }, { "epoch": 2.89, "learning_rate": 3e-05, "loss": 1.3749, "step": 162 }, { "epoch": 3.0, "learning_rate": 3e-05, "loss": 1.3662, "step": 168 }, { "epoch": 3.0, "eval_accuracy": 0.737386810999495, "eval_loss": 1.0899983644485474, "eval_runtime": 24.1441, "eval_samples_per_second": 36.572, "eval_steps_per_second": 2.319, "step": 168 }, { "epoch": 3.0, "eval_bleu": 8.181972004065782, "eval_exact_match": 0.0, "eval_prefix_exact_match": 0.0, "step": 168 }, { "epoch": 3.11, "learning_rate": 3e-05, "loss": 0.9654, "step": 174 }, { "epoch": 3.21, "learning_rate": 3e-05, "loss": 0.9704, "step": 180 }, { "epoch": 3.32, "learning_rate": 3e-05, "loss": 0.9269, "step": 186 }, { "epoch": 3.43, "learning_rate": 3e-05, "loss": 0.9041, "step": 192 }, { "epoch": 3.5, "eval_accuracy": 0.7670475994371804, "eval_loss": 0.890863835811615, "eval_runtime": 24.7388, "eval_samples_per_second": 35.693, "eval_steps_per_second": 2.264, "step": 196 }, { "epoch": 3.5, "eval_bleu": 12.34433922960259, "eval_exact_match": 0.0022650056625141564, "eval_prefix_exact_match": 0.0022650056625141564, "step": 196 }, { "epoch": 3.54, "learning_rate": 3e-05, "loss": 0.9277, "step": 198 }, { "epoch": 3.64, "learning_rate": 3e-05, "loss": 0.8827, "step": 204 }, { "epoch": 3.75, "learning_rate": 3e-05, "loss": 0.99, "step": 210 }, { "epoch": 3.86, "learning_rate": 3e-05, "loss": 0.9083, "step": 216 }, { "epoch": 3.96, "learning_rate": 3e-05, "loss": 0.9056, "step": 222 }, { "epoch": 4.0, "eval_accuracy": 0.7902908718310757, "eval_loss": 0.7502285242080688, "eval_runtime": 24.8951, "eval_samples_per_second": 35.469, "eval_steps_per_second": 2.249, "step": 224 }, { "epoch": 4.0, "eval_bleu": 14.357948484733997, "eval_exact_match": 0.0033975084937712344, "eval_prefix_exact_match": 0.0033975084937712344, "step": 224 }, { "epoch": 4.07, "learning_rate": 3e-05, "loss": 0.7462, "step": 228 }, { "epoch": 4.18, "learning_rate": 3e-05, "loss": 0.583, "step": 234 }, { "epoch": 4.29, "learning_rate": 3e-05, "loss": 0.5995, "step": 240 }, { "epoch": 4.39, "learning_rate": 3e-05, "loss": 0.6094, "step": 246 }, { "epoch": 4.5, "learning_rate": 3e-05, "loss": 0.6131, "step": 252 }, { "epoch": 4.5, "eval_accuracy": 0.8066557828732234, "eval_loss": 0.6304216980934143, "eval_runtime": 25.7927, "eval_samples_per_second": 34.234, "eval_steps_per_second": 2.171, "step": 252 }, { "epoch": 4.5, "eval_bleu": 21.42036295397234, "eval_exact_match": 0.010192525481313703, "eval_prefix_exact_match": 0.010192525481313703, "step": 252 }, { "epoch": 4.61, "learning_rate": 3e-05, "loss": 0.6447, "step": 258 }, { "epoch": 4.71, "learning_rate": 3e-05, "loss": 0.6134, "step": 264 }, { "epoch": 4.82, "learning_rate": 3e-05, "loss": 0.611, "step": 270 }, { "epoch": 4.93, "learning_rate": 3e-05, "loss": 0.6101, "step": 276 }, { "epoch": 5.0, "eval_accuracy": 0.82152049535968, "eval_loss": 0.5429415106773376, "eval_runtime": 24.6601, "eval_samples_per_second": 35.807, "eval_steps_per_second": 2.271, "step": 280 }, { "epoch": 5.0, "eval_bleu": 26.261017838942095, "eval_exact_match": 0.02944507361268403, "eval_prefix_exact_match": 0.02944507361268403, "step": 280 }, { "epoch": 5.04, "learning_rate": 3e-05, "loss": 0.5416, "step": 282 }, { "epoch": 5.14, "learning_rate": 3e-05, "loss": 0.3793, "step": 288 }, { "epoch": 5.25, "learning_rate": 3e-05, "loss": 0.3858, "step": 294 }, { "epoch": 5.36, "learning_rate": 3e-05, "loss": 0.3977, "step": 300 }, { "epoch": 5.46, "learning_rate": 3e-05, "loss": 0.3772, "step": 306 }, { "epoch": 5.5, "eval_accuracy": 0.8287371367779068, "eval_loss": 0.48723191022872925, "eval_runtime": 25.4066, "eval_samples_per_second": 34.755, "eval_steps_per_second": 2.204, "step": 308 }, { "epoch": 5.5, "eval_bleu": 38.82587580024761, "eval_exact_match": 0.10419026047565119, "eval_prefix_exact_match": 0.10419026047565119, "step": 308 }, { "epoch": 5.57, "learning_rate": 3e-05, "loss": 0.3898, "step": 312 }, { "epoch": 5.68, "learning_rate": 3e-05, "loss": 0.3916, "step": 318 }, { "epoch": 5.79, "learning_rate": 3e-05, "loss": 0.3948, "step": 324 }, { "epoch": 5.89, "learning_rate": 3e-05, "loss": 0.4125, "step": 330 }, { "epoch": 6.0, "learning_rate": 3e-05, "loss": 0.4265, "step": 336 }, { "epoch": 6.0, "eval_accuracy": 0.8356841346648821, "eval_loss": 0.44368451833724976, "eval_runtime": 25.151, "eval_samples_per_second": 35.108, "eval_steps_per_second": 2.227, "step": 336 }, { "epoch": 6.0, "eval_bleu": 51.490761143615906, "eval_exact_match": 0.16987542468856173, "eval_prefix_exact_match": 0.16987542468856173, "step": 336 }, { "epoch": 6.11, "learning_rate": 3e-05, "loss": 0.2554, "step": 342 }, { "epoch": 6.21, "learning_rate": 3e-05, "loss": 0.2562, "step": 348 }, { "epoch": 6.32, "learning_rate": 3e-05, "loss": 0.2531, "step": 354 }, { "epoch": 6.43, "learning_rate": 3e-05, "loss": 0.2552, "step": 360 }, { "epoch": 6.5, "eval_accuracy": 0.8388953439916067, "eval_loss": 0.4225637912750244, "eval_runtime": 25.2976, "eval_samples_per_second": 34.905, "eval_steps_per_second": 2.214, "step": 364 }, { "epoch": 6.5, "eval_bleu": 66.56501737677819, "eval_exact_match": 0.3012457531143828, "eval_prefix_exact_match": 0.3012457531143828, "step": 364 }, { "epoch": 6.54, "learning_rate": 3e-05, "loss": 0.2536, "step": 366 }, { "epoch": 6.64, "learning_rate": 3e-05, "loss": 0.2617, "step": 372 }, { "epoch": 6.75, "learning_rate": 3e-05, "loss": 0.2763, "step": 378 }, { "epoch": 6.86, "learning_rate": 3e-05, "loss": 0.2614, "step": 384 }, { "epoch": 6.96, "learning_rate": 3e-05, "loss": 0.2875, "step": 390 }, { "epoch": 7.0, "eval_accuracy": 0.8417535654228746, "eval_loss": 0.4019472599029541, "eval_runtime": 24.4914, "eval_samples_per_second": 36.053, "eval_steps_per_second": 2.287, "step": 392 }, { "epoch": 7.0, "eval_bleu": 73.8359090816191, "eval_exact_match": 0.3941109852774632, "eval_prefix_exact_match": 0.3941109852774632, "step": 392 }, { "epoch": 7.07, "learning_rate": 3e-05, "loss": 0.2191, "step": 396 }, { "epoch": 7.18, "learning_rate": 3e-05, "loss": 0.178, "step": 402 }, { "epoch": 7.29, "learning_rate": 3e-05, "loss": 0.184, "step": 408 }, { "epoch": 7.39, "learning_rate": 3e-05, "loss": 0.1753, "step": 414 }, { "epoch": 7.5, "learning_rate": 3e-05, "loss": 0.1874, "step": 420 }, { "epoch": 7.5, "eval_accuracy": 0.8429596073990185, "eval_loss": 0.39652687311172485, "eval_runtime": 25.3956, "eval_samples_per_second": 34.77, "eval_steps_per_second": 2.205, "step": 420 }, { "epoch": 7.5, "eval_bleu": 82.2514512966405, "eval_exact_match": 0.5209513023782559, "eval_prefix_exact_match": 0.5209513023782559, "step": 420 }, { "epoch": 7.61, "learning_rate": 3e-05, "loss": 0.1977, "step": 426 }, { "epoch": 7.71, "learning_rate": 3e-05, "loss": 0.1922, "step": 432 }, { "epoch": 7.82, "learning_rate": 3e-05, "loss": 0.1977, "step": 438 }, { "epoch": 7.93, "learning_rate": 3e-05, "loss": 0.1958, "step": 444 }, { "epoch": 8.0, "eval_accuracy": 0.844146038936526, "eval_loss": 0.3811873495578766, "eval_runtime": 25.8995, "eval_samples_per_second": 34.093, "eval_steps_per_second": 2.162, "step": 448 }, { "epoch": 8.0, "eval_bleu": 86.30069285909471, "eval_exact_match": 0.6002265005662514, "eval_prefix_exact_match": 0.6002265005662514, "step": 448 }, { "epoch": 8.04, "learning_rate": 3e-05, "loss": 0.1761, "step": 450 }, { "epoch": 8.14, "learning_rate": 3e-05, "loss": 0.1388, "step": 456 }, { "epoch": 8.25, "learning_rate": 3e-05, "loss": 0.1438, "step": 462 }, { "epoch": 8.36, "learning_rate": 3e-05, "loss": 0.143, "step": 468 }, { "epoch": 8.46, "learning_rate": 3e-05, "loss": 0.1443, "step": 474 }, { "epoch": 8.5, "eval_accuracy": 0.8449598721399401, "eval_loss": 0.38515910506248474, "eval_runtime": 25.4148, "eval_samples_per_second": 34.744, "eval_steps_per_second": 2.203, "step": 476 }, { "epoch": 8.5, "eval_bleu": 91.19128906028183, "eval_exact_match": 0.6772366930917327, "eval_prefix_exact_match": 0.6772366930917327, "step": 476 }, { "epoch": 8.57, "learning_rate": 3e-05, "loss": 0.1387, "step": 480 }, { "epoch": 8.68, "learning_rate": 3e-05, "loss": 0.146, "step": 486 }, { "epoch": 8.79, "learning_rate": 3e-05, "loss": 0.1487, "step": 492 }, { "epoch": 8.89, "learning_rate": 3e-05, "loss": 0.1477, "step": 498 }, { "epoch": 9.0, "learning_rate": 3e-05, "loss": 0.1535, "step": 504 }, { "epoch": 9.0, "eval_accuracy": 0.8455825035666485, "eval_loss": 0.3791412115097046, "eval_runtime": 25.7361, "eval_samples_per_second": 34.31, "eval_steps_per_second": 2.176, "step": 504 }, { "epoch": 9.0, "eval_bleu": 93.80439056513916, "eval_exact_match": 0.7327293318233296, "eval_prefix_exact_match": 0.7327293318233296, "step": 504 }, { "epoch": 9.11, "learning_rate": 3e-05, "loss": 0.1186, "step": 510 }, { "epoch": 9.21, "learning_rate": 3e-05, "loss": 0.1124, "step": 516 }, { "epoch": 9.32, "learning_rate": 3e-05, "loss": 0.1165, "step": 522 }, { "epoch": 9.43, "learning_rate": 3e-05, "loss": 0.1236, "step": 528 }, { "epoch": 9.5, "eval_accuracy": 0.8456070166149441, "eval_loss": 0.38488179445266724, "eval_runtime": 25.6458, "eval_samples_per_second": 34.431, "eval_steps_per_second": 2.184, "step": 532 }, { "epoch": 9.5, "eval_bleu": 95.44029675299471, "eval_exact_match": 0.753114382785957, "eval_prefix_exact_match": 0.753114382785957, "step": 532 }, { "epoch": 9.54, "learning_rate": 3e-05, "loss": 0.1191, "step": 534 }, { "epoch": 9.64, "learning_rate": 3e-05, "loss": 0.124, "step": 540 }, { "epoch": 9.75, "learning_rate": 3e-05, "loss": 0.1172, "step": 546 }, { "epoch": 9.86, "learning_rate": 3e-05, "loss": 0.1247, "step": 552 }, { "epoch": 9.96, "learning_rate": 3e-05, "loss": 0.1221, "step": 558 }, { "epoch": 10.0, "eval_accuracy": 0.8462492584802891, "eval_loss": 0.3696680963039398, "eval_runtime": 26.3353, "eval_samples_per_second": 33.529, "eval_steps_per_second": 2.126, "step": 560 }, { "epoch": 10.0, "eval_bleu": 95.90755211395843, "eval_exact_match": 0.8131370328425821, "eval_prefix_exact_match": 0.8131370328425821, "step": 560 }, { "epoch": 10.0, "step": 560, "total_flos": 1.36907799224832e+16, "train_loss": 0.8552157643118075, "train_runtime": 5673.7906, "train_samples_per_second": 1.556, "train_steps_per_second": 0.099 } ], "logging_steps": 6, "max_steps": 560, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.36907799224832e+16, "trial_name": null, "trial_params": null }