diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,28837 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 2215, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "contrastive_loss": 1.0726, + "epoch": 0.002257336343115124, + "grad_norm": 74.57825469970703, + "learning_rate": 2.0000000000000002e-07, + "lm_loss": 18.0083, + "loss": 4.4876, + "step": 1, + "text_contrastive_loss": 3.2285, + "train_positive_log_prob": -271.7488, + "train_positive_token_accuracy": 0.001, + "train_positive_token_prob": 0.0012 + }, + { + "contrastive_loss": 1.1892, + "epoch": 0.004514672686230248, + "grad_norm": 88.64407348632812, + "learning_rate": 4.0000000000000003e-07, + "lm_loss": 18.387, + "loss": 4.8065, + "step": 2, + "text_contrastive_loss": 3.5573, + "train_positive_log_prob": -264.0128, + "train_positive_token_accuracy": 0.0, + "train_positive_token_prob": 0.0006 + }, + { + "contrastive_loss": 0.7495, + "epoch": 0.006772009029345372, + "grad_norm": 67.88338470458984, + "learning_rate": 6.000000000000001e-07, + "lm_loss": 17.8302, + "loss": 4.1554, + "step": 3, + "text_contrastive_loss": 3.2457, + "train_positive_log_prob": -269.9582, + "train_positive_token_accuracy": 0.0015, + "train_positive_token_prob": 0.0015 + }, + { + "contrastive_loss": 0.7506, + "epoch": 0.009029345372460496, + "grad_norm": 58.47990798950195, + "learning_rate": 8.000000000000001e-07, + "lm_loss": 18.1632, + "loss": 3.9869, + "step": 4, + "text_contrastive_loss": 2.84, + "train_positive_log_prob": -268.8983, + "train_positive_token_accuracy": 0.0016, + "train_positive_token_prob": 0.0014 + }, + { + "contrastive_loss": 0.8493, + "epoch": 0.011286681715575621, + "grad_norm": 61.88850784301758, + "learning_rate": 1.0000000000000002e-06, + "lm_loss": 17.5826, + "loss": 4.1875, + "step": 5, + "text_contrastive_loss": 3.1597, + "train_positive_log_prob": -261.6127, + "train_positive_token_accuracy": 0.0011, + "train_positive_token_prob": 0.0012 + }, + { + "contrastive_loss": 0.9867, + "epoch": 0.013544018058690745, + "grad_norm": 58.25684356689453, + "learning_rate": 1.2000000000000002e-06, + "lm_loss": 17.7195, + "loss": 4.2869, + "step": 6, + "text_contrastive_loss": 3.0565, + "train_positive_log_prob": -260.1736, + "train_positive_token_accuracy": 0.0016, + "train_positive_token_prob": 0.0014 + }, + { + "contrastive_loss": 0.859, + "epoch": 0.01580135440180587, + "grad_norm": 57.55093765258789, + "learning_rate": 1.4000000000000001e-06, + "lm_loss": 17.5588, + "loss": 3.7114, + "step": 7, + "text_contrastive_loss": 2.1931, + "train_positive_log_prob": -257.8792, + "train_positive_token_accuracy": 0.0005, + "train_positive_token_prob": 0.001 + }, + { + "contrastive_loss": 0.7611, + "epoch": 0.01805869074492099, + "grad_norm": 53.48955154418945, + "learning_rate": 1.6000000000000001e-06, + "lm_loss": 17.7249, + "loss": 3.8475, + "step": 8, + "text_contrastive_loss": 2.6278, + "train_positive_log_prob": -260.8203, + "train_positive_token_accuracy": 0.0019, + "train_positive_token_prob": 0.0018 + }, + { + "contrastive_loss": 0.8412, + "epoch": 0.020316027088036117, + "grad_norm": 54.6489372253418, + "learning_rate": 1.8000000000000001e-06, + "lm_loss": 17.1428, + "loss": 3.848, + "step": 9, + "text_contrastive_loss": 2.585, + "train_positive_log_prob": -257.544, + "train_positive_token_accuracy": 0.0026, + "train_positive_token_prob": 0.0019 + }, + { + "contrastive_loss": 0.8755, + "epoch": 0.022573363431151242, + "grad_norm": 48.74787139892578, + "learning_rate": 2.0000000000000003e-06, + "lm_loss": 17.2321, + "loss": 3.6728, + "step": 10, + "text_contrastive_loss": 2.1481, + "train_positive_log_prob": -252.4299, + "train_positive_token_accuracy": 0.0013, + "train_positive_token_prob": 0.0016 + }, + { + "contrastive_loss": 0.8073, + "epoch": 0.024830699774266364, + "grad_norm": 44.24809646606445, + "learning_rate": 2.2e-06, + "lm_loss": 16.6762, + "loss": 3.3572, + "step": 11, + "text_contrastive_loss": 1.7646, + "train_positive_log_prob": -240.8988, + "train_positive_token_accuracy": 0.0016, + "train_positive_token_prob": 0.0015 + }, + { + "contrastive_loss": 0.8814, + "epoch": 0.02708803611738149, + "grad_norm": 45.11954116821289, + "learning_rate": 2.4000000000000003e-06, + "lm_loss": 16.508, + "loss": 3.547, + "step": 12, + "text_contrastive_loss": 2.0296, + "train_positive_log_prob": -246.7091, + "train_positive_token_accuracy": 0.0026, + "train_positive_token_prob": 0.0022 + }, + { + "contrastive_loss": 0.7926, + "epoch": 0.029345372460496615, + "grad_norm": 48.285457611083984, + "learning_rate": 2.6e-06, + "lm_loss": 16.6079, + "loss": 3.784, + "step": 13, + "text_contrastive_loss": 2.6612, + "train_positive_log_prob": -247.3651, + "train_positive_token_accuracy": 0.0029, + "train_positive_token_prob": 0.0017 + }, + { + "contrastive_loss": 0.5732, + "epoch": 0.03160270880361174, + "grad_norm": 43.60667037963867, + "learning_rate": 2.8000000000000003e-06, + "lm_loss": 16.0875, + "loss": 3.1918, + "step": 14, + "text_contrastive_loss": 2.0198, + "train_positive_log_prob": -243.1328, + "train_positive_token_accuracy": 0.0026, + "train_positive_token_prob": 0.0022 + }, + { + "contrastive_loss": 0.8066, + "epoch": 0.033860045146726865, + "grad_norm": 40.34406661987305, + "learning_rate": 3e-06, + "lm_loss": 15.6112, + "loss": 3.4349, + "step": 15, + "text_contrastive_loss": 2.1343, + "train_positive_log_prob": -226.1101, + "train_positive_token_accuracy": 0.0016, + "train_positive_token_prob": 0.0017 + }, + { + "contrastive_loss": 0.8447, + "epoch": 0.03611738148984198, + "grad_norm": 39.11477279663086, + "learning_rate": 3.2000000000000003e-06, + "lm_loss": 15.356, + "loss": 3.4394, + "step": 16, + "text_contrastive_loss": 2.1182, + "train_positive_log_prob": -227.5833, + "train_positive_token_accuracy": 0.0034, + "train_positive_token_prob": 0.0025 + }, + { + "contrastive_loss": 0.8276, + "epoch": 0.03837471783295711, + "grad_norm": 37.55423355102539, + "learning_rate": 3.4000000000000005e-06, + "lm_loss": 14.9893, + "loss": 3.3673, + "step": 17, + "text_contrastive_loss": 2.0815, + "train_positive_log_prob": -221.5569, + "train_positive_token_accuracy": 0.0032, + "train_positive_token_prob": 0.0025 + }, + { + "contrastive_loss": 0.8842, + "epoch": 0.040632054176072234, + "grad_norm": 36.379981994628906, + "learning_rate": 3.6000000000000003e-06, + "lm_loss": 14.8363, + "loss": 3.3558, + "step": 18, + "text_contrastive_loss": 1.9759, + "train_positive_log_prob": -225.7377, + "train_positive_token_accuracy": 0.0039, + "train_positive_token_prob": 0.0031 + }, + { + "contrastive_loss": 0.7762, + "epoch": 0.04288939051918736, + "grad_norm": 33.81783676147461, + "learning_rate": 3.8000000000000005e-06, + "lm_loss": 14.2563, + "loss": 3.0256, + "step": 19, + "text_contrastive_loss": 1.6475, + "train_positive_log_prob": -206.1565, + "train_positive_token_accuracy": 0.0022, + "train_positive_token_prob": 0.0024 + }, + { + "contrastive_loss": 0.8298, + "epoch": 0.045146726862302484, + "grad_norm": 30.58000946044922, + "learning_rate": 4.000000000000001e-06, + "lm_loss": 13.989, + "loss": 2.9394, + "step": 20, + "text_contrastive_loss": 1.4214, + "train_positive_log_prob": -204.9137, + "train_positive_token_accuracy": 0.004, + "train_positive_token_prob": 0.0031 + }, + { + "contrastive_loss": 0.8009, + "epoch": 0.04740406320541761, + "grad_norm": 27.405580520629883, + "learning_rate": 4.2000000000000004e-06, + "lm_loss": 13.4975, + "loss": 2.8919, + "step": 21, + "text_contrastive_loss": 1.4826, + "train_positive_log_prob": -204.0848, + "train_positive_token_accuracy": 0.0059, + "train_positive_token_prob": 0.0037 + }, + { + "contrastive_loss": 0.832, + "epoch": 0.04966139954853273, + "grad_norm": 27.337148666381836, + "learning_rate": 4.4e-06, + "lm_loss": 13.43, + "loss": 2.9708, + "step": 22, + "text_contrastive_loss": 1.5916, + "train_positive_log_prob": -196.054, + "train_positive_token_accuracy": 0.0062, + "train_positive_token_prob": 0.0039 + }, + { + "contrastive_loss": 0.8391, + "epoch": 0.05191873589164785, + "grad_norm": 26.402467727661133, + "learning_rate": 4.600000000000001e-06, + "lm_loss": 12.9879, + "loss": 2.8148, + "step": 23, + "text_contrastive_loss": 1.3538, + "train_positive_log_prob": -191.9732, + "train_positive_token_accuracy": 0.0055, + "train_positive_token_prob": 0.0039 + }, + { + "contrastive_loss": 0.7746, + "epoch": 0.05417607223476298, + "grad_norm": 24.108522415161133, + "learning_rate": 4.800000000000001e-06, + "lm_loss": 12.7752, + "loss": 2.7673, + "step": 24, + "text_contrastive_loss": 1.4303, + "train_positive_log_prob": -187.4394, + "train_positive_token_accuracy": 0.0067, + "train_positive_token_prob": 0.0048 + }, + { + "contrastive_loss": 0.7068, + "epoch": 0.056433408577878104, + "grad_norm": 23.287439346313477, + "learning_rate": 5e-06, + "lm_loss": 12.29, + "loss": 2.5606, + "step": 25, + "text_contrastive_loss": 1.2495, + "train_positive_log_prob": -182.7146, + "train_positive_token_accuracy": 0.0105, + "train_positive_token_prob": 0.006 + }, + { + "contrastive_loss": 0.7462, + "epoch": 0.05869074492099323, + "grad_norm": 22.84650421142578, + "learning_rate": 5.2e-06, + "lm_loss": 11.8529, + "loss": 2.5556, + "step": 26, + "text_contrastive_loss": 1.2482, + "train_positive_log_prob": -177.9784, + "train_positive_token_accuracy": 0.0203, + "train_positive_token_prob": 0.0087 + }, + { + "contrastive_loss": 0.8627, + "epoch": 0.060948081264108354, + "grad_norm": 22.748159408569336, + "learning_rate": 5.400000000000001e-06, + "lm_loss": 11.6889, + "loss": 2.7565, + "step": 27, + "text_contrastive_loss": 1.4498, + "train_positive_log_prob": -176.018, + "train_positive_token_accuracy": 0.0213, + "train_positive_token_prob": 0.0091 + }, + { + "contrastive_loss": 0.8588, + "epoch": 0.06320541760722348, + "grad_norm": 22.89902114868164, + "learning_rate": 5.600000000000001e-06, + "lm_loss": 11.4813, + "loss": 2.6633, + "step": 28, + "text_contrastive_loss": 1.3128, + "train_positive_log_prob": -169.8945, + "train_positive_token_accuracy": 0.0251, + "train_positive_token_prob": 0.0106 + }, + { + "contrastive_loss": 0.901, + "epoch": 0.0654627539503386, + "grad_norm": 24.330204010009766, + "learning_rate": 5.8e-06, + "lm_loss": 11.0336, + "loss": 2.5903, + "step": 29, + "text_contrastive_loss": 1.1719, + "train_positive_log_prob": -158.9959, + "train_positive_token_accuracy": 0.0339, + "train_positive_token_prob": 0.013 + }, + { + "contrastive_loss": 0.8079, + "epoch": 0.06772009029345373, + "grad_norm": 22.692087173461914, + "learning_rate": 6e-06, + "lm_loss": 10.7708, + "loss": 2.412, + "step": 30, + "text_contrastive_loss": 1.054, + "train_positive_log_prob": -158.7729, + "train_positive_token_accuracy": 0.0366, + "train_positive_token_prob": 0.0149 + }, + { + "contrastive_loss": 0.7005, + "epoch": 0.06997742663656885, + "grad_norm": 18.64335060119629, + "learning_rate": 6.200000000000001e-06, + "lm_loss": 10.4344, + "loss": 2.2566, + "step": 31, + "text_contrastive_loss": 1.0255, + "train_positive_log_prob": -154.6365, + "train_positive_token_accuracy": 0.044, + "train_positive_token_prob": 0.0171 + }, + { + "contrastive_loss": 0.8936, + "epoch": 0.07223476297968397, + "grad_norm": 20.944631576538086, + "learning_rate": 6.4000000000000006e-06, + "lm_loss": 10.1599, + "loss": 2.493, + "step": 32, + "text_contrastive_loss": 1.1669, + "train_positive_log_prob": -147.9612, + "train_positive_token_accuracy": 0.0496, + "train_positive_token_prob": 0.0181 + }, + { + "contrastive_loss": 0.7817, + "epoch": 0.0744920993227991, + "grad_norm": 20.710676193237305, + "learning_rate": 6.600000000000001e-06, + "lm_loss": 10.1113, + "loss": 2.5057, + "step": 33, + "text_contrastive_loss": 1.4257, + "train_positive_log_prob": -150.1212, + "train_positive_token_accuracy": 0.0587, + "train_positive_token_prob": 0.0218 + }, + { + "contrastive_loss": 0.7907, + "epoch": 0.07674943566591422, + "grad_norm": 21.150211334228516, + "learning_rate": 6.800000000000001e-06, + "lm_loss": 9.7874, + "loss": 2.3084, + "step": 34, + "text_contrastive_loss": 1.078, + "train_positive_log_prob": -145.2369, + "train_positive_token_accuracy": 0.0648, + "train_positive_token_prob": 0.0245 + }, + { + "contrastive_loss": 0.7368, + "epoch": 0.07900677200902935, + "grad_norm": 20.44724464416504, + "learning_rate": 7e-06, + "lm_loss": 9.7502, + "loss": 2.2834, + "step": 35, + "text_contrastive_loss": 1.1431, + "train_positive_log_prob": -147.2763, + "train_positive_token_accuracy": 0.0703, + "train_positive_token_prob": 0.0263 + }, + { + "contrastive_loss": 0.8581, + "epoch": 0.08126410835214447, + "grad_norm": 22.148113250732422, + "learning_rate": 7.2000000000000005e-06, + "lm_loss": 9.4479, + "loss": 2.3117, + "step": 36, + "text_contrastive_loss": 1.0177, + "train_positive_log_prob": -143.6716, + "train_positive_token_accuracy": 0.0753, + "train_positive_token_prob": 0.025 + }, + { + "contrastive_loss": 0.7491, + "epoch": 0.0835214446952596, + "grad_norm": 19.81952667236328, + "learning_rate": 7.4e-06, + "lm_loss": 9.3021, + "loss": 2.2459, + "step": 37, + "text_contrastive_loss": 1.1333, + "train_positive_log_prob": -134.7323, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0262 + }, + { + "contrastive_loss": 0.7074, + "epoch": 0.08577878103837472, + "grad_norm": 19.979900360107422, + "learning_rate": 7.600000000000001e-06, + "lm_loss": 9.1512, + "loss": 2.1831, + "step": 38, + "text_contrastive_loss": 1.1212, + "train_positive_log_prob": -140.7742, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0265 + }, + { + "contrastive_loss": 0.6507, + "epoch": 0.08803611738148984, + "grad_norm": 20.55365562438965, + "learning_rate": 7.800000000000002e-06, + "lm_loss": 9.0417, + "loss": 2.0466, + "step": 39, + "text_contrastive_loss": 0.9834, + "train_positive_log_prob": -134.1794, + "train_positive_token_accuracy": 0.0853, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.8164, + "epoch": 0.09029345372460497, + "grad_norm": 21.503141403198242, + "learning_rate": 8.000000000000001e-06, + "lm_loss": 8.9009, + "loss": 2.2032, + "step": 40, + "text_contrastive_loss": 0.9934, + "train_positive_log_prob": -133.0544, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0254 + }, + { + "contrastive_loss": 0.6318, + "epoch": 0.09255079006772009, + "grad_norm": 17.917552947998047, + "learning_rate": 8.2e-06, + "lm_loss": 8.7266, + "loss": 2.0771, + "step": 41, + "text_contrastive_loss": 1.1453, + "train_positive_log_prob": -129.5024, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0237 + }, + { + "contrastive_loss": 0.7027, + "epoch": 0.09480812641083522, + "grad_norm": 21.51190757751465, + "learning_rate": 8.400000000000001e-06, + "lm_loss": 8.6056, + "loss": 2.1076, + "step": 42, + "text_contrastive_loss": 1.0887, + "train_positive_log_prob": -128.8136, + "train_positive_token_accuracy": 0.0806, + "train_positive_token_prob": 0.0242 + }, + { + "contrastive_loss": 0.7258, + "epoch": 0.09706546275395034, + "grad_norm": 22.68890380859375, + "learning_rate": 8.6e-06, + "lm_loss": 8.5734, + "loss": 2.1692, + "step": 43, + "text_contrastive_loss": 1.1719, + "train_positive_log_prob": -125.1502, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.0231 + }, + { + "contrastive_loss": 0.7467, + "epoch": 0.09932279909706546, + "grad_norm": 21.590871810913086, + "learning_rate": 8.8e-06, + "lm_loss": 8.5615, + "loss": 2.0924, + "step": 44, + "text_contrastive_loss": 0.979, + "train_positive_log_prob": -124.7088, + "train_positive_token_accuracy": 0.0732, + "train_positive_token_prob": 0.0199 + }, + { + "contrastive_loss": 0.6887, + "epoch": 0.10158013544018059, + "grad_norm": 20.671302795410156, + "learning_rate": 9e-06, + "lm_loss": 8.3233, + "loss": 2.053, + "step": 45, + "text_contrastive_loss": 1.064, + "train_positive_log_prob": -122.5423, + "train_positive_token_accuracy": 0.0692, + "train_positive_token_prob": 0.0198 + }, + { + "contrastive_loss": 0.7273, + "epoch": 0.1038374717832957, + "grad_norm": 21.09688949584961, + "learning_rate": 9.200000000000002e-06, + "lm_loss": 8.4285, + "loss": 2.0631, + "step": 46, + "text_contrastive_loss": 0.9858, + "train_positive_log_prob": -127.5465, + "train_positive_token_accuracy": 0.0671, + "train_positive_token_prob": 0.0185 + }, + { + "contrastive_loss": 0.7641, + "epoch": 0.10609480812641084, + "grad_norm": 19.809520721435547, + "learning_rate": 9.4e-06, + "lm_loss": 8.3405, + "loss": 2.2092, + "step": 47, + "text_contrastive_loss": 1.222, + "train_positive_log_prob": -123.2246, + "train_positive_token_accuracy": 0.0687, + "train_positive_token_prob": 0.0192 + }, + { + "contrastive_loss": 0.6507, + "epoch": 0.10835214446952596, + "grad_norm": 18.231290817260742, + "learning_rate": 9.600000000000001e-06, + "lm_loss": 8.1848, + "loss": 2.1018, + "step": 48, + "text_contrastive_loss": 1.2653, + "train_positive_log_prob": -117.7555, + "train_positive_token_accuracy": 0.0676, + "train_positive_token_prob": 0.0178 + }, + { + "contrastive_loss": 0.6704, + "epoch": 0.11060948081264109, + "grad_norm": 20.48638153076172, + "learning_rate": 9.800000000000001e-06, + "lm_loss": 8.0175, + "loss": 1.9764, + "step": 49, + "text_contrastive_loss": 1.0083, + "train_positive_log_prob": -117.9512, + "train_positive_token_accuracy": 0.0696, + "train_positive_token_prob": 0.0172 + }, + { + "contrastive_loss": 0.7149, + "epoch": 0.11286681715575621, + "grad_norm": 19.319011688232422, + "learning_rate": 1e-05, + "lm_loss": 8.0847, + "loss": 1.9311, + "step": 50, + "text_contrastive_loss": 0.8155, + "train_positive_log_prob": -121.743, + "train_positive_token_accuracy": 0.062, + "train_positive_token_prob": 0.0167 + }, + { + "contrastive_loss": 0.6685, + "epoch": 0.11512415349887133, + "grad_norm": 19.215866088867188, + "learning_rate": 9.999994735903083e-06, + "lm_loss": 8.1015, + "loss": 2.0553, + "step": 51, + "text_contrastive_loss": 1.1531, + "train_positive_log_prob": -119.1461, + "train_positive_token_accuracy": 0.0606, + "train_positive_token_prob": 0.0151 + }, + { + "contrastive_loss": 0.7629, + "epoch": 0.11738148984198646, + "grad_norm": 21.606794357299805, + "learning_rate": 9.999978943623417e-06, + "lm_loss": 7.968, + "loss": 2.0153, + "step": 52, + "text_contrastive_loss": 0.9113, + "train_positive_log_prob": -116.2509, + "train_positive_token_accuracy": 0.0645, + "train_positive_token_prob": 0.016 + }, + { + "contrastive_loss": 0.645, + "epoch": 0.11963882618510158, + "grad_norm": 18.192424774169922, + "learning_rate": 9.999952623194252e-06, + "lm_loss": 7.9452, + "loss": 1.8982, + "step": 53, + "text_contrastive_loss": 0.9172, + "train_positive_log_prob": -117.6284, + "train_positive_token_accuracy": 0.0588, + "train_positive_token_prob": 0.0154 + }, + { + "contrastive_loss": 0.6355, + "epoch": 0.12189616252821671, + "grad_norm": 20.11510467529297, + "learning_rate": 9.999915774671009e-06, + "lm_loss": 7.8312, + "loss": 1.9191, + "step": 54, + "text_contrastive_loss": 1.0009, + "train_positive_log_prob": -114.7155, + "train_positive_token_accuracy": 0.0635, + "train_positive_token_prob": 0.0163 + }, + { + "contrastive_loss": 0.7332, + "epoch": 0.12415349887133183, + "grad_norm": 22.85787582397461, + "learning_rate": 9.999868398131282e-06, + "lm_loss": 7.9339, + "loss": 2.0805, + "step": 55, + "text_contrastive_loss": 1.1079, + "train_positive_log_prob": -119.9398, + "train_positive_token_accuracy": 0.0661, + "train_positive_token_prob": 0.0157 + }, + { + "contrastive_loss": 0.6018, + "epoch": 0.12641083521444696, + "grad_norm": 18.88653564453125, + "learning_rate": 9.999810493674826e-06, + "lm_loss": 7.8476, + "loss": 1.8545, + "step": 56, + "text_contrastive_loss": 0.9358, + "train_positive_log_prob": -114.5263, + "train_positive_token_accuracy": 0.0701, + "train_positive_token_prob": 0.016 + }, + { + "contrastive_loss": 0.7215, + "epoch": 0.12866817155756208, + "grad_norm": 20.430906295776367, + "learning_rate": 9.999742061423567e-06, + "lm_loss": 7.7903, + "loss": 1.9937, + "step": 57, + "text_contrastive_loss": 0.9862, + "train_positive_log_prob": -115.8193, + "train_positive_token_accuracy": 0.0633, + "train_positive_token_prob": 0.0157 + }, + { + "contrastive_loss": 0.6411, + "epoch": 0.1309255079006772, + "grad_norm": 18.110414505004883, + "learning_rate": 9.999663101521599e-06, + "lm_loss": 7.763, + "loss": 1.951, + "step": 58, + "text_contrastive_loss": 1.0672, + "train_positive_log_prob": -114.7438, + "train_positive_token_accuracy": 0.0671, + "train_positive_token_prob": 0.0158 + }, + { + "contrastive_loss": 0.6901, + "epoch": 0.13318284424379231, + "grad_norm": 19.704946517944336, + "learning_rate": 9.999573614135183e-06, + "lm_loss": 7.6791, + "loss": 1.9838, + "step": 59, + "text_contrastive_loss": 1.0515, + "train_positive_log_prob": -114.1909, + "train_positive_token_accuracy": 0.0612, + "train_positive_token_prob": 0.0158 + }, + { + "contrastive_loss": 0.6207, + "epoch": 0.13544018058690746, + "grad_norm": 18.407245635986328, + "learning_rate": 9.999473599452746e-06, + "lm_loss": 7.6776, + "loss": 1.9365, + "step": 60, + "text_contrastive_loss": 1.096, + "train_positive_log_prob": -115.3434, + "train_positive_token_accuracy": 0.0601, + "train_positive_token_prob": 0.0155 + }, + { + "contrastive_loss": 0.615, + "epoch": 0.13769751693002258, + "grad_norm": 19.66248321533203, + "learning_rate": 9.999363057684885e-06, + "lm_loss": 7.6661, + "loss": 1.9027, + "step": 61, + "text_contrastive_loss": 1.0421, + "train_positive_log_prob": -113.0756, + "train_positive_token_accuracy": 0.0726, + "train_positive_token_prob": 0.0168 + }, + { + "contrastive_loss": 0.6305, + "epoch": 0.1399548532731377, + "grad_norm": 17.9409236907959, + "learning_rate": 9.999241989064358e-06, + "lm_loss": 7.5125, + "loss": 1.8814, + "step": 62, + "text_contrastive_loss": 0.9995, + "train_positive_log_prob": -110.8095, + "train_positive_token_accuracy": 0.0718, + "train_positive_token_prob": 0.0172 + }, + { + "contrastive_loss": 0.7395, + "epoch": 0.14221218961625282, + "grad_norm": 18.29195213317871, + "learning_rate": 9.999110393846097e-06, + "lm_loss": 7.4877, + "loss": 2.1089, + "step": 63, + "text_contrastive_loss": 1.2413, + "train_positive_log_prob": -109.7412, + "train_positive_token_accuracy": 0.0648, + "train_positive_token_prob": 0.016 + }, + { + "contrastive_loss": 0.6463, + "epoch": 0.14446952595936793, + "grad_norm": 17.068239212036133, + "learning_rate": 9.998968272307187e-06, + "lm_loss": 7.4896, + "loss": 1.8952, + "step": 64, + "text_contrastive_loss": 0.9999, + "train_positive_log_prob": -110.9714, + "train_positive_token_accuracy": 0.0664, + "train_positive_token_prob": 0.0175 + }, + { + "contrastive_loss": 0.71, + "epoch": 0.14672686230248308, + "grad_norm": 18.741472244262695, + "learning_rate": 9.99881562474689e-06, + "lm_loss": 7.5704, + "loss": 1.9175, + "step": 65, + "text_contrastive_loss": 0.9009, + "train_positive_log_prob": -111.3639, + "train_positive_token_accuracy": 0.0664, + "train_positive_token_prob": 0.0166 + }, + { + "contrastive_loss": 0.6482, + "epoch": 0.1489841986455982, + "grad_norm": 18.477170944213867, + "learning_rate": 9.998652451486626e-06, + "lm_loss": 7.5724, + "loss": 1.8933, + "step": 66, + "text_contrastive_loss": 0.9758, + "train_positive_log_prob": -112.5775, + "train_positive_token_accuracy": 0.0673, + "train_positive_token_prob": 0.0166 + }, + { + "contrastive_loss": 0.7258, + "epoch": 0.15124153498871332, + "grad_norm": 18.677339553833008, + "learning_rate": 9.998478752869976e-06, + "lm_loss": 7.4351, + "loss": 1.9541, + "step": 67, + "text_contrastive_loss": 0.9696, + "train_positive_log_prob": -110.4729, + "train_positive_token_accuracy": 0.0718, + "train_positive_token_prob": 0.0182 + }, + { + "contrastive_loss": 0.7124, + "epoch": 0.15349887133182843, + "grad_norm": 19.702518463134766, + "learning_rate": 9.998294529262688e-06, + "lm_loss": 7.459, + "loss": 1.9834, + "step": 68, + "text_contrastive_loss": 1.0502, + "train_positive_log_prob": -108.8251, + "train_positive_token_accuracy": 0.0699, + "train_positive_token_prob": 0.0175 + }, + { + "contrastive_loss": 0.7754, + "epoch": 0.15575620767494355, + "grad_norm": 20.05575180053711, + "learning_rate": 9.998099781052673e-06, + "lm_loss": 7.3479, + "loss": 2.0937, + "step": 69, + "text_contrastive_loss": 1.167, + "train_positive_log_prob": -111.3928, + "train_positive_token_accuracy": 0.0688, + "train_positive_token_prob": 0.0164 + }, + { + "contrastive_loss": 0.7206, + "epoch": 0.1580135440180587, + "grad_norm": 22.70125389099121, + "learning_rate": 9.997894508649995e-06, + "lm_loss": 7.4852, + "loss": 1.9536, + "step": 70, + "text_contrastive_loss": 0.9691, + "train_positive_log_prob": -109.8203, + "train_positive_token_accuracy": 0.0559, + "train_positive_token_prob": 0.0161 + }, + { + "contrastive_loss": 0.5061, + "epoch": 0.16027088036117382, + "grad_norm": 15.450678825378418, + "learning_rate": 9.997678712486889e-06, + "lm_loss": 7.5078, + "loss": 1.6335, + "step": 71, + "text_contrastive_loss": 0.7534, + "train_positive_log_prob": -105.4927, + "train_positive_token_accuracy": 0.0634, + "train_positive_token_prob": 0.0164 + }, + { + "contrastive_loss": 0.689, + "epoch": 0.16252821670428894, + "grad_norm": 17.911428451538086, + "learning_rate": 9.99745239301774e-06, + "lm_loss": 7.3438, + "loss": 1.965, + "step": 72, + "text_contrastive_loss": 1.0832, + "train_positive_log_prob": -109.494, + "train_positive_token_accuracy": 0.0686, + "train_positive_token_prob": 0.0173 + }, + { + "contrastive_loss": 0.8052, + "epoch": 0.16478555304740405, + "grad_norm": 18.49860191345215, + "learning_rate": 9.997215550719097e-06, + "lm_loss": 7.3047, + "loss": 2.1129, + "step": 73, + "text_contrastive_loss": 1.1544, + "train_positive_log_prob": -108.4574, + "train_positive_token_accuracy": 0.075, + "train_positive_token_prob": 0.0176 + }, + { + "contrastive_loss": 0.6399, + "epoch": 0.1670428893905192, + "grad_norm": 19.81185531616211, + "learning_rate": 9.996968186089664e-06, + "lm_loss": 7.3076, + "loss": 1.8823, + "step": 74, + "text_contrastive_loss": 1.0232, + "train_positive_log_prob": -107.8451, + "train_positive_token_accuracy": 0.0686, + "train_positive_token_prob": 0.018 + }, + { + "contrastive_loss": 0.8105, + "epoch": 0.16930022573363432, + "grad_norm": 19.230972290039062, + "learning_rate": 9.996710299650302e-06, + "lm_loss": 7.2606, + "loss": 2.0386, + "step": 75, + "text_contrastive_loss": 1.004, + "train_positive_log_prob": -106.073, + "train_positive_token_accuracy": 0.0698, + "train_positive_token_prob": 0.0174 + }, + { + "contrastive_loss": 0.7104, + "epoch": 0.17155756207674944, + "grad_norm": 19.1718807220459, + "learning_rate": 9.996441891944023e-06, + "lm_loss": 7.2542, + "loss": 1.9102, + "step": 76, + "text_contrastive_loss": 0.9488, + "train_positive_log_prob": -105.785, + "train_positive_token_accuracy": 0.0646, + "train_positive_token_prob": 0.0177 + }, + { + "contrastive_loss": 0.6975, + "epoch": 0.17381489841986456, + "grad_norm": 17.00054168701172, + "learning_rate": 9.996162963536004e-06, + "lm_loss": 7.3062, + "loss": 1.9641, + "step": 77, + "text_contrastive_loss": 1.0718, + "train_positive_log_prob": -107.2794, + "train_positive_token_accuracy": 0.0652, + "train_positive_token_prob": 0.0171 + }, + { + "contrastive_loss": 0.6981, + "epoch": 0.17607223476297967, + "grad_norm": 17.039268493652344, + "learning_rate": 9.995873515013562e-06, + "lm_loss": 7.1379, + "loss": 1.9126, + "step": 78, + "text_contrastive_loss": 1.0014, + "train_positive_log_prob": -104.3312, + "train_positive_token_accuracy": 0.0714, + "train_positive_token_prob": 0.0175 + }, + { + "contrastive_loss": 0.6859, + "epoch": 0.17832957110609482, + "grad_norm": 18.37651824951172, + "learning_rate": 9.99557354698617e-06, + "lm_loss": 7.1642, + "loss": 1.8423, + "step": 79, + "text_contrastive_loss": 0.88, + "train_positive_log_prob": -106.0609, + "train_positive_token_accuracy": 0.0747, + "train_positive_token_prob": 0.0182 + }, + { + "contrastive_loss": 0.6445, + "epoch": 0.18058690744920994, + "grad_norm": 18.768932342529297, + "learning_rate": 9.995263060085456e-06, + "lm_loss": 7.1633, + "loss": 1.9017, + "step": 80, + "text_contrastive_loss": 1.0819, + "train_positive_log_prob": -108.5908, + "train_positive_token_accuracy": 0.0685, + "train_positive_token_prob": 0.0183 + }, + { + "contrastive_loss": 0.7405, + "epoch": 0.18284424379232506, + "grad_norm": 20.74585723876953, + "learning_rate": 9.99494205496519e-06, + "lm_loss": 7.1334, + "loss": 2.0325, + "step": 81, + "text_contrastive_loss": 1.1574, + "train_positive_log_prob": -106.108, + "train_positive_token_accuracy": 0.0678, + "train_positive_token_prob": 0.0183 + }, + { + "contrastive_loss": 0.681, + "epoch": 0.18510158013544017, + "grad_norm": 19.130083084106445, + "learning_rate": 9.994610532301296e-06, + "lm_loss": 7.1803, + "loss": 1.9202, + "step": 82, + "text_contrastive_loss": 1.0424, + "train_positive_log_prob": -107.8233, + "train_positive_token_accuracy": 0.0666, + "train_positive_token_prob": 0.0174 + }, + { + "contrastive_loss": 0.6693, + "epoch": 0.1873589164785553, + "grad_norm": 18.271968841552734, + "learning_rate": 9.99426849279184e-06, + "lm_loss": 7.1524, + "loss": 1.9452, + "step": 83, + "text_contrastive_loss": 1.1212, + "train_positive_log_prob": -106.1114, + "train_positive_token_accuracy": 0.0706, + "train_positive_token_prob": 0.0184 + }, + { + "contrastive_loss": 0.5934, + "epoch": 0.18961625282167044, + "grad_norm": 17.79524040222168, + "learning_rate": 9.993915937157033e-06, + "lm_loss": 7.1962, + "loss": 1.7976, + "step": 84, + "text_contrastive_loss": 0.9691, + "train_positive_log_prob": -104.9902, + "train_positive_token_accuracy": 0.0651, + "train_positive_token_prob": 0.0182 + }, + { + "contrastive_loss": 0.8511, + "epoch": 0.19187358916478556, + "grad_norm": 20.764305114746094, + "learning_rate": 9.99355286613923e-06, + "lm_loss": 6.9601, + "loss": 2.2365, + "step": 85, + "text_contrastive_loss": 1.3788, + "train_positive_log_prob": -102.9144, + "train_positive_token_accuracy": 0.0711, + "train_positive_token_prob": 0.0191 + }, + { + "contrastive_loss": 0.6804, + "epoch": 0.19413092550790068, + "grad_norm": 20.68915367126465, + "learning_rate": 9.993179280502926e-06, + "lm_loss": 7.075, + "loss": 1.8499, + "step": 86, + "text_contrastive_loss": 0.924, + "train_positive_log_prob": -104.3842, + "train_positive_token_accuracy": 0.0707, + "train_positive_token_prob": 0.0194 + }, + { + "contrastive_loss": 0.7439, + "epoch": 0.1963882618510158, + "grad_norm": 21.086267471313477, + "learning_rate": 9.99279518103476e-06, + "lm_loss": 7.1653, + "loss": 1.9803, + "step": 87, + "text_contrastive_loss": 1.0396, + "train_positive_log_prob": -103.7825, + "train_positive_token_accuracy": 0.0669, + "train_positive_token_prob": 0.0181 + }, + { + "contrastive_loss": 0.704, + "epoch": 0.1986455981941309, + "grad_norm": 18.166715621948242, + "learning_rate": 9.992400568543506e-06, + "lm_loss": 7.0027, + "loss": 1.987, + "step": 88, + "text_contrastive_loss": 1.1656, + "train_positive_log_prob": -104.4358, + "train_positive_token_accuracy": 0.0739, + "train_positive_token_prob": 0.0209 + }, + { + "contrastive_loss": 0.7478, + "epoch": 0.20090293453724606, + "grad_norm": 19.894939422607422, + "learning_rate": 9.991995443860074e-06, + "lm_loss": 7.1256, + "loss": 1.92, + "step": 89, + "text_contrastive_loss": 0.9194, + "train_positive_log_prob": -104.4091, + "train_positive_token_accuracy": 0.0749, + "train_positive_token_prob": 0.0206 + }, + { + "contrastive_loss": 0.7173, + "epoch": 0.20316027088036118, + "grad_norm": 18.498546600341797, + "learning_rate": 9.991579807837511e-06, + "lm_loss": 7.094, + "loss": 1.9442, + "step": 90, + "text_contrastive_loss": 1.035, + "train_positive_log_prob": -102.7495, + "train_positive_token_accuracy": 0.0698, + "train_positive_token_prob": 0.0195 + }, + { + "contrastive_loss": 0.5356, + "epoch": 0.2054176072234763, + "grad_norm": 16.10692596435547, + "learning_rate": 9.991153661350996e-06, + "lm_loss": 7.1971, + "loss": 1.6642, + "step": 91, + "text_contrastive_loss": 0.8177, + "train_positive_log_prob": -105.9103, + "train_positive_token_accuracy": 0.0685, + "train_positive_token_prob": 0.0193 + }, + { + "contrastive_loss": 0.6454, + "epoch": 0.2076749435665914, + "grad_norm": 19.639802932739258, + "learning_rate": 9.990717005297841e-06, + "lm_loss": 7.0934, + "loss": 1.8237, + "step": 92, + "text_contrastive_loss": 0.938, + "train_positive_log_prob": -102.0549, + "train_positive_token_accuracy": 0.0763, + "train_positive_token_prob": 0.0206 + }, + { + "contrastive_loss": 0.6669, + "epoch": 0.20993227990970656, + "grad_norm": 16.91734504699707, + "learning_rate": 9.990269840597484e-06, + "lm_loss": 6.9415, + "loss": 1.8666, + "step": 93, + "text_contrastive_loss": 1.011, + "train_positive_log_prob": -100.7887, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.022 + }, + { + "contrastive_loss": 0.6903, + "epoch": 0.21218961625282168, + "grad_norm": 18.62099838256836, + "learning_rate": 9.989812168191495e-06, + "lm_loss": 6.9441, + "loss": 1.9131, + "step": 94, + "text_contrastive_loss": 1.0568, + "train_positive_log_prob": -102.6439, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0217 + }, + { + "contrastive_loss": 0.759, + "epoch": 0.2144469525959368, + "grad_norm": 19.015317916870117, + "learning_rate": 9.989343989043563e-06, + "lm_loss": 6.8949, + "loss": 1.9706, + "step": 95, + "text_contrastive_loss": 1.0441, + "train_positive_log_prob": -100.4054, + "train_positive_token_accuracy": 0.073, + "train_positive_token_prob": 0.0223 + }, + { + "contrastive_loss": 0.7995, + "epoch": 0.21670428893905191, + "grad_norm": 20.863733291625977, + "learning_rate": 9.988865304139509e-06, + "lm_loss": 6.9463, + "loss": 2.0648, + "step": 96, + "text_contrastive_loss": 1.1414, + "train_positive_log_prob": -103.9494, + "train_positive_token_accuracy": 0.0835, + "train_positive_token_prob": 0.0223 + }, + { + "contrastive_loss": 0.7084, + "epoch": 0.21896162528216703, + "grad_norm": 21.328357696533203, + "learning_rate": 9.988376114487264e-06, + "lm_loss": 7.098, + "loss": 1.9105, + "step": 97, + "text_contrastive_loss": 0.9846, + "train_positive_log_prob": -103.3597, + "train_positive_token_accuracy": 0.0692, + "train_positive_token_prob": 0.0205 + }, + { + "contrastive_loss": 0.5447, + "epoch": 0.22121896162528218, + "grad_norm": 15.67144775390625, + "learning_rate": 9.98787642111689e-06, + "lm_loss": 6.91, + "loss": 1.7165, + "step": 98, + "text_contrastive_loss": 0.9615, + "train_positive_log_prob": -101.8364, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0211 + }, + { + "contrastive_loss": 0.8332, + "epoch": 0.2234762979683973, + "grad_norm": 20.161094665527344, + "learning_rate": 9.98736622508056e-06, + "lm_loss": 6.9547, + "loss": 2.0724, + "step": 99, + "text_contrastive_loss": 1.0876, + "train_positive_log_prob": -105.3551, + "train_positive_token_accuracy": 0.0766, + "train_positive_token_prob": 0.0221 + }, + { + "contrastive_loss": 0.8229, + "epoch": 0.22573363431151242, + "grad_norm": 25.128686904907227, + "learning_rate": 9.98684552745256e-06, + "lm_loss": 6.9163, + "loss": 1.9792, + "step": 100, + "text_contrastive_loss": 0.9292, + "train_positive_log_prob": -104.5588, + "train_positive_token_accuracy": 0.0749, + "train_positive_token_prob": 0.0222 + }, + { + "contrastive_loss": 0.6085, + "epoch": 0.22799097065462753, + "grad_norm": 19.56101417541504, + "learning_rate": 9.986314329329294e-06, + "lm_loss": 6.8579, + "loss": 1.8523, + "step": 101, + "text_contrastive_loss": 1.116, + "train_positive_log_prob": -102.8729, + "train_positive_token_accuracy": 0.0693, + "train_positive_token_prob": 0.0207 + }, + { + "contrastive_loss": 0.6081, + "epoch": 0.23024830699774265, + "grad_norm": 19.628129959106445, + "learning_rate": 9.985772631829272e-06, + "lm_loss": 6.8539, + "loss": 1.7848, + "step": 102, + "text_contrastive_loss": 0.9826, + "train_positive_log_prob": -101.0424, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.0223 + }, + { + "contrastive_loss": 0.6272, + "epoch": 0.2325056433408578, + "grad_norm": 17.43963050842285, + "learning_rate": 9.985220436093112e-06, + "lm_loss": 6.8298, + "loss": 1.8244, + "step": 103, + "text_contrastive_loss": 1.0284, + "train_positive_log_prob": -102.3666, + "train_positive_token_accuracy": 0.0685, + "train_positive_token_prob": 0.0207 + }, + { + "contrastive_loss": 0.7114, + "epoch": 0.23476297968397292, + "grad_norm": 20.328269958496094, + "learning_rate": 9.984657743283543e-06, + "lm_loss": 6.8989, + "loss": 1.9473, + "step": 104, + "text_contrastive_loss": 1.092, + "train_positive_log_prob": -100.7624, + "train_positive_token_accuracy": 0.0677, + "train_positive_token_prob": 0.0201 + }, + { + "contrastive_loss": 0.6357, + "epoch": 0.23702031602708803, + "grad_norm": 19.055599212646484, + "learning_rate": 9.984084554585387e-06, + "lm_loss": 6.8036, + "loss": 1.8775, + "step": 105, + "text_contrastive_loss": 1.1228, + "train_positive_log_prob": -100.5402, + "train_positive_token_accuracy": 0.0782, + "train_positive_token_prob": 0.0228 + }, + { + "contrastive_loss": 0.52, + "epoch": 0.23927765237020315, + "grad_norm": 15.22811508178711, + "learning_rate": 9.983500871205577e-06, + "lm_loss": 6.8497, + "loss": 1.6118, + "step": 106, + "text_contrastive_loss": 0.8135, + "train_positive_log_prob": -101.6991, + "train_positive_token_accuracy": 0.0702, + "train_positive_token_prob": 0.0204 + }, + { + "contrastive_loss": 0.7511, + "epoch": 0.24153498871331827, + "grad_norm": 20.242881774902344, + "learning_rate": 9.982906694373136e-06, + "lm_loss": 6.7783, + "loss": 1.9461, + "step": 107, + "text_contrastive_loss": 1.0344, + "train_positive_log_prob": -100.0814, + "train_positive_token_accuracy": 0.0659, + "train_positive_token_prob": 0.0207 + }, + { + "contrastive_loss": 0.6672, + "epoch": 0.24379232505643342, + "grad_norm": 19.330408096313477, + "learning_rate": 9.98230202533919e-06, + "lm_loss": 6.7642, + "loss": 1.8222, + "step": 108, + "text_contrastive_loss": 0.957, + "train_positive_log_prob": -98.2935, + "train_positive_token_accuracy": 0.0669, + "train_positive_token_prob": 0.0201 + }, + { + "contrastive_loss": 0.6733, + "epoch": 0.24604966139954854, + "grad_norm": 17.462583541870117, + "learning_rate": 9.98168686537695e-06, + "lm_loss": 6.8258, + "loss": 1.9009, + "step": 109, + "text_contrastive_loss": 1.09, + "train_positive_log_prob": -102.3546, + "train_positive_token_accuracy": 0.0724, + "train_positive_token_prob": 0.0198 + }, + { + "contrastive_loss": 0.5173, + "epoch": 0.24830699774266365, + "grad_norm": 19.254297256469727, + "learning_rate": 9.98106121578172e-06, + "lm_loss": 6.8441, + "loss": 1.6937, + "step": 110, + "text_contrastive_loss": 0.9841, + "train_positive_log_prob": -99.3799, + "train_positive_token_accuracy": 0.0659, + "train_positive_token_prob": 0.02 + }, + { + "contrastive_loss": 0.5741, + "epoch": 0.2505643340857788, + "grad_norm": 18.1674861907959, + "learning_rate": 9.980425077870895e-06, + "lm_loss": 6.7829, + "loss": 1.7758, + "step": 111, + "text_contrastive_loss": 1.0467, + "train_positive_log_prob": -99.4889, + "train_positive_token_accuracy": 0.0674, + "train_positive_token_prob": 0.0202 + }, + { + "contrastive_loss": 0.662, + "epoch": 0.2528216704288939, + "grad_norm": 19.95972442626953, + "learning_rate": 9.979778452983949e-06, + "lm_loss": 6.7366, + "loss": 1.8166, + "step": 112, + "text_contrastive_loss": 0.9619, + "train_positive_log_prob": -96.9406, + "train_positive_token_accuracy": 0.0708, + "train_positive_token_prob": 0.0197 + }, + { + "contrastive_loss": 0.6015, + "epoch": 0.255079006772009, + "grad_norm": 17.444772720336914, + "learning_rate": 9.979121342482442e-06, + "lm_loss": 6.7184, + "loss": 1.7707, + "step": 113, + "text_contrastive_loss": 0.9946, + "train_positive_log_prob": -99.7769, + "train_positive_token_accuracy": 0.0689, + "train_positive_token_prob": 0.0207 + }, + { + "contrastive_loss": 0.7005, + "epoch": 0.25733634311512416, + "grad_norm": 19.67070960998535, + "learning_rate": 9.978453747750012e-06, + "lm_loss": 6.7312, + "loss": 1.8553, + "step": 114, + "text_contrastive_loss": 0.9635, + "train_positive_log_prob": -102.4711, + "train_positive_token_accuracy": 0.0695, + "train_positive_token_prob": 0.0213 + }, + { + "contrastive_loss": 0.5851, + "epoch": 0.2595936794582393, + "grad_norm": 15.300374984741211, + "learning_rate": 9.977775670192373e-06, + "lm_loss": 6.7646, + "loss": 1.7363, + "step": 115, + "text_contrastive_loss": 0.9494, + "train_positive_log_prob": -102.7355, + "train_positive_token_accuracy": 0.0669, + "train_positive_token_prob": 0.0206 + }, + { + "contrastive_loss": 0.7071, + "epoch": 0.2618510158013544, + "grad_norm": 20.292707443237305, + "learning_rate": 9.977087111237307e-06, + "lm_loss": 6.7663, + "loss": 1.8529, + "step": 116, + "text_contrastive_loss": 0.9385, + "train_positive_log_prob": -99.0656, + "train_positive_token_accuracy": 0.072, + "train_positive_token_prob": 0.0215 + }, + { + "contrastive_loss": 0.7114, + "epoch": 0.26410835214446954, + "grad_norm": 21.844806671142578, + "learning_rate": 9.976388072334674e-06, + "lm_loss": 6.7303, + "loss": 1.8336, + "step": 117, + "text_contrastive_loss": 0.8982, + "train_positive_log_prob": -101.2452, + "train_positive_token_accuracy": 0.0737, + "train_positive_token_prob": 0.0215 + }, + { + "contrastive_loss": 0.6679, + "epoch": 0.26636568848758463, + "grad_norm": 17.832178115844727, + "learning_rate": 9.975678554956397e-06, + "lm_loss": 6.6895, + "loss": 1.8507, + "step": 118, + "text_contrastive_loss": 1.0277, + "train_positive_log_prob": -99.137, + "train_positive_token_accuracy": 0.0762, + "train_positive_token_prob": 0.0235 + }, + { + "contrastive_loss": 0.6255, + "epoch": 0.2686230248306998, + "grad_norm": 19.42404556274414, + "learning_rate": 9.974958560596464e-06, + "lm_loss": 6.6042, + "loss": 1.7383, + "step": 119, + "text_contrastive_loss": 0.9049, + "train_positive_log_prob": -97.4645, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0232 + }, + { + "contrastive_loss": 0.6203, + "epoch": 0.2708803611738149, + "grad_norm": 18.07141876220703, + "learning_rate": 9.97422809077092e-06, + "lm_loss": 6.7353, + "loss": 1.8304, + "step": 120, + "text_contrastive_loss": 1.0731, + "train_positive_log_prob": -98.4024, + "train_positive_token_accuracy": 0.0658, + "train_positive_token_prob": 0.0205 + }, + { + "contrastive_loss": 0.7014, + "epoch": 0.27313769751693, + "grad_norm": 20.61005401611328, + "learning_rate": 9.973487147017874e-06, + "lm_loss": 6.6788, + "loss": 1.9025, + "step": 121, + "text_contrastive_loss": 1.0664, + "train_positive_log_prob": -99.3984, + "train_positive_token_accuracy": 0.0722, + "train_positive_token_prob": 0.0226 + }, + { + "contrastive_loss": 0.6388, + "epoch": 0.27539503386004516, + "grad_norm": 19.510150909423828, + "learning_rate": 9.972735730897484e-06, + "lm_loss": 6.7738, + "loss": 1.7787, + "step": 122, + "text_contrastive_loss": 0.925, + "train_positive_log_prob": -99.6268, + "train_positive_token_accuracy": 0.0629, + "train_positive_token_prob": 0.0205 + }, + { + "contrastive_loss": 0.6828, + "epoch": 0.27765237020316025, + "grad_norm": 18.93800926208496, + "learning_rate": 9.97197384399196e-06, + "lm_loss": 6.677, + "loss": 1.8729, + "step": 123, + "text_contrastive_loss": 1.0447, + "train_positive_log_prob": -99.7511, + "train_positive_token_accuracy": 0.0758, + "train_positive_token_prob": 0.0229 + }, + { + "contrastive_loss": 0.5558, + "epoch": 0.2799097065462754, + "grad_norm": 18.574007034301758, + "learning_rate": 9.971201487905563e-06, + "lm_loss": 6.7474, + "loss": 1.6075, + "step": 124, + "text_contrastive_loss": 0.7539, + "train_positive_log_prob": -100.951, + "train_positive_token_accuracy": 0.0681, + "train_positive_token_prob": 0.021 + }, + { + "contrastive_loss": 0.5613, + "epoch": 0.28216704288939054, + "grad_norm": 16.574710845947266, + "learning_rate": 9.970418664264596e-06, + "lm_loss": 6.7799, + "loss": 1.7784, + "step": 125, + "text_contrastive_loss": 1.0782, + "train_positive_log_prob": -100.2656, + "train_positive_token_accuracy": 0.0655, + "train_positive_token_prob": 0.0214 + }, + { + "contrastive_loss": 0.7063, + "epoch": 0.28442437923250563, + "grad_norm": 19.18445587158203, + "learning_rate": 9.969625374717401e-06, + "lm_loss": 6.7066, + "loss": 1.8972, + "step": 126, + "text_contrastive_loss": 1.0405, + "train_positive_log_prob": -98.7174, + "train_positive_token_accuracy": 0.0703, + "train_positive_token_prob": 0.0216 + }, + { + "contrastive_loss": 0.6516, + "epoch": 0.2866817155756208, + "grad_norm": 20.365747451782227, + "learning_rate": 9.96882162093436e-06, + "lm_loss": 6.6885, + "loss": 1.7357, + "step": 127, + "text_contrastive_loss": 0.8305, + "train_positive_log_prob": -99.5426, + "train_positive_token_accuracy": 0.0627, + "train_positive_token_prob": 0.021 + }, + { + "contrastive_loss": 0.6898, + "epoch": 0.28893905191873587, + "grad_norm": 18.256505966186523, + "learning_rate": 9.968007404607887e-06, + "lm_loss": 6.6349, + "loss": 1.8094, + "step": 128, + "text_contrastive_loss": 0.9122, + "train_positive_log_prob": -99.5514, + "train_positive_token_accuracy": 0.0682, + "train_positive_token_prob": 0.0229 + }, + { + "contrastive_loss": 0.5264, + "epoch": 0.291196388261851, + "grad_norm": 16.088481903076172, + "learning_rate": 9.96718272745243e-06, + "lm_loss": 6.5939, + "loss": 1.5757, + "step": 129, + "text_contrastive_loss": 0.78, + "train_positive_log_prob": -94.6819, + "train_positive_token_accuracy": 0.074, + "train_positive_token_prob": 0.0219 + }, + { + "contrastive_loss": 0.6064, + "epoch": 0.29345372460496616, + "grad_norm": 19.238529205322266, + "learning_rate": 9.966347591204459e-06, + "lm_loss": 6.4762, + "loss": 1.7043, + "step": 130, + "text_contrastive_loss": 0.9006, + "train_positive_log_prob": -93.8795, + "train_positive_token_accuracy": 0.0693, + "train_positive_token_prob": 0.0224 + }, + { + "contrastive_loss": 0.4834, + "epoch": 0.29571106094808125, + "grad_norm": 14.992815017700195, + "learning_rate": 9.96550199762247e-06, + "lm_loss": 6.7187, + "loss": 1.6328, + "step": 131, + "text_contrastive_loss": 0.9551, + "train_positive_log_prob": -99.782, + "train_positive_token_accuracy": 0.0708, + "train_positive_token_prob": 0.0218 + }, + { + "contrastive_loss": 0.6372, + "epoch": 0.2979683972911964, + "grad_norm": 19.031585693359375, + "learning_rate": 9.964645948486978e-06, + "lm_loss": 6.575, + "loss": 1.7947, + "step": 132, + "text_contrastive_loss": 1.0, + "train_positive_log_prob": -96.2154, + "train_positive_token_accuracy": 0.0766, + "train_positive_token_prob": 0.0231 + }, + { + "contrastive_loss": 0.7499, + "epoch": 0.3002257336343115, + "grad_norm": 19.719074249267578, + "learning_rate": 9.963779445600512e-06, + "lm_loss": 6.7013, + "loss": 1.9826, + "step": 133, + "text_contrastive_loss": 1.1252, + "train_positive_log_prob": -101.6953, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0235 + }, + { + "contrastive_loss": 0.5363, + "epoch": 0.30248306997742663, + "grad_norm": 16.6535701751709, + "learning_rate": 9.962902490787616e-06, + "lm_loss": 6.5958, + "loss": 1.699, + "step": 134, + "text_contrastive_loss": 1.0062, + "train_positive_log_prob": -96.59, + "train_positive_token_accuracy": 0.071, + "train_positive_token_prob": 0.0223 + }, + { + "contrastive_loss": 0.4763, + "epoch": 0.3047404063205418, + "grad_norm": 16.309438705444336, + "learning_rate": 9.962015085894838e-06, + "lm_loss": 6.6158, + "loss": 1.6021, + "step": 135, + "text_contrastive_loss": 0.9285, + "train_positive_log_prob": -96.8084, + "train_positive_token_accuracy": 0.075, + "train_positive_token_prob": 0.0224 + }, + { + "contrastive_loss": 0.6825, + "epoch": 0.30699774266365687, + "grad_norm": 23.106409072875977, + "learning_rate": 9.961117232790734e-06, + "lm_loss": 6.5373, + "loss": 1.8454, + "step": 136, + "text_contrastive_loss": 1.0185, + "train_positive_log_prob": -96.7851, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.024 + }, + { + "contrastive_loss": 0.7419, + "epoch": 0.309255079006772, + "grad_norm": 19.632888793945312, + "learning_rate": 9.960208933365857e-06, + "lm_loss": 6.4931, + "loss": 1.893, + "step": 137, + "text_contrastive_loss": 1.0037, + "train_positive_log_prob": -96.8434, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0237 + }, + { + "contrastive_loss": 0.5699, + "epoch": 0.3115124153498871, + "grad_norm": 17.344514846801758, + "learning_rate": 9.959290189532757e-06, + "lm_loss": 6.6304, + "loss": 1.7665, + "step": 138, + "text_contrastive_loss": 1.0672, + "train_positive_log_prob": -98.5968, + "train_positive_token_accuracy": 0.0683, + "train_positive_token_prob": 0.0222 + }, + { + "contrastive_loss": 0.7102, + "epoch": 0.31376975169300225, + "grad_norm": 18.722190856933594, + "learning_rate": 9.958361003225979e-06, + "lm_loss": 6.554, + "loss": 1.88, + "step": 139, + "text_contrastive_loss": 1.0289, + "train_positive_log_prob": -98.08, + "train_positive_token_accuracy": 0.0736, + "train_positive_token_prob": 0.0237 + }, + { + "contrastive_loss": 0.736, + "epoch": 0.3160270880361174, + "grad_norm": 17.325239181518555, + "learning_rate": 9.957421376402053e-06, + "lm_loss": 6.5851, + "loss": 1.8548, + "step": 140, + "text_contrastive_loss": 0.9205, + "train_positive_log_prob": -97.601, + "train_positive_token_accuracy": 0.0775, + "train_positive_token_prob": 0.0242 + }, + { + "contrastive_loss": 0.6273, + "epoch": 0.3182844243792325, + "grad_norm": 17.60327911376953, + "learning_rate": 9.956471311039491e-06, + "lm_loss": 6.5075, + "loss": 1.7636, + "step": 141, + "text_contrastive_loss": 0.9712, + "train_positive_log_prob": -96.73, + "train_positive_token_accuracy": 0.0849, + "train_positive_token_prob": 0.0251 + }, + { + "contrastive_loss": 0.6034, + "epoch": 0.32054176072234764, + "grad_norm": 17.8837890625, + "learning_rate": 9.95551080913879e-06, + "lm_loss": 6.5592, + "loss": 1.7227, + "step": 142, + "text_contrastive_loss": 0.9268, + "train_positive_log_prob": -98.0795, + "train_positive_token_accuracy": 0.0671, + "train_positive_token_prob": 0.0222 + }, + { + "contrastive_loss": 0.6254, + "epoch": 0.3227990970654628, + "grad_norm": 17.19380760192871, + "learning_rate": 9.954539872722417e-06, + "lm_loss": 6.6127, + "loss": 1.7606, + "step": 143, + "text_contrastive_loss": 0.9478, + "train_positive_log_prob": -99.5249, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0238 + }, + { + "contrastive_loss": 0.6927, + "epoch": 0.32505643340857787, + "grad_norm": 19.1299991607666, + "learning_rate": 9.953558503834819e-06, + "lm_loss": 6.5588, + "loss": 1.8522, + "step": 144, + "text_contrastive_loss": 1.0073, + "train_positive_log_prob": -98.8929, + "train_positive_token_accuracy": 0.0762, + "train_positive_token_prob": 0.0235 + }, + { + "contrastive_loss": 0.7106, + "epoch": 0.327313769751693, + "grad_norm": 19.535919189453125, + "learning_rate": 9.9525667045424e-06, + "lm_loss": 6.4189, + "loss": 1.8978, + "step": 145, + "text_contrastive_loss": 1.0906, + "train_positive_log_prob": -95.959, + "train_positive_token_accuracy": 0.0713, + "train_positive_token_prob": 0.0227 + }, + { + "contrastive_loss": 0.5681, + "epoch": 0.3295711060948081, + "grad_norm": 17.805524826049805, + "learning_rate": 9.951564476933534e-06, + "lm_loss": 6.66, + "loss": 1.651, + "step": 146, + "text_contrastive_loss": 0.8338, + "train_positive_log_prob": -100.3724, + "train_positive_token_accuracy": 0.0736, + "train_positive_token_prob": 0.0225 + }, + { + "contrastive_loss": 0.5829, + "epoch": 0.33182844243792325, + "grad_norm": 18.408859252929688, + "learning_rate": 9.950551823118544e-06, + "lm_loss": 6.4689, + "loss": 1.7387, + "step": 147, + "text_contrastive_loss": 1.0179, + "train_positive_log_prob": -96.7552, + "train_positive_token_accuracy": 0.0676, + "train_positive_token_prob": 0.0227 + }, + { + "contrastive_loss": 0.7084, + "epoch": 0.3340857787810384, + "grad_norm": 17.17438316345215, + "learning_rate": 9.949528745229721e-06, + "lm_loss": 6.5945, + "loss": 1.8936, + "step": 148, + "text_contrastive_loss": 1.0516, + "train_positive_log_prob": -100.1246, + "train_positive_token_accuracy": 0.07, + "train_positive_token_prob": 0.0231 + }, + { + "contrastive_loss": 0.6445, + "epoch": 0.3363431151241535, + "grad_norm": 19.39360237121582, + "learning_rate": 9.948495245421294e-06, + "lm_loss": 6.496, + "loss": 1.7654, + "step": 149, + "text_contrastive_loss": 0.9426, + "train_positive_log_prob": -95.4652, + "train_positive_token_accuracy": 0.0673, + "train_positive_token_prob": 0.023 + }, + { + "contrastive_loss": 0.6781, + "epoch": 0.33860045146726864, + "grad_norm": 16.96933937072754, + "learning_rate": 9.94745132586944e-06, + "lm_loss": 6.51, + "loss": 1.7752, + "step": 150, + "text_contrastive_loss": 0.8922, + "train_positive_log_prob": -98.9714, + "train_positive_token_accuracy": 0.0748, + "train_positive_token_prob": 0.0239 + }, + { + "contrastive_loss": 0.6305, + "epoch": 0.34085778781038373, + "grad_norm": 18.83647918701172, + "learning_rate": 9.946396988772275e-06, + "lm_loss": 6.6107, + "loss": 1.7373, + "step": 151, + "text_contrastive_loss": 0.8916, + "train_positive_log_prob": -96.8397, + "train_positive_token_accuracy": 0.0637, + "train_positive_token_prob": 0.0223 + }, + { + "contrastive_loss": 0.5176, + "epoch": 0.3431151241534989, + "grad_norm": 19.363317489624023, + "learning_rate": 9.945332236349857e-06, + "lm_loss": 6.4914, + "loss": 1.5901, + "step": 152, + "text_contrastive_loss": 0.8467, + "train_positive_log_prob": -98.1572, + "train_positive_token_accuracy": 0.0767, + "train_positive_token_prob": 0.0239 + }, + { + "contrastive_loss": 0.6918, + "epoch": 0.345372460496614, + "grad_norm": 19.495018005371094, + "learning_rate": 9.944257070844165e-06, + "lm_loss": 6.4671, + "loss": 1.8173, + "step": 153, + "text_contrastive_loss": 0.9575, + "train_positive_log_prob": -95.1382, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0244 + }, + { + "contrastive_loss": 0.5684, + "epoch": 0.3476297968397291, + "grad_norm": 17.412399291992188, + "learning_rate": 9.943171494519111e-06, + "lm_loss": 6.4786, + "loss": 1.7042, + "step": 154, + "text_contrastive_loss": 0.9759, + "train_positive_log_prob": -94.2667, + "train_positive_token_accuracy": 0.0663, + "train_positive_token_prob": 0.0226 + }, + { + "contrastive_loss": 0.5654, + "epoch": 0.34988713318284426, + "grad_norm": 17.635072708129883, + "learning_rate": 9.942075509660527e-06, + "lm_loss": 6.344, + "loss": 1.6119, + "step": 155, + "text_contrastive_loss": 0.8243, + "train_positive_log_prob": -93.0513, + "train_positive_token_accuracy": 0.0682, + "train_positive_token_prob": 0.023 + }, + { + "contrastive_loss": 0.6933, + "epoch": 0.35214446952595935, + "grad_norm": 17.938695907592773, + "learning_rate": 9.94096911857616e-06, + "lm_loss": 6.3235, + "loss": 1.7882, + "step": 156, + "text_contrastive_loss": 0.9249, + "train_positive_log_prob": -95.6238, + "train_positive_token_accuracy": 0.0775, + "train_positive_token_prob": 0.0245 + }, + { + "contrastive_loss": 0.5936, + "epoch": 0.3544018058690745, + "grad_norm": 18.73040771484375, + "learning_rate": 9.939852323595671e-06, + "lm_loss": 6.4178, + "loss": 1.6748, + "step": 157, + "text_contrastive_loss": 0.8789, + "train_positive_log_prob": -94.5153, + "train_positive_token_accuracy": 0.0735, + "train_positive_token_prob": 0.0234 + }, + { + "contrastive_loss": 0.5734, + "epoch": 0.35665914221218964, + "grad_norm": 17.923017501831055, + "learning_rate": 9.938725127070628e-06, + "lm_loss": 6.3865, + "loss": 1.7191, + "step": 158, + "text_contrastive_loss": 1.0141, + "train_positive_log_prob": -97.5961, + "train_positive_token_accuracy": 0.0739, + "train_positive_token_prob": 0.025 + }, + { + "contrastive_loss": 0.5882, + "epoch": 0.35891647855530473, + "grad_norm": 17.886707305908203, + "learning_rate": 9.937587531374497e-06, + "lm_loss": 6.53, + "loss": 1.7178, + "step": 159, + "text_contrastive_loss": 0.9532, + "train_positive_log_prob": -96.8783, + "train_positive_token_accuracy": 0.0724, + "train_positive_token_prob": 0.0232 + }, + { + "contrastive_loss": 0.6749, + "epoch": 0.3611738148984199, + "grad_norm": 20.906579971313477, + "learning_rate": 9.936439538902644e-06, + "lm_loss": 6.3858, + "loss": 1.7202, + "step": 160, + "text_contrastive_loss": 0.8134, + "train_positive_log_prob": -94.5395, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.0243 + }, + { + "contrastive_loss": 0.6186, + "epoch": 0.36343115124153497, + "grad_norm": 18.395421981811523, + "learning_rate": 9.935281152072329e-06, + "lm_loss": 6.364, + "loss": 1.7822, + "step": 161, + "text_contrastive_loss": 1.0545, + "train_positive_log_prob": -93.7944, + "train_positive_token_accuracy": 0.0777, + "train_positive_token_prob": 0.0246 + }, + { + "contrastive_loss": 0.6179, + "epoch": 0.3656884875846501, + "grad_norm": 18.323909759521484, + "learning_rate": 9.934112373322695e-06, + "lm_loss": 6.3892, + "loss": 1.7232, + "step": 162, + "text_contrastive_loss": 0.9328, + "train_positive_log_prob": -94.5902, + "train_positive_token_accuracy": 0.0747, + "train_positive_token_prob": 0.0242 + }, + { + "contrastive_loss": 0.5973, + "epoch": 0.36794582392776526, + "grad_norm": 17.929973602294922, + "learning_rate": 9.932933205114766e-06, + "lm_loss": 6.3688, + "loss": 1.6569, + "step": 163, + "text_contrastive_loss": 0.8453, + "train_positive_log_prob": -95.5812, + "train_positive_token_accuracy": 0.0783, + "train_positive_token_prob": 0.0253 + }, + { + "contrastive_loss": 0.5917, + "epoch": 0.37020316027088035, + "grad_norm": 17.14206886291504, + "learning_rate": 9.931743649931446e-06, + "lm_loss": 6.4892, + "loss": 1.6631, + "step": 164, + "text_contrastive_loss": 0.8451, + "train_positive_log_prob": -96.4541, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.023 + }, + { + "contrastive_loss": 0.5441, + "epoch": 0.3724604966139955, + "grad_norm": 17.020263671875, + "learning_rate": 9.93054371027751e-06, + "lm_loss": 6.3578, + "loss": 1.6148, + "step": 165, + "text_contrastive_loss": 0.8698, + "train_positive_log_prob": -91.0694, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.0245 + }, + { + "contrastive_loss": 0.5735, + "epoch": 0.3747178329571106, + "grad_norm": 15.405448913574219, + "learning_rate": 9.929333388679593e-06, + "lm_loss": 6.3706, + "loss": 1.6279, + "step": 166, + "text_contrastive_loss": 0.8348, + "train_positive_log_prob": -92.249, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.0241 + }, + { + "contrastive_loss": 0.6123, + "epoch": 0.37697516930022573, + "grad_norm": 18.756498336791992, + "learning_rate": 9.928112687686197e-06, + "lm_loss": 6.3467, + "loss": 1.6946, + "step": 167, + "text_contrastive_loss": 0.8951, + "train_positive_log_prob": -91.4306, + "train_positive_token_accuracy": 0.0727, + "train_positive_token_prob": 0.0245 + }, + { + "contrastive_loss": 0.5487, + "epoch": 0.3792325056433409, + "grad_norm": 15.464677810668945, + "learning_rate": 9.92688160986768e-06, + "lm_loss": 6.3127, + "loss": 1.6266, + "step": 168, + "text_contrastive_loss": 0.8932, + "train_positive_log_prob": -93.7578, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0249 + }, + { + "contrastive_loss": 0.6553, + "epoch": 0.38148984198645597, + "grad_norm": 19.627986907958984, + "learning_rate": 9.925640157816246e-06, + "lm_loss": 6.391, + "loss": 1.7519, + "step": 169, + "text_contrastive_loss": 0.9151, + "train_positive_log_prob": -93.4674, + "train_positive_token_accuracy": 0.0745, + "train_positive_token_prob": 0.023 + }, + { + "contrastive_loss": 0.6612, + "epoch": 0.3837471783295711, + "grad_norm": 18.76019287109375, + "learning_rate": 9.924388334145943e-06, + "lm_loss": 6.4883, + "loss": 1.7803, + "step": 170, + "text_contrastive_loss": 0.9406, + "train_positive_log_prob": -96.2885, + "train_positive_token_accuracy": 0.0666, + "train_positive_token_prob": 0.0226 + }, + { + "contrastive_loss": 0.5337, + "epoch": 0.3860045146726862, + "grad_norm": 17.279727935791016, + "learning_rate": 9.92312614149266e-06, + "lm_loss": 6.3929, + "loss": 1.595, + "step": 171, + "text_contrastive_loss": 0.8442, + "train_positive_log_prob": -93.8701, + "train_positive_token_accuracy": 0.074, + "train_positive_token_prob": 0.0237 + }, + { + "contrastive_loss": 0.5587, + "epoch": 0.38826185101580135, + "grad_norm": 16.400903701782227, + "learning_rate": 9.92185358251412e-06, + "lm_loss": 6.3067, + "loss": 1.6394, + "step": 172, + "text_contrastive_loss": 0.8999, + "train_positive_log_prob": -91.9461, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.025 + }, + { + "contrastive_loss": 0.6037, + "epoch": 0.3905191873589165, + "grad_norm": 17.153881072998047, + "learning_rate": 9.92057065988987e-06, + "lm_loss": 6.3433, + "loss": 1.6747, + "step": 173, + "text_contrastive_loss": 0.8733, + "train_positive_log_prob": -94.68, + "train_positive_token_accuracy": 0.0725, + "train_positive_token_prob": 0.0242 + }, + { + "contrastive_loss": 0.5307, + "epoch": 0.3927765237020316, + "grad_norm": 15.758830070495605, + "learning_rate": 9.919277376321284e-06, + "lm_loss": 6.2857, + "loss": 1.6488, + "step": 174, + "text_contrastive_loss": 0.979, + "train_positive_log_prob": -93.0069, + "train_positive_token_accuracy": 0.0681, + "train_positive_token_prob": 0.0237 + }, + { + "contrastive_loss": 0.637, + "epoch": 0.39503386004514673, + "grad_norm": 17.30881118774414, + "learning_rate": 9.917973734531549e-06, + "lm_loss": 6.3688, + "loss": 1.7385, + "step": 175, + "text_contrastive_loss": 0.9293, + "train_positive_log_prob": -93.1648, + "train_positive_token_accuracy": 0.0745, + "train_positive_token_prob": 0.0249 + }, + { + "contrastive_loss": 0.5644, + "epoch": 0.3972911963882618, + "grad_norm": 16.901241302490234, + "learning_rate": 9.916659737265664e-06, + "lm_loss": 6.3906, + "loss": 1.6716, + "step": 176, + "text_contrastive_loss": 0.9363, + "train_positive_log_prob": -96.2591, + "train_positive_token_accuracy": 0.0749, + "train_positive_token_prob": 0.0242 + }, + { + "contrastive_loss": 0.5268, + "epoch": 0.39954853273137697, + "grad_norm": 16.084991455078125, + "learning_rate": 9.915335387290432e-06, + "lm_loss": 6.356, + "loss": 1.5931, + "step": 177, + "text_contrastive_loss": 0.8613, + "train_positive_log_prob": -95.6616, + "train_positive_token_accuracy": 0.0683, + "train_positive_token_prob": 0.0238 + }, + { + "contrastive_loss": 0.6085, + "epoch": 0.4018058690744921, + "grad_norm": 17.546642303466797, + "learning_rate": 9.914000687394457e-06, + "lm_loss": 6.5608, + "loss": 1.7641, + "step": 178, + "text_contrastive_loss": 0.9989, + "train_positive_log_prob": -99.8199, + "train_positive_token_accuracy": 0.0696, + "train_positive_token_prob": 0.0233 + }, + { + "contrastive_loss": 0.6121, + "epoch": 0.4040632054176072, + "grad_norm": 15.838671684265137, + "learning_rate": 9.912655640388134e-06, + "lm_loss": 6.2749, + "loss": 1.65, + "step": 179, + "text_contrastive_loss": 0.8207, + "train_positive_log_prob": -94.2207, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0256 + }, + { + "contrastive_loss": 0.6631, + "epoch": 0.40632054176072235, + "grad_norm": 21.731502532958984, + "learning_rate": 9.911300249103646e-06, + "lm_loss": 6.3533, + "loss": 1.7943, + "step": 180, + "text_contrastive_loss": 0.9919, + "train_positive_log_prob": -94.4376, + "train_positive_token_accuracy": 0.0704, + "train_positive_token_prob": 0.0242 + }, + { + "contrastive_loss": 0.6534, + "epoch": 0.40857787810383744, + "grad_norm": 17.82054901123047, + "learning_rate": 9.909934516394957e-06, + "lm_loss": 6.2419, + "loss": 1.7963, + "step": 181, + "text_contrastive_loss": 1.0375, + "train_positive_log_prob": -92.0682, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0249 + }, + { + "contrastive_loss": 0.6771, + "epoch": 0.4108352144469526, + "grad_norm": 16.83835792541504, + "learning_rate": 9.908558445137807e-06, + "lm_loss": 6.4007, + "loss": 1.7579, + "step": 182, + "text_contrastive_loss": 0.8814, + "train_positive_log_prob": -94.0119, + "train_positive_token_accuracy": 0.0753, + "train_positive_token_prob": 0.0256 + }, + { + "contrastive_loss": 0.5952, + "epoch": 0.41309255079006774, + "grad_norm": 17.657630920410156, + "learning_rate": 9.907172038229706e-06, + "lm_loss": 6.4039, + "loss": 1.6685, + "step": 183, + "text_contrastive_loss": 0.8658, + "train_positive_log_prob": -93.4851, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.0255 + }, + { + "contrastive_loss": 0.6501, + "epoch": 0.4153498871331828, + "grad_norm": 17.542133331298828, + "learning_rate": 9.905775298589923e-06, + "lm_loss": 6.3194, + "loss": 1.7214, + "step": 184, + "text_contrastive_loss": 0.8787, + "train_positive_log_prob": -92.4982, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.0247 + }, + { + "contrastive_loss": 0.6792, + "epoch": 0.417607223476298, + "grad_norm": 17.310688018798828, + "learning_rate": 9.904368229159494e-06, + "lm_loss": 6.3254, + "loss": 1.7577, + "step": 185, + "text_contrastive_loss": 0.8919, + "train_positive_log_prob": -93.9423, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.0253 + }, + { + "contrastive_loss": 0.5111, + "epoch": 0.4198645598194131, + "grad_norm": 16.24388313293457, + "learning_rate": 9.90295083290119e-06, + "lm_loss": 6.3002, + "loss": 1.6357, + "step": 186, + "text_contrastive_loss": 0.9892, + "train_positive_log_prob": -93.0724, + "train_positive_token_accuracy": 0.0762, + "train_positive_token_prob": 0.024 + }, + { + "contrastive_loss": 0.6866, + "epoch": 0.4221218961625282, + "grad_norm": 17.37274169921875, + "learning_rate": 9.901523112799543e-06, + "lm_loss": 6.322, + "loss": 1.746, + "step": 187, + "text_contrastive_loss": 0.8543, + "train_positive_log_prob": -94.043, + "train_positive_token_accuracy": 0.0685, + "train_positive_token_prob": 0.023 + }, + { + "contrastive_loss": 0.5687, + "epoch": 0.42437923250564336, + "grad_norm": 17.317487716674805, + "learning_rate": 9.90008507186081e-06, + "lm_loss": 6.182, + "loss": 1.6143, + "step": 188, + "text_contrastive_loss": 0.8548, + "train_positive_log_prob": -91.4063, + "train_positive_token_accuracy": 0.0737, + "train_positive_token_prob": 0.0251 + }, + { + "contrastive_loss": 0.5404, + "epoch": 0.42663656884875845, + "grad_norm": 16.9774112701416, + "learning_rate": 9.898636713112992e-06, + "lm_loss": 6.379, + "loss": 1.6188, + "step": 189, + "text_contrastive_loss": 0.8809, + "train_positive_log_prob": -93.4684, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.0237 + }, + { + "contrastive_loss": 0.6943, + "epoch": 0.4288939051918736, + "grad_norm": 19.53595733642578, + "learning_rate": 9.897178039605803e-06, + "lm_loss": 6.2926, + "loss": 1.8783, + "step": 190, + "text_contrastive_loss": 1.1095, + "train_positive_log_prob": -91.2128, + "train_positive_token_accuracy": 0.0838, + "train_positive_token_prob": 0.0252 + }, + { + "contrastive_loss": 0.5418, + "epoch": 0.43115124153498874, + "grad_norm": 15.57643985748291, + "learning_rate": 9.895709054410686e-06, + "lm_loss": 6.4005, + "loss": 1.5909, + "step": 191, + "text_contrastive_loss": 0.8181, + "train_positive_log_prob": -97.8082, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0243 + }, + { + "contrastive_loss": 0.6808, + "epoch": 0.43340857787810383, + "grad_norm": 19.136503219604492, + "learning_rate": 9.894229760620793e-06, + "lm_loss": 6.3266, + "loss": 1.7607, + "step": 192, + "text_contrastive_loss": 0.8946, + "train_positive_log_prob": -93.7376, + "train_positive_token_accuracy": 0.072, + "train_positive_token_prob": 0.0238 + }, + { + "contrastive_loss": 0.5773, + "epoch": 0.435665914221219, + "grad_norm": 17.009017944335938, + "learning_rate": 9.892740161350981e-06, + "lm_loss": 6.3848, + "loss": 1.6124, + "step": 193, + "text_contrastive_loss": 0.7932, + "train_positive_log_prob": -94.8009, + "train_positive_token_accuracy": 0.0737, + "train_positive_token_prob": 0.0224 + }, + { + "contrastive_loss": 0.5224, + "epoch": 0.43792325056433407, + "grad_norm": 16.44840431213379, + "learning_rate": 9.891240259737809e-06, + "lm_loss": 6.4039, + "loss": 1.5373, + "step": 194, + "text_contrastive_loss": 0.749, + "train_positive_log_prob": -95.6092, + "train_positive_token_accuracy": 0.0725, + "train_positive_token_prob": 0.0234 + }, + { + "contrastive_loss": 0.6318, + "epoch": 0.4401805869074492, + "grad_norm": 16.54384994506836, + "learning_rate": 9.889730058939529e-06, + "lm_loss": 6.3638, + "loss": 1.7402, + "step": 195, + "text_contrastive_loss": 0.9441, + "train_positive_log_prob": -95.1351, + "train_positive_token_accuracy": 0.0724, + "train_positive_token_prob": 0.0236 + }, + { + "contrastive_loss": 0.599, + "epoch": 0.44243792325056436, + "grad_norm": 17.633148193359375, + "learning_rate": 9.888209562136074e-06, + "lm_loss": 6.2326, + "loss": 1.6754, + "step": 196, + "text_contrastive_loss": 0.9062, + "train_positive_log_prob": -93.1788, + "train_positive_token_accuracy": 0.0787, + "train_positive_token_prob": 0.0251 + }, + { + "contrastive_loss": 0.5482, + "epoch": 0.44469525959367945, + "grad_norm": 15.271982192993164, + "learning_rate": 9.886678772529069e-06, + "lm_loss": 6.181, + "loss": 1.6221, + "step": 197, + "text_contrastive_loss": 0.9116, + "train_positive_log_prob": -92.3015, + "train_positive_token_accuracy": 0.0732, + "train_positive_token_prob": 0.0243 + }, + { + "contrastive_loss": 0.6004, + "epoch": 0.4469525959367946, + "grad_norm": 16.93767547607422, + "learning_rate": 9.885137693341795e-06, + "lm_loss": 6.344, + "loss": 1.7106, + "step": 198, + "text_contrastive_loss": 0.9514, + "train_positive_log_prob": -94.8828, + "train_positive_token_accuracy": 0.0817, + "train_positive_token_prob": 0.025 + }, + { + "contrastive_loss": 0.6104, + "epoch": 0.4492099322799097, + "grad_norm": 18.47342872619629, + "learning_rate": 9.883586327819214e-06, + "lm_loss": 6.2951, + "loss": 1.7779, + "step": 199, + "text_contrastive_loss": 1.0761, + "train_positive_log_prob": -93.8359, + "train_positive_token_accuracy": 0.0763, + "train_positive_token_prob": 0.025 + }, + { + "contrastive_loss": 0.6953, + "epoch": 0.45146726862302483, + "grad_norm": 18.460594177246094, + "learning_rate": 9.88202467922794e-06, + "lm_loss": 6.3612, + "loss": 1.7744, + "step": 200, + "text_contrastive_loss": 0.8859, + "train_positive_log_prob": -93.263, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.025 + }, + { + "contrastive_loss": 0.6033, + "epoch": 0.45372460496614, + "grad_norm": 17.545286178588867, + "learning_rate": 9.880452750856239e-06, + "lm_loss": 6.3586, + "loss": 1.7908, + "step": 201, + "text_contrastive_loss": 1.1034, + "train_positive_log_prob": -95.8297, + "train_positive_token_accuracy": 0.0731, + "train_positive_token_prob": 0.0243 + }, + { + "contrastive_loss": 0.5989, + "epoch": 0.45598194130925507, + "grad_norm": 18.77071762084961, + "learning_rate": 9.878870546014025e-06, + "lm_loss": 6.322, + "loss": 1.6702, + "step": 202, + "text_contrastive_loss": 0.8782, + "train_positive_log_prob": -93.3503, + "train_positive_token_accuracy": 0.072, + "train_positive_token_prob": 0.0242 + }, + { + "contrastive_loss": 0.5985, + "epoch": 0.4582392776523702, + "grad_norm": 18.926990509033203, + "learning_rate": 9.877278068032852e-06, + "lm_loss": 6.3298, + "loss": 1.6193, + "step": 203, + "text_contrastive_loss": 0.7755, + "train_positive_log_prob": -94.0306, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.0268 + }, + { + "contrastive_loss": 0.5788, + "epoch": 0.4604966139954853, + "grad_norm": 18.48944664001465, + "learning_rate": 9.875675320265903e-06, + "lm_loss": 6.3384, + "loss": 1.6234, + "step": 204, + "text_contrastive_loss": 0.8217, + "train_positive_log_prob": -95.303, + "train_positive_token_accuracy": 0.0701, + "train_positive_token_prob": 0.0238 + }, + { + "contrastive_loss": 0.5159, + "epoch": 0.46275395033860045, + "grad_norm": 18.643436431884766, + "learning_rate": 9.874062306087983e-06, + "lm_loss": 6.2327, + "loss": 1.5362, + "step": 205, + "text_contrastive_loss": 0.794, + "train_positive_log_prob": -91.7119, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.0269 + }, + { + "contrastive_loss": 0.6663, + "epoch": 0.4650112866817156, + "grad_norm": 18.86969757080078, + "learning_rate": 9.872439028895518e-06, + "lm_loss": 6.1836, + "loss": 1.752, + "step": 206, + "text_contrastive_loss": 0.9347, + "train_positive_log_prob": -90.1413, + "train_positive_token_accuracy": 0.074, + "train_positive_token_prob": 0.0251 + }, + { + "contrastive_loss": 0.512, + "epoch": 0.4672686230248307, + "grad_norm": 17.134504318237305, + "learning_rate": 9.870805492106546e-06, + "lm_loss": 6.2632, + "loss": 1.5575, + "step": 207, + "text_contrastive_loss": 0.8384, + "train_positive_log_prob": -92.5245, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0269 + }, + { + "contrastive_loss": 0.6561, + "epoch": 0.46952595936794583, + "grad_norm": 18.370058059692383, + "learning_rate": 9.869161699160704e-06, + "lm_loss": 6.2048, + "loss": 1.7214, + "step": 208, + "text_contrastive_loss": 0.8895, + "train_positive_log_prob": -90.8703, + "train_positive_token_accuracy": 0.0763, + "train_positive_token_prob": 0.0259 + }, + { + "contrastive_loss": 0.5517, + "epoch": 0.4717832957110609, + "grad_norm": 17.58881187438965, + "learning_rate": 9.867507653519225e-06, + "lm_loss": 6.2325, + "loss": 1.5736, + "step": 209, + "text_contrastive_loss": 0.7973, + "train_positive_log_prob": -91.858, + "train_positive_token_accuracy": 0.0777, + "train_positive_token_prob": 0.0261 + }, + { + "contrastive_loss": 0.5561, + "epoch": 0.47404063205417607, + "grad_norm": 17.790889739990234, + "learning_rate": 9.865843358664933e-06, + "lm_loss": 6.3023, + "loss": 1.5916, + "step": 210, + "text_contrastive_loss": 0.8105, + "train_positive_log_prob": -93.6262, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.0259 + }, + { + "contrastive_loss": 0.7172, + "epoch": 0.4762979683972912, + "grad_norm": 19.1190242767334, + "learning_rate": 9.86416881810223e-06, + "lm_loss": 6.3149, + "loss": 1.8098, + "step": 211, + "text_contrastive_loss": 0.9222, + "train_positive_log_prob": -93.5423, + "train_positive_token_accuracy": 0.0688, + "train_positive_token_prob": 0.0257 + }, + { + "contrastive_loss": 0.4886, + "epoch": 0.4785553047404063, + "grad_norm": 17.55365753173828, + "learning_rate": 9.862484035357095e-06, + "lm_loss": 6.2235, + "loss": 1.5331, + "step": 212, + "text_contrastive_loss": 0.8443, + "train_positive_log_prob": -92.0653, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.0257 + }, + { + "contrastive_loss": 0.6865, + "epoch": 0.48081264108352145, + "grad_norm": 20.232894897460938, + "learning_rate": 9.860789013977074e-06, + "lm_loss": 6.2172, + "loss": 1.8539, + "step": 213, + "text_contrastive_loss": 1.0915, + "train_positive_log_prob": -94.4033, + "train_positive_token_accuracy": 0.0738, + "train_positive_token_prob": 0.026 + }, + { + "contrastive_loss": 0.5696, + "epoch": 0.48306997742663654, + "grad_norm": 17.41246223449707, + "learning_rate": 9.859083757531265e-06, + "lm_loss": 6.3156, + "loss": 1.6494, + "step": 214, + "text_contrastive_loss": 0.8965, + "train_positive_log_prob": -92.072, + "train_positive_token_accuracy": 0.0753, + "train_positive_token_prob": 0.0247 + }, + { + "contrastive_loss": 0.6229, + "epoch": 0.4853273137697517, + "grad_norm": 18.375215530395508, + "learning_rate": 9.857368269610325e-06, + "lm_loss": 6.206, + "loss": 1.646, + "step": 215, + "text_contrastive_loss": 0.8049, + "train_positive_log_prob": -92.9716, + "train_positive_token_accuracy": 0.0743, + "train_positive_token_prob": 0.0259 + }, + { + "contrastive_loss": 0.6166, + "epoch": 0.48758465011286684, + "grad_norm": 16.546504974365234, + "learning_rate": 9.85564255382645e-06, + "lm_loss": 6.2631, + "loss": 1.6687, + "step": 216, + "text_contrastive_loss": 0.8515, + "train_positive_log_prob": -92.9211, + "train_positive_token_accuracy": 0.0721, + "train_positive_token_prob": 0.0247 + }, + { + "contrastive_loss": 0.5597, + "epoch": 0.4898419864559819, + "grad_norm": 16.0345458984375, + "learning_rate": 9.853906613813378e-06, + "lm_loss": 6.2298, + "loss": 1.6476, + "step": 217, + "text_contrastive_loss": 0.9299, + "train_positive_log_prob": -90.089, + "train_positive_token_accuracy": 0.0735, + "train_positive_token_prob": 0.0263 + }, + { + "contrastive_loss": 0.6694, + "epoch": 0.49209932279909707, + "grad_norm": 19.300962448120117, + "learning_rate": 9.852160453226367e-06, + "lm_loss": 6.2233, + "loss": 1.7638, + "step": 218, + "text_contrastive_loss": 0.9441, + "train_positive_log_prob": -92.8604, + "train_positive_token_accuracy": 0.0691, + "train_positive_token_prob": 0.0252 + }, + { + "contrastive_loss": 0.5989, + "epoch": 0.49435665914221216, + "grad_norm": 18.49478530883789, + "learning_rate": 9.850404075742204e-06, + "lm_loss": 6.3365, + "loss": 1.7068, + "step": 219, + "text_contrastive_loss": 0.9485, + "train_positive_log_prob": -91.3391, + "train_positive_token_accuracy": 0.0745, + "train_positive_token_prob": 0.0254 + }, + { + "contrastive_loss": 0.6076, + "epoch": 0.4966139954853273, + "grad_norm": 16.82369041442871, + "learning_rate": 9.848637485059183e-06, + "lm_loss": 6.0957, + "loss": 1.6197, + "step": 220, + "text_contrastive_loss": 0.8051, + "train_positive_log_prob": -88.1754, + "train_positive_token_accuracy": 0.0799, + "train_positive_token_prob": 0.0269 + }, + { + "contrastive_loss": 0.5866, + "epoch": 0.49887133182844245, + "grad_norm": 17.95127296447754, + "learning_rate": 9.846860684897107e-06, + "lm_loss": 6.2495, + "loss": 1.6052, + "step": 221, + "text_contrastive_loss": 0.7873, + "train_positive_log_prob": -93.0613, + "train_positive_token_accuracy": 0.0711, + "train_positive_token_prob": 0.0263 + }, + { + "contrastive_loss": 0.5556, + "epoch": 0.5011286681715575, + "grad_norm": 16.904315948486328, + "learning_rate": 9.845073678997275e-06, + "lm_loss": 6.1779, + "loss": 1.546, + "step": 222, + "text_contrastive_loss": 0.7451, + "train_positive_log_prob": -94.1144, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0269 + }, + { + "contrastive_loss": 0.419, + "epoch": 0.5033860045146726, + "grad_norm": 15.588286399841309, + "learning_rate": 9.843276471122473e-06, + "lm_loss": 6.3275, + "loss": 1.5544, + "step": 223, + "text_contrastive_loss": 1.0053, + "train_positive_log_prob": -94.6153, + "train_positive_token_accuracy": 0.0687, + "train_positive_token_prob": 0.0245 + }, + { + "contrastive_loss": 0.5892, + "epoch": 0.5056433408577878, + "grad_norm": 18.800617218017578, + "learning_rate": 9.84146906505698e-06, + "lm_loss": 6.0217, + "loss": 1.5689, + "step": 224, + "text_contrastive_loss": 0.755, + "train_positive_log_prob": -88.7513, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.0265 + }, + { + "contrastive_loss": 0.5934, + "epoch": 0.5079006772009029, + "grad_norm": 16.489816665649414, + "learning_rate": 9.83965146460653e-06, + "lm_loss": 6.2332, + "loss": 1.725, + "step": 225, + "text_contrastive_loss": 1.0166, + "train_positive_log_prob": -93.1115, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.0269 + }, + { + "contrastive_loss": 0.5827, + "epoch": 0.510158013544018, + "grad_norm": 17.93199920654297, + "learning_rate": 9.83782367359834e-06, + "lm_loss": 6.2292, + "loss": 1.6654, + "step": 226, + "text_contrastive_loss": 0.9195, + "train_positive_log_prob": -91.1266, + "train_positive_token_accuracy": 0.0742, + "train_positive_token_prob": 0.0245 + }, + { + "contrastive_loss": 0.5436, + "epoch": 0.5124153498871332, + "grad_norm": 16.218229293823242, + "learning_rate": 9.835985695881076e-06, + "lm_loss": 6.2419, + "loss": 1.5886, + "step": 227, + "text_contrastive_loss": 0.8415, + "train_positive_log_prob": -91.3357, + "train_positive_token_accuracy": 0.0739, + "train_positive_token_prob": 0.025 + }, + { + "contrastive_loss": 0.4402, + "epoch": 0.5146726862302483, + "grad_norm": 14.854653358459473, + "learning_rate": 9.834137535324852e-06, + "lm_loss": 6.2566, + "loss": 1.4773, + "step": 228, + "text_contrastive_loss": 0.8228, + "train_positive_log_prob": -92.7761, + "train_positive_token_accuracy": 0.0735, + "train_positive_token_prob": 0.0248 + }, + { + "contrastive_loss": 0.5065, + "epoch": 0.5169300225733634, + "grad_norm": 15.590422630310059, + "learning_rate": 9.83227919582123e-06, + "lm_loss": 6.1314, + "loss": 1.5427, + "step": 229, + "text_contrastive_loss": 0.8462, + "train_positive_log_prob": -91.4149, + "train_positive_token_accuracy": 0.0739, + "train_positive_token_prob": 0.0258 + }, + { + "contrastive_loss": 0.4892, + "epoch": 0.5191873589164786, + "grad_norm": 14.657146453857422, + "learning_rate": 9.830410681283203e-06, + "lm_loss": 6.2549, + "loss": 1.573, + "step": 230, + "text_contrastive_loss": 0.9168, + "train_positive_log_prob": -91.3759, + "train_positive_token_accuracy": 0.073, + "train_positive_token_prob": 0.0246 + }, + { + "contrastive_loss": 0.6175, + "epoch": 0.5214446952595937, + "grad_norm": 21.20783805847168, + "learning_rate": 9.828531995645183e-06, + "lm_loss": 6.2108, + "loss": 1.6399, + "step": 231, + "text_contrastive_loss": 0.8026, + "train_positive_log_prob": -91.051, + "train_positive_token_accuracy": 0.0762, + "train_positive_token_prob": 0.025 + }, + { + "contrastive_loss": 0.6642, + "epoch": 0.5237020316027088, + "grad_norm": 19.318252563476562, + "learning_rate": 9.826643142863006e-06, + "lm_loss": 6.2883, + "loss": 1.817, + "step": 232, + "text_contrastive_loss": 1.048, + "train_positive_log_prob": -92.6809, + "train_positive_token_accuracy": 0.0713, + "train_positive_token_prob": 0.0242 + }, + { + "contrastive_loss": 0.567, + "epoch": 0.5259593679458239, + "grad_norm": 15.792723655700684, + "learning_rate": 9.824744126913914e-06, + "lm_loss": 6.1747, + "loss": 1.6847, + "step": 233, + "text_contrastive_loss": 1.0004, + "train_positive_log_prob": -87.9672, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.0252 + }, + { + "contrastive_loss": 0.6318, + "epoch": 0.5282167042889391, + "grad_norm": 16.700912475585938, + "learning_rate": 9.822834951796547e-06, + "lm_loss": 6.0726, + "loss": 1.6601, + "step": 234, + "text_contrastive_loss": 0.8422, + "train_positive_log_prob": -87.9323, + "train_positive_token_accuracy": 0.075, + "train_positive_token_prob": 0.0257 + }, + { + "contrastive_loss": 0.5589, + "epoch": 0.5304740406320542, + "grad_norm": 16.895994186401367, + "learning_rate": 9.820915621530939e-06, + "lm_loss": 6.2513, + "loss": 1.5552, + "step": 235, + "text_contrastive_loss": 0.7424, + "train_positive_log_prob": -90.9889, + "train_positive_token_accuracy": 0.0733, + "train_positive_token_prob": 0.0252 + }, + { + "contrastive_loss": 0.6437, + "epoch": 0.5327313769751693, + "grad_norm": 18.302471160888672, + "learning_rate": 9.818986140158507e-06, + "lm_loss": 6.1499, + "loss": 1.6472, + "step": 236, + "text_contrastive_loss": 0.777, + "train_positive_log_prob": -89.9905, + "train_positive_token_accuracy": 0.0734, + "train_positive_token_prob": 0.0247 + }, + { + "contrastive_loss": 0.5229, + "epoch": 0.5349887133182845, + "grad_norm": 15.440597534179688, + "learning_rate": 9.817046511742042e-06, + "lm_loss": 6.1466, + "loss": 1.4978, + "step": 237, + "text_contrastive_loss": 0.7206, + "train_positive_log_prob": -88.4293, + "train_positive_token_accuracy": 0.0812, + "train_positive_token_prob": 0.0257 + }, + { + "contrastive_loss": 0.607, + "epoch": 0.5372460496613995, + "grad_norm": 16.321495056152344, + "learning_rate": 9.815096740365698e-06, + "lm_loss": 6.1507, + "loss": 1.6366, + "step": 238, + "text_contrastive_loss": 0.829, + "train_positive_log_prob": -90.6505, + "train_positive_token_accuracy": 0.0721, + "train_positive_token_prob": 0.0253 + }, + { + "contrastive_loss": 0.5298, + "epoch": 0.5395033860045146, + "grad_norm": 14.745976448059082, + "learning_rate": 9.81313683013499e-06, + "lm_loss": 6.2015, + "loss": 1.6018, + "step": 239, + "text_contrastive_loss": 0.9036, + "train_positive_log_prob": -90.2366, + "train_positive_token_accuracy": 0.0773, + "train_positive_token_prob": 0.0251 + }, + { + "contrastive_loss": 0.6232, + "epoch": 0.5417607223476298, + "grad_norm": 17.782798767089844, + "learning_rate": 9.811166785176785e-06, + "lm_loss": 6.1334, + "loss": 1.7553, + "step": 240, + "text_contrastive_loss": 1.0374, + "train_positive_log_prob": -89.6286, + "train_positive_token_accuracy": 0.0738, + "train_positive_token_prob": 0.0247 + }, + { + "contrastive_loss": 0.5706, + "epoch": 0.5440180586907449, + "grad_norm": 15.712726593017578, + "learning_rate": 9.809186609639281e-06, + "lm_loss": 6.181, + "loss": 1.5081, + "step": 241, + "text_contrastive_loss": 0.6387, + "train_positive_log_prob": -91.1921, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0259 + }, + { + "contrastive_loss": 0.6559, + "epoch": 0.54627539503386, + "grad_norm": 18.792688369750977, + "learning_rate": 9.807196307692015e-06, + "lm_loss": 6.1004, + "loss": 1.7599, + "step": 242, + "text_contrastive_loss": 0.9879, + "train_positive_log_prob": -89.2382, + "train_positive_token_accuracy": 0.0705, + "train_positive_token_prob": 0.0249 + }, + { + "contrastive_loss": 0.6457, + "epoch": 0.5485327313769752, + "grad_norm": 18.05009651184082, + "learning_rate": 9.805195883525844e-06, + "lm_loss": 6.2358, + "loss": 1.7435, + "step": 243, + "text_contrastive_loss": 0.9485, + "train_positive_log_prob": -91.8348, + "train_positive_token_accuracy": 0.0788, + "train_positive_token_prob": 0.0251 + }, + { + "contrastive_loss": 0.5531, + "epoch": 0.5507900677200903, + "grad_norm": 16.327796936035156, + "learning_rate": 9.803185341352936e-06, + "lm_loss": 6.0555, + "loss": 1.5688, + "step": 244, + "text_contrastive_loss": 0.8203, + "train_positive_log_prob": -90.5469, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.0264 + }, + { + "contrastive_loss": 0.6125, + "epoch": 0.5530474040632054, + "grad_norm": 18.481801986694336, + "learning_rate": 9.80116468540677e-06, + "lm_loss": 6.1585, + "loss": 1.6504, + "step": 245, + "text_contrastive_loss": 0.844, + "train_positive_log_prob": -90.653, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0267 + }, + { + "contrastive_loss": 0.5548, + "epoch": 0.5553047404063205, + "grad_norm": 17.05265998840332, + "learning_rate": 9.799133919942117e-06, + "lm_loss": 6.235, + "loss": 1.568, + "step": 246, + "text_contrastive_loss": 0.7793, + "train_positive_log_prob": -92.8682, + "train_positive_token_accuracy": 0.0734, + "train_positive_token_prob": 0.0254 + }, + { + "contrastive_loss": 0.4932, + "epoch": 0.5575620767494357, + "grad_norm": 17.33586883544922, + "learning_rate": 9.797093049235034e-06, + "lm_loss": 6.2924, + "loss": 1.5813, + "step": 247, + "text_contrastive_loss": 0.9176, + "train_positive_log_prob": -95.1732, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0255 + }, + { + "contrastive_loss": 0.6298, + "epoch": 0.5598194130925508, + "grad_norm": 18.75750160217285, + "learning_rate": 9.795042077582856e-06, + "lm_loss": 6.1959, + "loss": 1.7182, + "step": 248, + "text_contrastive_loss": 0.9376, + "train_positive_log_prob": -90.1312, + "train_positive_token_accuracy": 0.0736, + "train_positive_token_prob": 0.0255 + }, + { + "contrastive_loss": 0.5855, + "epoch": 0.5620767494356659, + "grad_norm": 15.682086944580078, + "learning_rate": 9.792981009304192e-06, + "lm_loss": 6.1694, + "loss": 1.6636, + "step": 249, + "text_contrastive_loss": 0.9223, + "train_positive_log_prob": -88.8313, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0274 + }, + { + "contrastive_loss": 0.5924, + "epoch": 0.5643340857787811, + "grad_norm": 17.35828399658203, + "learning_rate": 9.790909848738907e-06, + "lm_loss": 6.1563, + "loss": 1.7296, + "step": 250, + "text_contrastive_loss": 1.0431, + "train_positive_log_prob": -91.5747, + "train_positive_token_accuracy": 0.0762, + "train_positive_token_prob": 0.0266 + }, + { + "contrastive_loss": 0.6334, + "epoch": 0.5665914221218962, + "grad_norm": 18.53400421142578, + "learning_rate": 9.788828600248114e-06, + "lm_loss": 6.0707, + "loss": 1.6608, + "step": 251, + "text_contrastive_loss": 0.8408, + "train_positive_log_prob": -89.5638, + "train_positive_token_accuracy": 0.085, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.5718, + "epoch": 0.5688487584650113, + "grad_norm": 16.728124618530273, + "learning_rate": 9.786737268214172e-06, + "lm_loss": 6.1426, + "loss": 1.6647, + "step": 252, + "text_contrastive_loss": 0.9574, + "train_positive_log_prob": -91.3713, + "train_positive_token_accuracy": 0.0819, + "train_positive_token_prob": 0.0272 + }, + { + "contrastive_loss": 0.5576, + "epoch": 0.5711060948081265, + "grad_norm": 15.057391166687012, + "learning_rate": 9.784635857040672e-06, + "lm_loss": 6.0691, + "loss": 1.6157, + "step": 253, + "text_contrastive_loss": 0.9024, + "train_positive_log_prob": -88.6152, + "train_positive_token_accuracy": 0.0762, + "train_positive_token_prob": 0.0269 + }, + { + "contrastive_loss": 0.6449, + "epoch": 0.5733634311512416, + "grad_norm": 17.26353645324707, + "learning_rate": 9.782524371152425e-06, + "lm_loss": 6.0807, + "loss": 1.7184, + "step": 254, + "text_contrastive_loss": 0.9309, + "train_positive_log_prob": -89.125, + "train_positive_token_accuracy": 0.0821, + "train_positive_token_prob": 0.0271 + }, + { + "contrastive_loss": 0.6168, + "epoch": 0.5756207674943566, + "grad_norm": 21.18054962158203, + "learning_rate": 9.780402814995458e-06, + "lm_loss": 6.1426, + "loss": 1.6147, + "step": 255, + "text_contrastive_loss": 0.7672, + "train_positive_log_prob": -93.6269, + "train_positive_token_accuracy": 0.0766, + "train_positive_token_prob": 0.0267 + }, + { + "contrastive_loss": 0.5089, + "epoch": 0.5778781038374717, + "grad_norm": 16.84816551208496, + "learning_rate": 9.778271193037003e-06, + "lm_loss": 6.1003, + "loss": 1.6082, + "step": 256, + "text_contrastive_loss": 0.9787, + "train_positive_log_prob": -91.7465, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.0263 + }, + { + "contrastive_loss": 0.5982, + "epoch": 0.5801354401805869, + "grad_norm": 15.272967338562012, + "learning_rate": 9.776129509765487e-06, + "lm_loss": 6.199, + "loss": 1.6385, + "step": 257, + "text_contrastive_loss": 0.8408, + "train_positive_log_prob": -92.5016, + "train_positive_token_accuracy": 0.0746, + "train_positive_token_prob": 0.0263 + }, + { + "contrastive_loss": 0.6971, + "epoch": 0.582392776523702, + "grad_norm": 18.228771209716797, + "learning_rate": 9.773977769690517e-06, + "lm_loss": 6.0258, + "loss": 1.7625, + "step": 258, + "text_contrastive_loss": 0.9256, + "train_positive_log_prob": -88.0321, + "train_positive_token_accuracy": 0.0749, + "train_positive_token_prob": 0.0273 + }, + { + "contrastive_loss": 0.6436, + "epoch": 0.5846501128668171, + "grad_norm": 16.504350662231445, + "learning_rate": 9.771815977342882e-06, + "lm_loss": 6.1205, + "loss": 1.7137, + "step": 259, + "text_contrastive_loss": 0.9161, + "train_positive_log_prob": -90.1575, + "train_positive_token_accuracy": 0.0819, + "train_positive_token_prob": 0.0275 + }, + { + "contrastive_loss": 0.5884, + "epoch": 0.5869074492099323, + "grad_norm": 20.116111755371094, + "learning_rate": 9.76964413727454e-06, + "lm_loss": 6.2153, + "loss": 1.6786, + "step": 260, + "text_contrastive_loss": 0.9373, + "train_positive_log_prob": -94.9474, + "train_positive_token_accuracy": 0.0788, + "train_positive_token_prob": 0.0267 + }, + { + "contrastive_loss": 0.7094, + "epoch": 0.5891647855530474, + "grad_norm": 21.144668579101562, + "learning_rate": 9.767462254058593e-06, + "lm_loss": 6.06, + "loss": 1.8456, + "step": 261, + "text_contrastive_loss": 1.0603, + "train_positive_log_prob": -87.6828, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.027 + }, + { + "contrastive_loss": 0.5407, + "epoch": 0.5914221218961625, + "grad_norm": 15.427517890930176, + "learning_rate": 9.765270332289307e-06, + "lm_loss": 6.0893, + "loss": 1.5182, + "step": 262, + "text_contrastive_loss": 0.7373, + "train_positive_log_prob": -91.1746, + "train_positive_token_accuracy": 0.0798, + "train_positive_token_prob": 0.0275 + }, + { + "contrastive_loss": 0.7205, + "epoch": 0.5936794582392777, + "grad_norm": 16.98758888244629, + "learning_rate": 9.763068376582075e-06, + "lm_loss": 6.1663, + "loss": 1.9144, + "step": 263, + "text_contrastive_loss": 1.1544, + "train_positive_log_prob": -91.2903, + "train_positive_token_accuracy": 0.0734, + "train_positive_token_prob": 0.0263 + }, + { + "contrastive_loss": 0.4464, + "epoch": 0.5959367945823928, + "grad_norm": 14.028817176818848, + "learning_rate": 9.76085639157342e-06, + "lm_loss": 6.0759, + "loss": 1.4396, + "step": 264, + "text_contrastive_loss": 0.7711, + "train_positive_log_prob": -90.659, + "train_positive_token_accuracy": 0.0801, + "train_positive_token_prob": 0.0269 + }, + { + "contrastive_loss": 0.4934, + "epoch": 0.5981941309255079, + "grad_norm": 15.249479293823242, + "learning_rate": 9.758634381920982e-06, + "lm_loss": 6.1209, + "loss": 1.5151, + "step": 265, + "text_contrastive_loss": 0.8191, + "train_positive_log_prob": -91.5006, + "train_positive_token_accuracy": 0.0745, + "train_positive_token_prob": 0.0254 + }, + { + "contrastive_loss": 0.6012, + "epoch": 0.600451467268623, + "grad_norm": 17.35218620300293, + "learning_rate": 9.756402352303513e-06, + "lm_loss": 6.2228, + "loss": 1.7405, + "step": 266, + "text_contrastive_loss": 1.0339, + "train_positive_log_prob": -94.0188, + "train_positive_token_accuracy": 0.0747, + "train_positive_token_prob": 0.025 + }, + { + "contrastive_loss": 0.582, + "epoch": 0.6027088036117382, + "grad_norm": 15.984393119812012, + "learning_rate": 9.754160307420858e-06, + "lm_loss": 6.1993, + "loss": 1.6754, + "step": 267, + "text_contrastive_loss": 0.9469, + "train_positive_log_prob": -91.147, + "train_positive_token_accuracy": 0.0728, + "train_positive_token_prob": 0.0257 + }, + { + "contrastive_loss": 0.684, + "epoch": 0.6049661399548533, + "grad_norm": 18.76258087158203, + "learning_rate": 9.751908251993956e-06, + "lm_loss": 6.1995, + "loss": 1.8141, + "step": 268, + "text_contrastive_loss": 1.0204, + "train_positive_log_prob": -91.397, + "train_positive_token_accuracy": 0.0816, + "train_positive_token_prob": 0.0266 + }, + { + "contrastive_loss": 0.5901, + "epoch": 0.6072234762979684, + "grad_norm": 17.343738555908203, + "learning_rate": 9.749646190764823e-06, + "lm_loss": 6.1357, + "loss": 1.705, + "step": 269, + "text_contrastive_loss": 1.0026, + "train_positive_log_prob": -91.4402, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0266 + }, + { + "contrastive_loss": 0.6906, + "epoch": 0.6094808126410836, + "grad_norm": 20.076309204101562, + "learning_rate": 9.747374128496541e-06, + "lm_loss": 6.1197, + "loss": 1.8324, + "step": 270, + "text_contrastive_loss": 1.0596, + "train_positive_log_prob": -92.3694, + "train_positive_token_accuracy": 0.081, + "train_positive_token_prob": 0.0271 + }, + { + "contrastive_loss": 0.468, + "epoch": 0.6117381489841986, + "grad_norm": 13.200486183166504, + "learning_rate": 9.745092069973254e-06, + "lm_loss": 6.018, + "loss": 1.5125, + "step": 271, + "text_contrastive_loss": 0.8853, + "train_positive_log_prob": -89.4729, + "train_positive_token_accuracy": 0.0778, + "train_positive_token_prob": 0.0271 + }, + { + "contrastive_loss": 0.4372, + "epoch": 0.6139954853273137, + "grad_norm": 15.85975170135498, + "learning_rate": 9.74280002000015e-06, + "lm_loss": 5.9881, + "loss": 1.4418, + "step": 272, + "text_contrastive_loss": 0.8114, + "train_positive_log_prob": -87.4899, + "train_positive_token_accuracy": 0.0751, + "train_positive_token_prob": 0.0267 + }, + { + "contrastive_loss": 0.6535, + "epoch": 0.6162528216704289, + "grad_norm": 18.72348403930664, + "learning_rate": 9.74049798340346e-06, + "lm_loss": 6.0186, + "loss": 1.7262, + "step": 273, + "text_contrastive_loss": 0.9417, + "train_positive_log_prob": -90.8882, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.027 + }, + { + "contrastive_loss": 0.536, + "epoch": 0.618510158013544, + "grad_norm": 16.871328353881836, + "learning_rate": 9.738185965030444e-06, + "lm_loss": 6.1023, + "loss": 1.634, + "step": 274, + "text_contrastive_loss": 0.9756, + "train_positive_log_prob": -91.4379, + "train_positive_token_accuracy": 0.074, + "train_positive_token_prob": 0.0259 + }, + { + "contrastive_loss": 0.6151, + "epoch": 0.6207674943566591, + "grad_norm": 19.051944732666016, + "learning_rate": 9.735863969749373e-06, + "lm_loss": 6.1173, + "loss": 1.6855, + "step": 275, + "text_contrastive_loss": 0.9173, + "train_positive_log_prob": -93.8667, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.0268 + }, + { + "contrastive_loss": 0.5263, + "epoch": 0.6230248306997742, + "grad_norm": 18.60106086730957, + "learning_rate": 9.733532002449533e-06, + "lm_loss": 6.0893, + "loss": 1.497, + "step": 276, + "text_contrastive_loss": 0.7235, + "train_positive_log_prob": -91.4847, + "train_positive_token_accuracy": 0.0835, + "train_positive_token_prob": 0.028 + }, + { + "contrastive_loss": 0.7154, + "epoch": 0.6252821670428894, + "grad_norm": 19.08406639099121, + "learning_rate": 9.731190068041205e-06, + "lm_loss": 5.9736, + "loss": 1.8019, + "step": 277, + "text_contrastive_loss": 0.9784, + "train_positive_log_prob": -89.1106, + "train_positive_token_accuracy": 0.0801, + "train_positive_token_prob": 0.0269 + }, + { + "contrastive_loss": 0.6488, + "epoch": 0.6275395033860045, + "grad_norm": 18.82988739013672, + "learning_rate": 9.728838171455655e-06, + "lm_loss": 6.1067, + "loss": 1.7215, + "step": 278, + "text_contrastive_loss": 0.924, + "train_positive_log_prob": -91.6936, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.0259 + }, + { + "contrastive_loss": 0.4961, + "epoch": 0.6297968397291196, + "grad_norm": 15.423564910888672, + "learning_rate": 9.72647631764513e-06, + "lm_loss": 6.0616, + "loss": 1.485, + "step": 279, + "text_contrastive_loss": 0.7654, + "train_positive_log_prob": -88.407, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0262 + }, + { + "contrastive_loss": 0.5438, + "epoch": 0.6320541760722348, + "grad_norm": 15.89189338684082, + "learning_rate": 9.724104511582838e-06, + "lm_loss": 6.0332, + "loss": 1.5878, + "step": 280, + "text_contrastive_loss": 0.8814, + "train_positive_log_prob": -89.4613, + "train_positive_token_accuracy": 0.0809, + "train_positive_token_prob": 0.0266 + }, + { + "contrastive_loss": 0.6927, + "epoch": 0.6343115124153499, + "grad_norm": 19.552799224853516, + "learning_rate": 9.721722758262948e-06, + "lm_loss": 6.1385, + "loss": 1.7347, + "step": 281, + "text_contrastive_loss": 0.8562, + "train_positive_log_prob": -91.6472, + "train_positive_token_accuracy": 0.0699, + "train_positive_token_prob": 0.0246 + }, + { + "contrastive_loss": 0.5809, + "epoch": 0.636568848758465, + "grad_norm": 15.961932182312012, + "learning_rate": 9.719331062700572e-06, + "lm_loss": 6.0942, + "loss": 1.5922, + "step": 282, + "text_contrastive_loss": 0.8037, + "train_positive_log_prob": -90.2909, + "train_positive_token_accuracy": 0.0817, + "train_positive_token_prob": 0.0269 + }, + { + "contrastive_loss": 0.6324, + "epoch": 0.6388261851015802, + "grad_norm": 17.922679901123047, + "learning_rate": 9.716929429931757e-06, + "lm_loss": 6.0546, + "loss": 1.7432, + "step": 283, + "text_contrastive_loss": 1.0107, + "train_positive_log_prob": -89.4753, + "train_positive_token_accuracy": 0.0722, + "train_positive_token_prob": 0.0254 + }, + { + "contrastive_loss": 0.6418, + "epoch": 0.6410835214446953, + "grad_norm": 20.517499923706055, + "learning_rate": 9.714517865013473e-06, + "lm_loss": 6.1544, + "loss": 1.7021, + "step": 284, + "text_contrastive_loss": 0.8897, + "train_positive_log_prob": -90.8493, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.0255 + }, + { + "contrastive_loss": 0.6084, + "epoch": 0.6433408577878104, + "grad_norm": 18.2070369720459, + "learning_rate": 9.712096373023603e-06, + "lm_loss": 6.0911, + "loss": 1.6499, + "step": 285, + "text_contrastive_loss": 0.865, + "train_positive_log_prob": -94.2008, + "train_positive_token_accuracy": 0.0796, + "train_positive_token_prob": 0.0266 + }, + { + "contrastive_loss": 0.7458, + "epoch": 0.6455981941309256, + "grad_norm": 20.294166564941406, + "learning_rate": 9.70966495906094e-06, + "lm_loss": 5.9798, + "loss": 1.9069, + "step": 286, + "text_contrastive_loss": 1.1263, + "train_positive_log_prob": -88.6691, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.5274, + "epoch": 0.6478555304740407, + "grad_norm": 17.86669921875, + "learning_rate": 9.707223628245157e-06, + "lm_loss": 5.9527, + "loss": 1.5507, + "step": 287, + "text_contrastive_loss": 0.856, + "train_positive_log_prob": -88.4569, + "train_positive_token_accuracy": 0.081, + "train_positive_token_prob": 0.028 + }, + { + "contrastive_loss": 0.5397, + "epoch": 0.6501128668171557, + "grad_norm": 17.671733856201172, + "learning_rate": 9.70477238571682e-06, + "lm_loss": 6.1065, + "loss": 1.5949, + "step": 288, + "text_contrastive_loss": 0.8893, + "train_positive_log_prob": -90.4553, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0267 + }, + { + "contrastive_loss": 0.7287, + "epoch": 0.6523702031602708, + "grad_norm": 18.42026710510254, + "learning_rate": 9.702311236637357e-06, + "lm_loss": 6.0455, + "loss": 1.8139, + "step": 289, + "text_contrastive_loss": 0.9612, + "train_positive_log_prob": -90.3055, + "train_positive_token_accuracy": 0.074, + "train_positive_token_prob": 0.0272 + }, + { + "contrastive_loss": 0.6308, + "epoch": 0.654627539503386, + "grad_norm": 17.732276916503906, + "learning_rate": 9.699840186189061e-06, + "lm_loss": 6.0318, + "loss": 1.6461, + "step": 290, + "text_contrastive_loss": 0.8243, + "train_positive_log_prob": -90.9443, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.028 + }, + { + "contrastive_loss": 0.5274, + "epoch": 0.6568848758465011, + "grad_norm": 15.064861297607422, + "learning_rate": 9.697359239575069e-06, + "lm_loss": 6.1117, + "loss": 1.652, + "step": 291, + "text_contrastive_loss": 1.0269, + "train_positive_log_prob": -93.8008, + "train_positive_token_accuracy": 0.0822, + "train_positive_token_prob": 0.0281 + }, + { + "contrastive_loss": 0.5341, + "epoch": 0.6591422121896162, + "grad_norm": 17.14569854736328, + "learning_rate": 9.694868402019362e-06, + "lm_loss": 6.0816, + "loss": 1.6432, + "step": 292, + "text_contrastive_loss": 1.0019, + "train_positive_log_prob": -90.1991, + "train_positive_token_accuracy": 0.0787, + "train_positive_token_prob": 0.0261 + }, + { + "contrastive_loss": 0.744, + "epoch": 0.6613995485327314, + "grad_norm": 16.110231399536133, + "learning_rate": 9.69236767876674e-06, + "lm_loss": 6.0076, + "loss": 1.8819, + "step": 293, + "text_contrastive_loss": 1.0743, + "train_positive_log_prob": -89.9718, + "train_positive_token_accuracy": 0.073, + "train_positive_token_prob": 0.0266 + }, + { + "contrastive_loss": 0.5429, + "epoch": 0.6636568848758465, + "grad_norm": 15.464397430419922, + "learning_rate": 9.689857075082828e-06, + "lm_loss": 6.0331, + "loss": 1.6538, + "step": 294, + "text_contrastive_loss": 1.0153, + "train_positive_log_prob": -90.5463, + "train_positive_token_accuracy": 0.0809, + "train_positive_token_prob": 0.0282 + }, + { + "contrastive_loss": 0.7329, + "epoch": 0.6659142212189616, + "grad_norm": 18.795854568481445, + "learning_rate": 9.687336596254045e-06, + "lm_loss": 6.1155, + "loss": 1.8078, + "step": 295, + "text_contrastive_loss": 0.9266, + "train_positive_log_prob": -88.817, + "train_positive_token_accuracy": 0.0764, + "train_positive_token_prob": 0.0266 + }, + { + "contrastive_loss": 0.6235, + "epoch": 0.6681715575620768, + "grad_norm": 16.672197341918945, + "learning_rate": 9.68480624758761e-06, + "lm_loss": 6.0868, + "loss": 1.6619, + "step": 296, + "text_contrastive_loss": 0.8594, + "train_positive_log_prob": -88.3735, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0261 + }, + { + "contrastive_loss": 0.5046, + "epoch": 0.6704288939051919, + "grad_norm": 15.676862716674805, + "learning_rate": 9.682266034411527e-06, + "lm_loss": 6.0407, + "loss": 1.4822, + "step": 297, + "text_contrastive_loss": 0.7469, + "train_positive_log_prob": -88.7241, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.0269 + }, + { + "contrastive_loss": 0.5503, + "epoch": 0.672686230248307, + "grad_norm": 20.169675827026367, + "learning_rate": 9.679715962074566e-06, + "lm_loss": 6.0401, + "loss": 1.6306, + "step": 298, + "text_contrastive_loss": 0.9526, + "train_positive_log_prob": -88.2241, + "train_positive_token_accuracy": 0.077, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.5345, + "epoch": 0.6749435665914221, + "grad_norm": 15.508736610412598, + "learning_rate": 9.677156035946253e-06, + "lm_loss": 6.0042, + "loss": 1.6291, + "step": 299, + "text_contrastive_loss": 0.9883, + "train_positive_log_prob": -88.866, + "train_positive_token_accuracy": 0.0763, + "train_positive_token_prob": 0.0266 + }, + { + "contrastive_loss": 0.5659, + "epoch": 0.6772009029345373, + "grad_norm": 16.520841598510742, + "learning_rate": 9.674586261416874e-06, + "lm_loss": 6.0084, + "loss": 1.5701, + "step": 300, + "text_contrastive_loss": 0.8068, + "train_positive_log_prob": -87.9608, + "train_positive_token_accuracy": 0.0811, + "train_positive_token_prob": 0.028 + }, + { + "contrastive_loss": 0.6958, + "epoch": 0.6794582392776524, + "grad_norm": 18.629053115844727, + "learning_rate": 9.672006643897444e-06, + "lm_loss": 6.0449, + "loss": 1.8541, + "step": 301, + "text_contrastive_loss": 1.1076, + "train_positive_log_prob": -88.7139, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.027 + }, + { + "contrastive_loss": 0.5912, + "epoch": 0.6817155756207675, + "grad_norm": 16.101329803466797, + "learning_rate": 9.669417188819704e-06, + "lm_loss": 6.1, + "loss": 1.673, + "step": 302, + "text_contrastive_loss": 0.9436, + "train_positive_log_prob": -91.1838, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.55, + "epoch": 0.6839729119638827, + "grad_norm": 16.299318313598633, + "learning_rate": 9.666817901636115e-06, + "lm_loss": 6.0448, + "loss": 1.6336, + "step": 303, + "text_contrastive_loss": 0.9583, + "train_positive_log_prob": -88.6405, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.0269 + }, + { + "contrastive_loss": 0.5321, + "epoch": 0.6862302483069977, + "grad_norm": 16.60855484008789, + "learning_rate": 9.664208787819833e-06, + "lm_loss": 6.0191, + "loss": 1.5967, + "step": 304, + "text_contrastive_loss": 0.9254, + "train_positive_log_prob": -87.8427, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0269 + }, + { + "contrastive_loss": 0.5293, + "epoch": 0.6884875846501128, + "grad_norm": 14.58573055267334, + "learning_rate": 9.66158985286471e-06, + "lm_loss": 5.9924, + "loss": 1.5595, + "step": 305, + "text_contrastive_loss": 0.8619, + "train_positive_log_prob": -88.343, + "train_positive_token_accuracy": 0.0819, + "train_positive_token_prob": 0.0267 + }, + { + "contrastive_loss": 0.5092, + "epoch": 0.690744920993228, + "grad_norm": 15.70984935760498, + "learning_rate": 9.658961102285276e-06, + "lm_loss": 5.9689, + "loss": 1.5339, + "step": 306, + "text_contrastive_loss": 0.8556, + "train_positive_log_prob": -87.7885, + "train_positive_token_accuracy": 0.0821, + "train_positive_token_prob": 0.0273 + }, + { + "contrastive_loss": 0.6478, + "epoch": 0.6930022573363431, + "grad_norm": 16.367740631103516, + "learning_rate": 9.656322541616734e-06, + "lm_loss": 6.0104, + "loss": 1.6669, + "step": 307, + "text_contrastive_loss": 0.8361, + "train_positive_log_prob": -90.9339, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0273 + }, + { + "contrastive_loss": 0.6696, + "epoch": 0.6952595936794582, + "grad_norm": 19.108896255493164, + "learning_rate": 9.653674176414936e-06, + "lm_loss": 6.0181, + "loss": 1.7642, + "step": 308, + "text_contrastive_loss": 0.9858, + "train_positive_log_prob": -88.1778, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0271 + }, + { + "contrastive_loss": 0.5434, + "epoch": 0.6975169300225733, + "grad_norm": 15.466970443725586, + "learning_rate": 9.651016012256382e-06, + "lm_loss": 5.9611, + "loss": 1.572, + "step": 309, + "text_contrastive_loss": 0.8651, + "train_positive_log_prob": -86.557, + "train_positive_token_accuracy": 0.0783, + "train_positive_token_prob": 0.0277 + }, + { + "contrastive_loss": 0.6418, + "epoch": 0.6997742663656885, + "grad_norm": 16.334604263305664, + "learning_rate": 9.648348054738208e-06, + "lm_loss": 6.1037, + "loss": 1.793, + "step": 310, + "text_contrastive_loss": 1.0817, + "train_positive_log_prob": -88.9581, + "train_positive_token_accuracy": 0.0772, + "train_positive_token_prob": 0.026 + }, + { + "contrastive_loss": 0.4674, + "epoch": 0.7020316027088036, + "grad_norm": 15.780133247375488, + "learning_rate": 9.64567030947817e-06, + "lm_loss": 6.1404, + "loss": 1.4728, + "step": 311, + "text_contrastive_loss": 0.7827, + "train_positive_log_prob": -90.9331, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0264 + }, + { + "contrastive_loss": 0.6237, + "epoch": 0.7042889390519187, + "grad_norm": 18.079483032226562, + "learning_rate": 9.642982782114628e-06, + "lm_loss": 5.9596, + "loss": 1.6928, + "step": 312, + "text_contrastive_loss": 0.9464, + "train_positive_log_prob": -86.2285, + "train_positive_token_accuracy": 0.0734, + "train_positive_token_prob": 0.0259 + }, + { + "contrastive_loss": 0.6042, + "epoch": 0.7065462753950339, + "grad_norm": 16.58669090270996, + "learning_rate": 9.640285478306546e-06, + "lm_loss": 5.9693, + "loss": 1.7108, + "step": 313, + "text_contrastive_loss": 1.0194, + "train_positive_log_prob": -87.3963, + "train_positive_token_accuracy": 0.0747, + "train_positive_token_prob": 0.0273 + }, + { + "contrastive_loss": 0.4788, + "epoch": 0.708803611738149, + "grad_norm": 14.912610054016113, + "learning_rate": 9.63757840373347e-06, + "lm_loss": 5.9504, + "loss": 1.4448, + "step": 314, + "text_contrastive_loss": 0.742, + "train_positive_log_prob": -86.3959, + "train_positive_token_accuracy": 0.0788, + "train_positive_token_prob": 0.0272 + }, + { + "contrastive_loss": 0.5168, + "epoch": 0.7110609480812641, + "grad_norm": 15.224937438964844, + "learning_rate": 9.634861564095525e-06, + "lm_loss": 6.1028, + "loss": 1.568, + "step": 315, + "text_contrastive_loss": 0.882, + "train_positive_log_prob": -87.4658, + "train_positive_token_accuracy": 0.0755, + "train_positive_token_prob": 0.026 + }, + { + "contrastive_loss": 0.6257, + "epoch": 0.7133182844243793, + "grad_norm": 18.58464241027832, + "learning_rate": 9.632134965113389e-06, + "lm_loss": 5.8945, + "loss": 1.8267, + "step": 316, + "text_contrastive_loss": 1.2232, + "train_positive_log_prob": -88.2119, + "train_positive_token_accuracy": 0.0851, + "train_positive_token_prob": 0.0279 + }, + { + "contrastive_loss": 0.6276, + "epoch": 0.7155756207674944, + "grad_norm": 19.075607299804688, + "learning_rate": 9.629398612528299e-06, + "lm_loss": 6.1204, + "loss": 1.7269, + "step": 317, + "text_contrastive_loss": 0.9745, + "train_positive_log_prob": -93.1918, + "train_positive_token_accuracy": 0.0734, + "train_positive_token_prob": 0.0265 + }, + { + "contrastive_loss": 0.5368, + "epoch": 0.7178329571106095, + "grad_norm": 15.370393753051758, + "learning_rate": 9.626652512102021e-06, + "lm_loss": 6.0609, + "loss": 1.6136, + "step": 318, + "text_contrastive_loss": 0.9414, + "train_positive_log_prob": -89.9175, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0271 + }, + { + "contrastive_loss": 0.511, + "epoch": 0.7200902934537246, + "grad_norm": 14.638890266418457, + "learning_rate": 9.623896669616855e-06, + "lm_loss": 6.01, + "loss": 1.5746, + "step": 319, + "text_contrastive_loss": 0.9252, + "train_positive_log_prob": -87.8223, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0263 + }, + { + "contrastive_loss": 0.5293, + "epoch": 0.7223476297968398, + "grad_norm": 15.804308891296387, + "learning_rate": 9.621131090875603e-06, + "lm_loss": 5.9965, + "loss": 1.5178, + "step": 320, + "text_contrastive_loss": 0.7776, + "train_positive_log_prob": -87.047, + "train_positive_token_accuracy": 0.074, + "train_positive_token_prob": 0.026 + }, + { + "contrastive_loss": 0.7324, + "epoch": 0.7246049661399548, + "grad_norm": 16.726764678955078, + "learning_rate": 9.618355781701584e-06, + "lm_loss": 5.8973, + "loss": 1.725, + "step": 321, + "text_contrastive_loss": 0.8058, + "train_positive_log_prob": -87.1469, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.0274 + }, + { + "contrastive_loss": 0.6087, + "epoch": 0.7268623024830699, + "grad_norm": 16.43556022644043, + "learning_rate": 9.61557074793859e-06, + "lm_loss": 6.008, + "loss": 1.7036, + "step": 322, + "text_contrastive_loss": 0.9882, + "train_positive_log_prob": -87.3777, + "train_positive_token_accuracy": 0.0822, + "train_positive_token_prob": 0.0277 + }, + { + "contrastive_loss": 0.7146, + "epoch": 0.7291196388261851, + "grad_norm": 17.283023834228516, + "learning_rate": 9.612775995450896e-06, + "lm_loss": 6.1591, + "loss": 1.8088, + "step": 323, + "text_contrastive_loss": 0.9567, + "train_positive_log_prob": -92.0007, + "train_positive_token_accuracy": 0.074, + "train_positive_token_prob": 0.0261 + }, + { + "contrastive_loss": 0.568, + "epoch": 0.7313769751693002, + "grad_norm": 18.272823333740234, + "learning_rate": 9.609971530123243e-06, + "lm_loss": 5.9726, + "loss": 1.5599, + "step": 324, + "text_contrastive_loss": 0.7891, + "train_positive_log_prob": -88.33, + "train_positive_token_accuracy": 0.0787, + "train_positive_token_prob": 0.0271 + }, + { + "contrastive_loss": 0.5346, + "epoch": 0.7336343115124153, + "grad_norm": 16.084165573120117, + "learning_rate": 9.607157357860823e-06, + "lm_loss": 6.1526, + "loss": 1.622, + "step": 325, + "text_contrastive_loss": 0.9443, + "train_positive_log_prob": -92.1002, + "train_positive_token_accuracy": 0.0736, + "train_positive_token_prob": 0.0263 + }, + { + "contrastive_loss": 0.618, + "epoch": 0.7358916478555305, + "grad_norm": 18.810020446777344, + "learning_rate": 9.604333484589266e-06, + "lm_loss": 5.9696, + "loss": 1.695, + "step": 326, + "text_contrastive_loss": 0.9601, + "train_positive_log_prob": -88.8006, + "train_positive_token_accuracy": 0.0743, + "train_positive_token_prob": 0.0265 + }, + { + "contrastive_loss": 0.5853, + "epoch": 0.7381489841986456, + "grad_norm": 17.179550170898438, + "learning_rate": 9.601499916254626e-06, + "lm_loss": 5.9537, + "loss": 1.6728, + "step": 327, + "text_contrastive_loss": 0.9842, + "train_positive_log_prob": -88.3761, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.0282 + }, + { + "contrastive_loss": 0.5711, + "epoch": 0.7404063205417607, + "grad_norm": 15.827096939086914, + "learning_rate": 9.598656658823378e-06, + "lm_loss": 6.0092, + "loss": 1.6077, + "step": 328, + "text_contrastive_loss": 0.8714, + "train_positive_log_prob": -87.3207, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.4781, + "epoch": 0.7426636568848759, + "grad_norm": 13.675418853759766, + "learning_rate": 9.595803718282391e-06, + "lm_loss": 6.0014, + "loss": 1.4617, + "step": 329, + "text_contrastive_loss": 0.7669, + "train_positive_log_prob": -89.9279, + "train_positive_token_accuracy": 0.0772, + "train_positive_token_prob": 0.0273 + }, + { + "contrastive_loss": 0.5728, + "epoch": 0.744920993227991, + "grad_norm": 15.08580493927002, + "learning_rate": 9.59294110063893e-06, + "lm_loss": 5.9199, + "loss": 1.5606, + "step": 330, + "text_contrastive_loss": 0.7916, + "train_positive_log_prob": -88.2864, + "train_positive_token_accuracy": 0.0836, + "train_positive_token_prob": 0.0279 + }, + { + "contrastive_loss": 0.6369, + "epoch": 0.7471783295711061, + "grad_norm": 15.638589859008789, + "learning_rate": 9.590068811920637e-06, + "lm_loss": 6.0176, + "loss": 1.7106, + "step": 331, + "text_contrastive_loss": 0.944, + "train_positive_log_prob": -87.8237, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0269 + }, + { + "contrastive_loss": 0.4567, + "epoch": 0.7494356659142212, + "grad_norm": 13.649426460266113, + "learning_rate": 9.587186858175507e-06, + "lm_loss": 5.9268, + "loss": 1.4566, + "step": 332, + "text_contrastive_loss": 0.8145, + "train_positive_log_prob": -87.7401, + "train_positive_token_accuracy": 0.0858, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.5302, + "epoch": 0.7516930022573364, + "grad_norm": 13.907849311828613, + "learning_rate": 9.584295245471898e-06, + "lm_loss": 6.095, + "loss": 1.5373, + "step": 333, + "text_contrastive_loss": 0.7951, + "train_positive_log_prob": -90.3569, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0266 + }, + { + "contrastive_loss": 0.6269, + "epoch": 0.7539503386004515, + "grad_norm": 17.2265682220459, + "learning_rate": 9.581393979898502e-06, + "lm_loss": 6.0453, + "loss": 1.6644, + "step": 334, + "text_contrastive_loss": 0.8658, + "train_positive_log_prob": -90.305, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.0272 + }, + { + "contrastive_loss": 0.5781, + "epoch": 0.7562076749435666, + "grad_norm": 17.97529411315918, + "learning_rate": 9.578483067564335e-06, + "lm_loss": 5.9504, + "loss": 1.6473, + "step": 335, + "text_contrastive_loss": 0.9484, + "train_positive_log_prob": -88.7701, + "train_positive_token_accuracy": 0.0694, + "train_positive_token_prob": 0.0252 + }, + { + "contrastive_loss": 0.5501, + "epoch": 0.7584650112866818, + "grad_norm": 15.79790210723877, + "learning_rate": 9.575562514598727e-06, + "lm_loss": 6.0716, + "loss": 1.5896, + "step": 336, + "text_contrastive_loss": 0.8647, + "train_positive_log_prob": -90.6702, + "train_positive_token_accuracy": 0.0803, + "train_positive_token_prob": 0.0271 + }, + { + "contrastive_loss": 0.494, + "epoch": 0.7607223476297968, + "grad_norm": 14.60803508758545, + "learning_rate": 9.572632327151309e-06, + "lm_loss": 5.9756, + "loss": 1.4763, + "step": 337, + "text_contrastive_loss": 0.7693, + "train_positive_log_prob": -87.2331, + "train_positive_token_accuracy": 0.0816, + "train_positive_token_prob": 0.0275 + }, + { + "contrastive_loss": 0.5638, + "epoch": 0.7629796839729119, + "grad_norm": 16.311325073242188, + "learning_rate": 9.569692511391995e-06, + "lm_loss": 6.0347, + "loss": 1.6862, + "step": 338, + "text_contrastive_loss": 1.0379, + "train_positive_log_prob": -89.7927, + "train_positive_token_accuracy": 0.0709, + "train_positive_token_prob": 0.0256 + }, + { + "contrastive_loss": 0.63, + "epoch": 0.7652370203160271, + "grad_norm": 19.387428283691406, + "learning_rate": 9.566743073510976e-06, + "lm_loss": 5.8585, + "loss": 1.634, + "step": 339, + "text_contrastive_loss": 0.8361, + "train_positive_log_prob": -84.7889, + "train_positive_token_accuracy": 0.0807, + "train_positive_token_prob": 0.0282 + }, + { + "contrastive_loss": 0.5412, + "epoch": 0.7674943566591422, + "grad_norm": 14.615008354187012, + "learning_rate": 9.563784019718704e-06, + "lm_loss": 6.014, + "loss": 1.5382, + "step": 340, + "text_contrastive_loss": 0.7911, + "train_positive_log_prob": -87.5809, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.0274 + }, + { + "contrastive_loss": 0.6141, + "epoch": 0.7697516930022573, + "grad_norm": 15.120708465576172, + "learning_rate": 9.560815356245875e-06, + "lm_loss": 5.8754, + "loss": 1.6377, + "step": 341, + "text_contrastive_loss": 0.8721, + "train_positive_log_prob": -86.847, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.56, + "epoch": 0.7720090293453724, + "grad_norm": 16.741840362548828, + "learning_rate": 9.557837089343424e-06, + "lm_loss": 6.0862, + "loss": 1.5532, + "step": 342, + "text_contrastive_loss": 0.7692, + "train_positive_log_prob": -92.213, + "train_positive_token_accuracy": 0.0704, + "train_positive_token_prob": 0.0266 + }, + { + "contrastive_loss": 0.6168, + "epoch": 0.7742663656884876, + "grad_norm": 16.309051513671875, + "learning_rate": 9.554849225282503e-06, + "lm_loss": 6.1134, + "loss": 1.6829, + "step": 343, + "text_contrastive_loss": 0.9094, + "train_positive_log_prob": -91.3158, + "train_positive_token_accuracy": 0.0677, + "train_positive_token_prob": 0.0254 + }, + { + "contrastive_loss": 0.6046, + "epoch": 0.7765237020316027, + "grad_norm": 15.494705200195312, + "learning_rate": 9.551851770354477e-06, + "lm_loss": 6.1145, + "loss": 1.6974, + "step": 344, + "text_contrastive_loss": 0.9626, + "train_positive_log_prob": -89.1876, + "train_positive_token_accuracy": 0.0731, + "train_positive_token_prob": 0.0259 + }, + { + "contrastive_loss": 0.6118, + "epoch": 0.7787810383747178, + "grad_norm": 19.388378143310547, + "learning_rate": 9.548844730870903e-06, + "lm_loss": 6.0134, + "loss": 1.7066, + "step": 345, + "text_contrastive_loss": 0.987, + "train_positive_log_prob": -89.7113, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0274 + }, + { + "contrastive_loss": 0.6835, + "epoch": 0.781038374717833, + "grad_norm": 18.696331024169922, + "learning_rate": 9.545828113163516e-06, + "lm_loss": 6.0323, + "loss": 1.7312, + "step": 346, + "text_contrastive_loss": 0.8889, + "train_positive_log_prob": -88.3829, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.0274 + }, + { + "contrastive_loss": 0.5436, + "epoch": 0.7832957110609481, + "grad_norm": 16.435501098632812, + "learning_rate": 9.542801923584228e-06, + "lm_loss": 6.0153, + "loss": 1.5761, + "step": 347, + "text_contrastive_loss": 0.862, + "train_positive_log_prob": -89.1267, + "train_positive_token_accuracy": 0.0801, + "train_positive_token_prob": 0.0265 + }, + { + "contrastive_loss": 0.6617, + "epoch": 0.7855530474040632, + "grad_norm": 16.152393341064453, + "learning_rate": 9.5397661685051e-06, + "lm_loss": 5.9657, + "loss": 1.6929, + "step": 348, + "text_contrastive_loss": 0.8693, + "train_positive_log_prob": -86.4758, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.0274 + }, + { + "contrastive_loss": 0.638, + "epoch": 0.7878103837471784, + "grad_norm": 16.217805862426758, + "learning_rate": 9.536720854318333e-06, + "lm_loss": 6.0146, + "loss": 1.7951, + "step": 349, + "text_contrastive_loss": 1.1114, + "train_positive_log_prob": -87.0471, + "train_positive_token_accuracy": 0.0737, + "train_positive_token_prob": 0.0258 + }, + { + "contrastive_loss": 0.5622, + "epoch": 0.7900677200902935, + "grad_norm": 16.489063262939453, + "learning_rate": 9.533665987436262e-06, + "lm_loss": 6.0753, + "loss": 1.625, + "step": 350, + "text_contrastive_loss": 0.9106, + "train_positive_log_prob": -91.5571, + "train_positive_token_accuracy": 0.0772, + "train_positive_token_prob": 0.0269 + }, + { + "contrastive_loss": 0.4797, + "epoch": 0.7923250564334086, + "grad_norm": 13.74478530883789, + "learning_rate": 9.530601574291331e-06, + "lm_loss": 5.9487, + "loss": 1.4712, + "step": 351, + "text_contrastive_loss": 0.7932, + "train_positive_log_prob": -87.9771, + "train_positive_token_accuracy": 0.0713, + "train_positive_token_prob": 0.0263 + }, + { + "contrastive_loss": 0.577, + "epoch": 0.7945823927765236, + "grad_norm": 16.449974060058594, + "learning_rate": 9.527527621336087e-06, + "lm_loss": 6.0159, + "loss": 1.6129, + "step": 352, + "text_contrastive_loss": 0.8686, + "train_positive_log_prob": -89.9788, + "train_positive_token_accuracy": 0.0682, + "train_positive_token_prob": 0.027 + }, + { + "contrastive_loss": 0.5742, + "epoch": 0.7968397291196389, + "grad_norm": 13.938642501831055, + "learning_rate": 9.524444135043168e-06, + "lm_loss": 5.9435, + "loss": 1.614, + "step": 353, + "text_contrastive_loss": 0.8909, + "train_positive_log_prob": -86.781, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.0268 + }, + { + "contrastive_loss": 0.5088, + "epoch": 0.7990970654627539, + "grad_norm": 14.588152885437012, + "learning_rate": 9.521351121905278e-06, + "lm_loss": 5.9525, + "loss": 1.5603, + "step": 354, + "text_contrastive_loss": 0.9125, + "train_positive_log_prob": -87.9171, + "train_positive_token_accuracy": 0.0775, + "train_positive_token_prob": 0.0265 + }, + { + "contrastive_loss": 0.4601, + "epoch": 0.801354401805869, + "grad_norm": 16.702144622802734, + "learning_rate": 9.518248588435185e-06, + "lm_loss": 6.1349, + "loss": 1.4862, + "step": 355, + "text_contrastive_loss": 0.8252, + "train_positive_log_prob": -90.8901, + "train_positive_token_accuracy": 0.0778, + "train_positive_token_prob": 0.0266 + }, + { + "contrastive_loss": 0.644, + "epoch": 0.8036117381489842, + "grad_norm": 16.598875045776367, + "learning_rate": 9.515136541165708e-06, + "lm_loss": 5.8817, + "loss": 1.6703, + "step": 356, + "text_contrastive_loss": 0.8762, + "train_positive_log_prob": -86.1602, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0271 + }, + { + "contrastive_loss": 0.5047, + "epoch": 0.8058690744920993, + "grad_norm": 14.9662504196167, + "learning_rate": 9.512014986649691e-06, + "lm_loss": 5.9766, + "loss": 1.5899, + "step": 357, + "text_contrastive_loss": 0.975, + "train_positive_log_prob": -89.181, + "train_positive_token_accuracy": 0.0688, + "train_positive_token_prob": 0.0258 + }, + { + "contrastive_loss": 0.5079, + "epoch": 0.8081264108352144, + "grad_norm": 17.661785125732422, + "learning_rate": 9.50888393146e-06, + "lm_loss": 5.9192, + "loss": 1.58, + "step": 358, + "text_contrastive_loss": 0.9603, + "train_positive_log_prob": -84.8167, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.027 + }, + { + "contrastive_loss": 0.561, + "epoch": 0.8103837471783296, + "grad_norm": 15.982047080993652, + "learning_rate": 9.50574338218951e-06, + "lm_loss": 5.9751, + "loss": 1.5926, + "step": 359, + "text_contrastive_loss": 0.8683, + "train_positive_log_prob": -87.3648, + "train_positive_token_accuracy": 0.0727, + "train_positive_token_prob": 0.0257 + }, + { + "contrastive_loss": 0.6164, + "epoch": 0.8126410835214447, + "grad_norm": 16.443511962890625, + "learning_rate": 9.502593345451078e-06, + "lm_loss": 5.9671, + "loss": 1.6524, + "step": 360, + "text_contrastive_loss": 0.8785, + "train_positive_log_prob": -88.3896, + "train_positive_token_accuracy": 0.0773, + "train_positive_token_prob": 0.0273 + }, + { + "contrastive_loss": 0.4518, + "epoch": 0.8148984198645598, + "grad_norm": 14.49591064453125, + "learning_rate": 9.499433827877547e-06, + "lm_loss": 5.9642, + "loss": 1.5254, + "step": 361, + "text_contrastive_loss": 0.9544, + "train_positive_log_prob": -85.2224, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.0272 + }, + { + "contrastive_loss": 0.4817, + "epoch": 0.8171557562076749, + "grad_norm": 15.663738250732422, + "learning_rate": 9.49626483612172e-06, + "lm_loss": 5.909, + "loss": 1.4851, + "step": 362, + "text_contrastive_loss": 0.8251, + "train_positive_log_prob": -86.5855, + "train_positive_token_accuracy": 0.0746, + "train_positive_token_prob": 0.0262 + }, + { + "contrastive_loss": 0.5994, + "epoch": 0.8194130925507901, + "grad_norm": 18.769023895263672, + "learning_rate": 9.493086376856346e-06, + "lm_loss": 5.913, + "loss": 1.7104, + "step": 363, + "text_contrastive_loss": 1.0395, + "train_positive_log_prob": -87.1714, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.0272 + }, + { + "contrastive_loss": 0.5879, + "epoch": 0.8216704288939052, + "grad_norm": 17.848712921142578, + "learning_rate": 9.489898456774116e-06, + "lm_loss": 5.9088, + "loss": 1.6593, + "step": 364, + "text_contrastive_loss": 0.9611, + "train_positive_log_prob": -86.1629, + "train_positive_token_accuracy": 0.082, + "train_positive_token_prob": 0.0277 + }, + { + "contrastive_loss": 0.5684, + "epoch": 0.8239277652370203, + "grad_norm": 15.90553092956543, + "learning_rate": 9.486701082587635e-06, + "lm_loss": 5.9272, + "loss": 1.5514, + "step": 365, + "text_contrastive_loss": 0.7805, + "train_positive_log_prob": -86.9133, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.0272 + }, + { + "contrastive_loss": 0.56, + "epoch": 0.8261851015801355, + "grad_norm": 17.990188598632812, + "learning_rate": 9.483494261029418e-06, + "lm_loss": 5.8794, + "loss": 1.5516, + "step": 366, + "text_contrastive_loss": 0.8074, + "train_positive_log_prob": -86.2323, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.0276 + }, + { + "contrastive_loss": 0.6597, + "epoch": 0.8284424379232506, + "grad_norm": 17.917858123779297, + "learning_rate": 9.480277998851875e-06, + "lm_loss": 5.9231, + "loss": 1.7323, + "step": 367, + "text_contrastive_loss": 0.9606, + "train_positive_log_prob": -88.1575, + "train_positive_token_accuracy": 0.0782, + "train_positive_token_prob": 0.0286 + }, + { + "contrastive_loss": 0.5715, + "epoch": 0.8306997742663657, + "grad_norm": 17.245140075683594, + "learning_rate": 9.47705230282729e-06, + "lm_loss": 5.8636, + "loss": 1.6157, + "step": 368, + "text_contrastive_loss": 0.9156, + "train_positive_log_prob": -87.4541, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.5463, + "epoch": 0.8329571106094809, + "grad_norm": 15.327033996582031, + "learning_rate": 9.473817179747815e-06, + "lm_loss": 5.9957, + "loss": 1.5364, + "step": 369, + "text_contrastive_loss": 0.781, + "train_positive_log_prob": -88.5815, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0266 + }, + { + "contrastive_loss": 0.7253, + "epoch": 0.835214446952596, + "grad_norm": 17.313541412353516, + "learning_rate": 9.470572636425451e-06, + "lm_loss": 6.002, + "loss": 1.8759, + "step": 370, + "text_contrastive_loss": 1.1009, + "train_positive_log_prob": -89.3764, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.0279 + }, + { + "contrastive_loss": 0.5523, + "epoch": 0.837471783295711, + "grad_norm": 16.950382232666016, + "learning_rate": 9.467318679692031e-06, + "lm_loss": 5.8676, + "loss": 1.6445, + "step": 371, + "text_contrastive_loss": 1.0108, + "train_positive_log_prob": -87.6269, + "train_positive_token_accuracy": 0.0824, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.4851, + "epoch": 0.8397291196388262, + "grad_norm": 15.869084358215332, + "learning_rate": 9.464055316399217e-06, + "lm_loss": 5.9227, + "loss": 1.491, + "step": 372, + "text_contrastive_loss": 0.8272, + "train_positive_log_prob": -88.1466, + "train_positive_token_accuracy": 0.0782, + "train_positive_token_prob": 0.0276 + }, + { + "contrastive_loss": 0.5553, + "epoch": 0.8419864559819413, + "grad_norm": 16.377538681030273, + "learning_rate": 9.46078255341847e-06, + "lm_loss": 5.909, + "loss": 1.5253, + "step": 373, + "text_contrastive_loss": 0.7581, + "train_positive_log_prob": -86.6937, + "train_positive_token_accuracy": 0.0876, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.5857, + "epoch": 0.8442437923250564, + "grad_norm": 15.260660171508789, + "learning_rate": 9.457500397641049e-06, + "lm_loss": 5.9267, + "loss": 1.6021, + "step": 374, + "text_contrastive_loss": 0.8473, + "train_positive_log_prob": -89.8467, + "train_positive_token_accuracy": 0.0835, + "train_positive_token_prob": 0.0286 + }, + { + "contrastive_loss": 0.5048, + "epoch": 0.8465011286681715, + "grad_norm": 15.242878913879395, + "learning_rate": 9.454208855977986e-06, + "lm_loss": 5.9061, + "loss": 1.498, + "step": 375, + "text_contrastive_loss": 0.8053, + "train_positive_log_prob": -86.8605, + "train_positive_token_accuracy": 0.0845, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.4818, + "epoch": 0.8487584650112867, + "grad_norm": 15.809320449829102, + "learning_rate": 9.450907935360081e-06, + "lm_loss": 5.781, + "loss": 1.507, + "step": 376, + "text_contrastive_loss": 0.8943, + "train_positive_log_prob": -84.8604, + "train_positive_token_accuracy": 0.0854, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.643, + "epoch": 0.8510158013544018, + "grad_norm": 16.839052200317383, + "learning_rate": 9.447597642737878e-06, + "lm_loss": 5.9999, + "loss": 1.7132, + "step": 377, + "text_contrastive_loss": 0.9403, + "train_positive_log_prob": -90.0478, + "train_positive_token_accuracy": 0.0877, + "train_positive_token_prob": 0.0285 + }, + { + "contrastive_loss": 0.5483, + "epoch": 0.8532731376975169, + "grad_norm": 17.23625946044922, + "learning_rate": 9.44427798508166e-06, + "lm_loss": 5.9502, + "loss": 1.571, + "step": 378, + "text_contrastive_loss": 0.8553, + "train_positive_log_prob": -86.0941, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.0279 + }, + { + "contrastive_loss": 0.5727, + "epoch": 0.8555304740406321, + "grad_norm": 15.801608085632324, + "learning_rate": 9.440948969381425e-06, + "lm_loss": 5.8905, + "loss": 1.6178, + "step": 379, + "text_contrastive_loss": 0.9121, + "train_positive_log_prob": -87.5111, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.5745, + "epoch": 0.8577878103837472, + "grad_norm": 15.277762413024902, + "learning_rate": 9.437610602646878e-06, + "lm_loss": 5.9217, + "loss": 1.563, + "step": 380, + "text_contrastive_loss": 0.7927, + "train_positive_log_prob": -88.6911, + "train_positive_token_accuracy": 0.0816, + "train_positive_token_prob": 0.0276 + }, + { + "contrastive_loss": 0.5893, + "epoch": 0.8600451467268623, + "grad_norm": 15.415613174438477, + "learning_rate": 9.434262891907413e-06, + "lm_loss": 5.9829, + "loss": 1.599, + "step": 381, + "text_contrastive_loss": 0.8228, + "train_positive_log_prob": -87.9692, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.0268 + }, + { + "contrastive_loss": 0.5768, + "epoch": 0.8623024830699775, + "grad_norm": 16.42424201965332, + "learning_rate": 9.430905844212102e-06, + "lm_loss": 5.9006, + "loss": 1.6524, + "step": 382, + "text_contrastive_loss": 0.9712, + "train_positive_log_prob": -85.9495, + "train_positive_token_accuracy": 0.0727, + "train_positive_token_prob": 0.0259 + }, + { + "contrastive_loss": 0.6797, + "epoch": 0.8645598194130926, + "grad_norm": 19.66604232788086, + "learning_rate": 9.427539466629672e-06, + "lm_loss": 5.8311, + "loss": 1.6869, + "step": 383, + "text_contrastive_loss": 0.8481, + "train_positive_log_prob": -86.2587, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0277 + }, + { + "contrastive_loss": 0.4721, + "epoch": 0.8668171557562077, + "grad_norm": 15.0642671585083, + "learning_rate": 9.424163766248499e-06, + "lm_loss": 6.01, + "loss": 1.4261, + "step": 384, + "text_contrastive_loss": 0.7058, + "train_positive_log_prob": -90.3445, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0261 + }, + { + "contrastive_loss": 0.6619, + "epoch": 0.8690744920993227, + "grad_norm": 17.312721252441406, + "learning_rate": 9.420778750176588e-06, + "lm_loss": 5.8863, + "loss": 1.7113, + "step": 385, + "text_contrastive_loss": 0.9214, + "train_positive_log_prob": -86.7322, + "train_positive_token_accuracy": 0.0734, + "train_positive_token_prob": 0.0267 + }, + { + "contrastive_loss": 0.6184, + "epoch": 0.871331828442438, + "grad_norm": 17.015729904174805, + "learning_rate": 9.41738442554156e-06, + "lm_loss": 5.9385, + "loss": 1.6154, + "step": 386, + "text_contrastive_loss": 0.8063, + "train_positive_log_prob": -89.7967, + "train_positive_token_accuracy": 0.0827, + "train_positive_token_prob": 0.027 + }, + { + "contrastive_loss": 0.6557, + "epoch": 0.873589164785553, + "grad_norm": 17.221759796142578, + "learning_rate": 9.41398079949064e-06, + "lm_loss": 5.744, + "loss": 1.6751, + "step": 387, + "text_contrastive_loss": 0.8901, + "train_positive_log_prob": -82.8399, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0274 + }, + { + "contrastive_loss": 0.6435, + "epoch": 0.8758465011286681, + "grad_norm": 15.763158798217773, + "learning_rate": 9.41056787919063e-06, + "lm_loss": 5.9629, + "loss": 1.6614, + "step": 388, + "text_contrastive_loss": 0.8432, + "train_positive_log_prob": -88.813, + "train_positive_token_accuracy": 0.0763, + "train_positive_token_prob": 0.0268 + }, + { + "contrastive_loss": 0.5498, + "epoch": 0.8781038374717833, + "grad_norm": 15.931221008300781, + "learning_rate": 9.407145671827909e-06, + "lm_loss": 5.8882, + "loss": 1.5251, + "step": 389, + "text_contrastive_loss": 0.7728, + "train_positive_log_prob": -87.4479, + "train_positive_token_accuracy": 0.0773, + "train_positive_token_prob": 0.0279 + }, + { + "contrastive_loss": 0.5045, + "epoch": 0.8803611738148984, + "grad_norm": 15.974322319030762, + "learning_rate": 9.403714184608411e-06, + "lm_loss": 5.8505, + "loss": 1.5032, + "step": 390, + "text_contrastive_loss": 0.8274, + "train_positive_log_prob": -87.7139, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.6073, + "epoch": 0.8826185101580135, + "grad_norm": 15.8613862991333, + "learning_rate": 9.400273424757607e-06, + "lm_loss": 5.783, + "loss": 1.6205, + "step": 391, + "text_contrastive_loss": 0.87, + "train_positive_log_prob": -86.5663, + "train_positive_token_accuracy": 0.0848, + "train_positive_token_prob": 0.0286 + }, + { + "contrastive_loss": 0.5515, + "epoch": 0.8848758465011287, + "grad_norm": 15.88589859008789, + "learning_rate": 9.396823399520495e-06, + "lm_loss": 5.937, + "loss": 1.6114, + "step": 392, + "text_contrastive_loss": 0.9325, + "train_positive_log_prob": -89.8743, + "train_positive_token_accuracy": 0.0787, + "train_positive_token_prob": 0.027 + }, + { + "contrastive_loss": 0.6382, + "epoch": 0.8871331828442438, + "grad_norm": 18.790132522583008, + "learning_rate": 9.393364116161582e-06, + "lm_loss": 5.9734, + "loss": 1.7433, + "step": 393, + "text_contrastive_loss": 1.0156, + "train_positive_log_prob": -87.2908, + "train_positive_token_accuracy": 0.0714, + "train_positive_token_prob": 0.026 + }, + { + "contrastive_loss": 0.5867, + "epoch": 0.8893905191873589, + "grad_norm": 15.837054252624512, + "learning_rate": 9.38989558196487e-06, + "lm_loss": 5.8251, + "loss": 1.6473, + "step": 394, + "text_contrastive_loss": 0.956, + "train_positive_log_prob": -85.8974, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.028 + }, + { + "contrastive_loss": 0.7086, + "epoch": 0.891647855530474, + "grad_norm": 18.514312744140625, + "learning_rate": 9.386417804233836e-06, + "lm_loss": 5.9453, + "loss": 1.7063, + "step": 395, + "text_contrastive_loss": 0.8063, + "train_positive_log_prob": -88.3693, + "train_positive_token_accuracy": 0.0723, + "train_positive_token_prob": 0.0261 + }, + { + "contrastive_loss": 0.6228, + "epoch": 0.8939051918735892, + "grad_norm": 16.663631439208984, + "learning_rate": 9.382930790291426e-06, + "lm_loss": 5.922, + "loss": 1.6739, + "step": 396, + "text_contrastive_loss": 0.918, + "train_positive_log_prob": -88.758, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.028 + }, + { + "contrastive_loss": 0.4702, + "epoch": 0.8961625282167043, + "grad_norm": 12.86646556854248, + "learning_rate": 9.37943454748003e-06, + "lm_loss": 6.0231, + "loss": 1.4457, + "step": 397, + "text_contrastive_loss": 0.7464, + "train_positive_log_prob": -89.2865, + "train_positive_token_accuracy": 0.0711, + "train_positive_token_prob": 0.0272 + }, + { + "contrastive_loss": 0.5632, + "epoch": 0.8984198645598194, + "grad_norm": 16.527372360229492, + "learning_rate": 9.375929083161475e-06, + "lm_loss": 5.9265, + "loss": 1.6135, + "step": 398, + "text_contrastive_loss": 0.9152, + "train_positive_log_prob": -86.9769, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0273 + }, + { + "contrastive_loss": 0.5095, + "epoch": 0.9006772009029346, + "grad_norm": 15.113215446472168, + "learning_rate": 9.372414404717001e-06, + "lm_loss": 5.7751, + "loss": 1.4844, + "step": 399, + "text_contrastive_loss": 0.7947, + "train_positive_log_prob": -85.9039, + "train_positive_token_accuracy": 0.0738, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.5488, + "epoch": 0.9029345372460497, + "grad_norm": 18.123912811279297, + "learning_rate": 9.36889051954725e-06, + "lm_loss": 5.9217, + "loss": 1.5527, + "step": 400, + "text_contrastive_loss": 0.8235, + "train_positive_log_prob": -86.2131, + "train_positive_token_accuracy": 0.0727, + "train_positive_token_prob": 0.0265 + }, + { + "contrastive_loss": 0.6158, + "epoch": 0.9051918735891648, + "grad_norm": 17.38731575012207, + "learning_rate": 9.365357435072255e-06, + "lm_loss": 5.9116, + "loss": 1.6158, + "step": 401, + "text_contrastive_loss": 0.8176, + "train_positive_log_prob": -88.9062, + "train_positive_token_accuracy": 0.0844, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.5791, + "epoch": 0.90744920993228, + "grad_norm": 16.729616165161133, + "learning_rate": 9.361815158731413e-06, + "lm_loss": 5.9322, + "loss": 1.6214, + "step": 402, + "text_contrastive_loss": 0.8981, + "train_positive_log_prob": -87.8731, + "train_positive_token_accuracy": 0.0796, + "train_positive_token_prob": 0.0279 + }, + { + "contrastive_loss": 0.682, + "epoch": 0.909706546275395, + "grad_norm": 19.316072463989258, + "learning_rate": 9.358263697983479e-06, + "lm_loss": 5.7965, + "loss": 1.7201, + "step": 403, + "text_contrastive_loss": 0.9169, + "train_positive_log_prob": -85.2488, + "train_positive_token_accuracy": 0.0738, + "train_positive_token_prob": 0.0275 + }, + { + "contrastive_loss": 0.5819, + "epoch": 0.9119638826185101, + "grad_norm": 17.301490783691406, + "learning_rate": 9.354703060306546e-06, + "lm_loss": 5.8588, + "loss": 1.6453, + "step": 404, + "text_contrastive_loss": 0.9551, + "train_positive_log_prob": -85.1157, + "train_positive_token_accuracy": 0.0809, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.5187, + "epoch": 0.9142212189616253, + "grad_norm": 17.567800521850586, + "learning_rate": 9.351133253198027e-06, + "lm_loss": 5.9289, + "loss": 1.587, + "step": 405, + "text_contrastive_loss": 0.9508, + "train_positive_log_prob": -88.7766, + "train_positive_token_accuracy": 0.0767, + "train_positive_token_prob": 0.0275 + }, + { + "contrastive_loss": 0.565, + "epoch": 0.9164785553047404, + "grad_norm": 16.85062599182129, + "learning_rate": 9.347554284174654e-06, + "lm_loss": 5.8882, + "loss": 1.6227, + "step": 406, + "text_contrastive_loss": 0.9377, + "train_positive_log_prob": -86.8987, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0276 + }, + { + "contrastive_loss": 0.6226, + "epoch": 0.9187358916478555, + "grad_norm": 17.073469161987305, + "learning_rate": 9.343966160772438e-06, + "lm_loss": 5.9151, + "loss": 1.6906, + "step": 407, + "text_contrastive_loss": 0.9529, + "train_positive_log_prob": -85.957, + "train_positive_token_accuracy": 0.0755, + "train_positive_token_prob": 0.0267 + }, + { + "contrastive_loss": 0.5385, + "epoch": 0.9209932279909706, + "grad_norm": 14.826162338256836, + "learning_rate": 9.340368890546672e-06, + "lm_loss": 5.9824, + "loss": 1.5431, + "step": 408, + "text_contrastive_loss": 0.8127, + "train_positive_log_prob": -89.7344, + "train_positive_token_accuracy": 0.0763, + "train_positive_token_prob": 0.027 + }, + { + "contrastive_loss": 0.4769, + "epoch": 0.9232505643340858, + "grad_norm": 15.338921546936035, + "learning_rate": 9.336762481071906e-06, + "lm_loss": 5.9406, + "loss": 1.4438, + "step": 409, + "text_contrastive_loss": 0.7459, + "train_positive_log_prob": -87.5311, + "train_positive_token_accuracy": 0.0737, + "train_positive_token_prob": 0.0273 + }, + { + "contrastive_loss": 0.5995, + "epoch": 0.9255079006772009, + "grad_norm": 16.352493286132812, + "learning_rate": 9.333146939941938e-06, + "lm_loss": 5.9679, + "loss": 1.6731, + "step": 410, + "text_contrastive_loss": 0.9537, + "train_positive_log_prob": -89.3072, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.0271 + }, + { + "contrastive_loss": 0.6094, + "epoch": 0.927765237020316, + "grad_norm": 14.538776397705078, + "learning_rate": 9.329522274769791e-06, + "lm_loss": 5.81, + "loss": 1.5912, + "step": 411, + "text_contrastive_loss": 0.8016, + "train_positive_log_prob": -83.6767, + "train_positive_token_accuracy": 0.0776, + "train_positive_token_prob": 0.028 + }, + { + "contrastive_loss": 0.6187, + "epoch": 0.9300225733634312, + "grad_norm": 17.85922622680664, + "learning_rate": 9.325888493187699e-06, + "lm_loss": 6.0291, + "loss": 1.7039, + "step": 412, + "text_contrastive_loss": 0.9646, + "train_positive_log_prob": -90.7396, + "train_positive_token_accuracy": 0.0812, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.57, + "epoch": 0.9322799097065463, + "grad_norm": 16.823604583740234, + "learning_rate": 9.322245602847094e-06, + "lm_loss": 5.9313, + "loss": 1.6159, + "step": 413, + "text_contrastive_loss": 0.9055, + "train_positive_log_prob": -88.0885, + "train_positive_token_accuracy": 0.0763, + "train_positive_token_prob": 0.0264 + }, + { + "contrastive_loss": 0.5034, + "epoch": 0.9345372460496614, + "grad_norm": 15.533576965332031, + "learning_rate": 9.31859361141859e-06, + "lm_loss": 5.9045, + "loss": 1.4692, + "step": 414, + "text_contrastive_loss": 0.7508, + "train_positive_log_prob": -83.6319, + "train_positive_token_accuracy": 0.0745, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.5157, + "epoch": 0.9367945823927766, + "grad_norm": 16.13719367980957, + "learning_rate": 9.314932526591956e-06, + "lm_loss": 5.9571, + "loss": 1.4991, + "step": 415, + "text_contrastive_loss": 0.7753, + "train_positive_log_prob": -89.6366, + "train_positive_token_accuracy": 0.0748, + "train_positive_token_prob": 0.0265 + }, + { + "contrastive_loss": 0.6012, + "epoch": 0.9390519187358917, + "grad_norm": 15.884916305541992, + "learning_rate": 9.311262356076118e-06, + "lm_loss": 5.8162, + "loss": 1.66, + "step": 416, + "text_contrastive_loss": 0.9543, + "train_positive_log_prob": -85.0836, + "train_positive_token_accuracy": 0.0737, + "train_positive_token_prob": 0.027 + }, + { + "contrastive_loss": 0.581, + "epoch": 0.9413092550790068, + "grad_norm": 17.31962776184082, + "learning_rate": 9.30758310759913e-06, + "lm_loss": 5.8727, + "loss": 1.5944, + "step": 417, + "text_contrastive_loss": 0.8524, + "train_positive_log_prob": -89.1009, + "train_positive_token_accuracy": 0.0811, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.6036, + "epoch": 0.9435665914221218, + "grad_norm": 17.83316421508789, + "learning_rate": 9.303894788908158e-06, + "lm_loss": 5.8111, + "loss": 1.609, + "step": 418, + "text_contrastive_loss": 0.8486, + "train_positive_log_prob": -85.8924, + "train_positive_token_accuracy": 0.0772, + "train_positive_token_prob": 0.0277 + }, + { + "contrastive_loss": 0.5781, + "epoch": 0.945823927765237, + "grad_norm": 16.921295166015625, + "learning_rate": 9.300197407769472e-06, + "lm_loss": 5.7745, + "loss": 1.6715, + "step": 419, + "text_contrastive_loss": 1.0319, + "train_positive_log_prob": -84.1146, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.4572, + "epoch": 0.9480812641083521, + "grad_norm": 15.227783203125, + "learning_rate": 9.296490971968416e-06, + "lm_loss": 5.843, + "loss": 1.4378, + "step": 420, + "text_contrastive_loss": 0.7925, + "train_positive_log_prob": -84.9493, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.5502, + "epoch": 0.9503386004514672, + "grad_norm": 17.646320343017578, + "learning_rate": 9.292775489309409e-06, + "lm_loss": 5.88, + "loss": 1.5161, + "step": 421, + "text_contrastive_loss": 0.7558, + "train_positive_log_prob": -88.1982, + "train_positive_token_accuracy": 0.0755, + "train_positive_token_prob": 0.0281 + }, + { + "contrastive_loss": 0.4811, + "epoch": 0.9525959367945824, + "grad_norm": 15.949664115905762, + "learning_rate": 9.289050967615914e-06, + "lm_loss": 5.9256, + "loss": 1.493, + "step": 422, + "text_contrastive_loss": 0.8389, + "train_positive_log_prob": -89.4667, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.0276 + }, + { + "contrastive_loss": 0.559, + "epoch": 0.9548532731376975, + "grad_norm": 17.920934677124023, + "learning_rate": 9.285317414730427e-06, + "lm_loss": 6.0225, + "loss": 1.6332, + "step": 423, + "text_contrastive_loss": 0.9439, + "train_positive_log_prob": -90.2229, + "train_positive_token_accuracy": 0.0728, + "train_positive_token_prob": 0.0264 + }, + { + "contrastive_loss": 0.5286, + "epoch": 0.9571106094808126, + "grad_norm": 16.39118003845215, + "learning_rate": 9.281574838514464e-06, + "lm_loss": 5.8816, + "loss": 1.5649, + "step": 424, + "text_contrastive_loss": 0.8964, + "train_positive_log_prob": -89.2078, + "train_positive_token_accuracy": 0.0876, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.5654, + "epoch": 0.9593679458239278, + "grad_norm": 15.611202239990234, + "learning_rate": 9.277823246848537e-06, + "lm_loss": 6.0108, + "loss": 1.6427, + "step": 425, + "text_contrastive_loss": 0.9524, + "train_positive_log_prob": -86.64, + "train_positive_token_accuracy": 0.0783, + "train_positive_token_prob": 0.0273 + }, + { + "contrastive_loss": 0.6069, + "epoch": 0.9616252821670429, + "grad_norm": 15.747174263000488, + "learning_rate": 9.274062647632144e-06, + "lm_loss": 5.8607, + "loss": 1.6155, + "step": 426, + "text_contrastive_loss": 0.845, + "train_positive_log_prob": -87.5638, + "train_positive_token_accuracy": 0.0753, + "train_positive_token_prob": 0.0274 + }, + { + "contrastive_loss": 0.6088, + "epoch": 0.963882618510158, + "grad_norm": 17.132394790649414, + "learning_rate": 9.270293048783747e-06, + "lm_loss": 5.877, + "loss": 1.6123, + "step": 427, + "text_contrastive_loss": 0.8315, + "train_positive_log_prob": -86.5295, + "train_positive_token_accuracy": 0.0706, + "train_positive_token_prob": 0.0268 + }, + { + "contrastive_loss": 0.5106, + "epoch": 0.9661399548532731, + "grad_norm": 16.926727294921875, + "learning_rate": 9.266514458240762e-06, + "lm_loss": 5.8414, + "loss": 1.4763, + "step": 428, + "text_contrastive_loss": 0.7632, + "train_positive_log_prob": -86.0964, + "train_positive_token_accuracy": 0.0694, + "train_positive_token_prob": 0.0275 + }, + { + "contrastive_loss": 0.5355, + "epoch": 0.9683972911963883, + "grad_norm": 16.437231063842773, + "learning_rate": 9.262726883959535e-06, + "lm_loss": 5.8534, + "loss": 1.4985, + "step": 429, + "text_contrastive_loss": 0.7552, + "train_positive_log_prob": -88.8963, + "train_positive_token_accuracy": 0.0838, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.5618, + "epoch": 0.9706546275395034, + "grad_norm": 16.431184768676758, + "learning_rate": 9.258930333915325e-06, + "lm_loss": 5.818, + "loss": 1.5769, + "step": 430, + "text_contrastive_loss": 0.8666, + "train_positive_log_prob": -86.364, + "train_positive_token_accuracy": 0.0803, + "train_positive_token_prob": 0.0292 + }, + { + "contrastive_loss": 0.4542, + "epoch": 0.9729119638826185, + "grad_norm": 15.110556602478027, + "learning_rate": 9.2551248161023e-06, + "lm_loss": 5.8151, + "loss": 1.4491, + "step": 431, + "text_contrastive_loss": 0.8268, + "train_positive_log_prob": -86.5963, + "train_positive_token_accuracy": 0.0758, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.5255, + "epoch": 0.9751693002257337, + "grad_norm": 15.16142749786377, + "learning_rate": 9.251310338533504e-06, + "lm_loss": 5.8124, + "loss": 1.576, + "step": 432, + "text_contrastive_loss": 0.9384, + "train_positive_log_prob": -85.0496, + "train_positive_token_accuracy": 0.0766, + "train_positive_token_prob": 0.0273 + }, + { + "contrastive_loss": 0.5851, + "epoch": 0.9774266365688488, + "grad_norm": 17.669143676757812, + "learning_rate": 9.247486909240849e-06, + "lm_loss": 6.052, + "loss": 1.6605, + "step": 433, + "text_contrastive_loss": 0.9404, + "train_positive_log_prob": -87.401, + "train_positive_token_accuracy": 0.0798, + "train_positive_token_prob": 0.0266 + }, + { + "contrastive_loss": 0.6662, + "epoch": 0.9796839729119639, + "grad_norm": 19.100378036499023, + "learning_rate": 9.243654536275095e-06, + "lm_loss": 5.8619, + "loss": 1.661, + "step": 434, + "text_contrastive_loss": 0.8173, + "train_positive_log_prob": -87.9776, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0281 + }, + { + "contrastive_loss": 0.6445, + "epoch": 0.981941309255079, + "grad_norm": 16.625484466552734, + "learning_rate": 9.23981322770584e-06, + "lm_loss": 5.7741, + "loss": 1.6627, + "step": 435, + "text_contrastive_loss": 0.8815, + "train_positive_log_prob": -85.8903, + "train_positive_token_accuracy": 0.0827, + "train_positive_token_prob": 0.0282 + }, + { + "contrastive_loss": 0.6179, + "epoch": 0.9841986455981941, + "grad_norm": 16.835329055786133, + "learning_rate": 9.235962991621484e-06, + "lm_loss": 5.7838, + "loss": 1.7484, + "step": 436, + "text_contrastive_loss": 1.1043, + "train_positive_log_prob": -86.3556, + "train_positive_token_accuracy": 0.0866, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.6033, + "epoch": 0.9864559819413092, + "grad_norm": 15.998946189880371, + "learning_rate": 9.232103836129239e-06, + "lm_loss": 5.8915, + "loss": 1.643, + "step": 437, + "text_contrastive_loss": 0.9012, + "train_positive_log_prob": -88.9053, + "train_positive_token_accuracy": 0.0748, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.5116, + "epoch": 0.9887133182844243, + "grad_norm": 14.224339485168457, + "learning_rate": 9.22823576935509e-06, + "lm_loss": 5.8724, + "loss": 1.5582, + "step": 438, + "text_contrastive_loss": 0.9187, + "train_positive_log_prob": -87.8349, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0279 + }, + { + "contrastive_loss": 0.6276, + "epoch": 0.9909706546275395, + "grad_norm": 16.852880477905273, + "learning_rate": 9.224358799443791e-06, + "lm_loss": 5.8422, + "loss": 1.6438, + "step": 439, + "text_contrastive_loss": 0.8639, + "train_positive_log_prob": -85.4203, + "train_positive_token_accuracy": 0.074, + "train_positive_token_prob": 0.0271 + }, + { + "contrastive_loss": 0.5279, + "epoch": 0.9932279909706546, + "grad_norm": 14.803088188171387, + "learning_rate": 9.220472934558838e-06, + "lm_loss": 5.9497, + "loss": 1.5146, + "step": 440, + "text_contrastive_loss": 0.7835, + "train_positive_log_prob": -88.8007, + "train_positive_token_accuracy": 0.0714, + "train_positive_token_prob": 0.0276 + }, + { + "contrastive_loss": 0.5428, + "epoch": 0.9954853273137697, + "grad_norm": 16.091028213500977, + "learning_rate": 9.216578182882459e-06, + "lm_loss": 5.7901, + "loss": 1.5402, + "step": 441, + "text_contrastive_loss": 0.8367, + "train_positive_log_prob": -86.4666, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.0282 + }, + { + "contrastive_loss": 0.6105, + "epoch": 0.9977426636568849, + "grad_norm": 16.647401809692383, + "learning_rate": 9.212674552615594e-06, + "lm_loss": 5.9314, + "loss": 1.6395, + "step": 442, + "text_contrastive_loss": 0.8717, + "train_positive_log_prob": -86.4921, + "train_positive_token_accuracy": 0.0715, + "train_positive_token_prob": 0.0282 + }, + { + "contrastive_loss": 0.4074, + "epoch": 1.0, + "grad_norm": 21.615554809570312, + "learning_rate": 9.208762051977879e-06, + "lm_loss": 5.8848, + "loss": 1.3437, + "step": 443, + "text_contrastive_loss": 0.6957, + "train_positive_log_prob": -89.149, + "train_positive_token_accuracy": 0.0714, + "train_positive_token_prob": 0.0268 + }, + { + "contrastive_loss": 0.4997, + "epoch": 1.002257336343115, + "grad_norm": 12.534616470336914, + "learning_rate": 9.204840689207626e-06, + "lm_loss": 5.7986, + "loss": 1.5238, + "step": 444, + "text_contrastive_loss": 0.8885, + "train_positive_log_prob": -84.1489, + "train_positive_token_accuracy": 0.0756, + "train_positive_token_prob": 0.0286 + }, + { + "contrastive_loss": 0.6509, + "epoch": 1.0045146726862302, + "grad_norm": 15.559388160705566, + "learning_rate": 9.20091047256181e-06, + "lm_loss": 5.7731, + "loss": 1.746, + "step": 445, + "text_contrastive_loss": 1.0355, + "train_positive_log_prob": -86.3517, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.521, + "epoch": 1.0067720090293453, + "grad_norm": 14.204784393310547, + "learning_rate": 9.196971410316047e-06, + "lm_loss": 5.8651, + "loss": 1.5433, + "step": 446, + "text_contrastive_loss": 0.8717, + "train_positive_log_prob": -87.9094, + "train_positive_token_accuracy": 0.0782, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.44, + "epoch": 1.0090293453724606, + "grad_norm": 12.919805526733398, + "learning_rate": 9.193023510764578e-06, + "lm_loss": 5.8029, + "loss": 1.3964, + "step": 447, + "text_contrastive_loss": 0.7523, + "train_positive_log_prob": -85.9992, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.028 + }, + { + "contrastive_loss": 0.5281, + "epoch": 1.0112866817155757, + "grad_norm": 14.745123863220215, + "learning_rate": 9.189066782220253e-06, + "lm_loss": 5.8893, + "loss": 1.5481, + "step": 448, + "text_contrastive_loss": 0.8621, + "train_positive_log_prob": -86.1519, + "train_positive_token_accuracy": 0.075, + "train_positive_token_prob": 0.0269 + }, + { + "contrastive_loss": 0.5417, + "epoch": 1.0135440180586908, + "grad_norm": 14.97851276397705, + "learning_rate": 9.185101233014516e-06, + "lm_loss": 5.892, + "loss": 1.6188, + "step": 449, + "text_contrastive_loss": 0.9759, + "train_positive_log_prob": -88.6506, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.5095, + "epoch": 1.0158013544018059, + "grad_norm": 12.922365188598633, + "learning_rate": 9.181126871497378e-06, + "lm_loss": 5.8521, + "loss": 1.5345, + "step": 450, + "text_contrastive_loss": 0.8796, + "train_positive_log_prob": -88.3738, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0275 + }, + { + "contrastive_loss": 0.4321, + "epoch": 1.018058690744921, + "grad_norm": 12.805669784545898, + "learning_rate": 9.177143706037411e-06, + "lm_loss": 5.8234, + "loss": 1.3938, + "step": 451, + "text_contrastive_loss": 0.7586, + "train_positive_log_prob": -86.8497, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.0282 + }, + { + "contrastive_loss": 0.4301, + "epoch": 1.020316027088036, + "grad_norm": 13.98415470123291, + "learning_rate": 9.173151745021722e-06, + "lm_loss": 5.7765, + "loss": 1.4034, + "step": 452, + "text_contrastive_loss": 0.7914, + "train_positive_log_prob": -84.8423, + "train_positive_token_accuracy": 0.0742, + "train_positive_token_prob": 0.0272 + }, + { + "contrastive_loss": 0.3565, + "epoch": 1.0225733634311513, + "grad_norm": 12.754312515258789, + "learning_rate": 9.169150996855939e-06, + "lm_loss": 5.8434, + "loss": 1.3144, + "step": 453, + "text_contrastive_loss": 0.7471, + "train_positive_log_prob": -87.193, + "train_positive_token_accuracy": 0.0764, + "train_positive_token_prob": 0.0273 + }, + { + "contrastive_loss": 0.5363, + "epoch": 1.0248306997742664, + "grad_norm": 14.950727462768555, + "learning_rate": 9.16514146996419e-06, + "lm_loss": 5.8094, + "loss": 1.5116, + "step": 454, + "text_contrastive_loss": 0.7889, + "train_positive_log_prob": -87.9752, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.0276 + }, + { + "contrastive_loss": 0.4892, + "epoch": 1.0270880361173815, + "grad_norm": 15.539891242980957, + "learning_rate": 9.161123172789091e-06, + "lm_loss": 5.7628, + "loss": 1.4316, + "step": 455, + "text_contrastive_loss": 0.7323, + "train_positive_log_prob": -85.8789, + "train_positive_token_accuracy": 0.0755, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.5357, + "epoch": 1.0293453724604966, + "grad_norm": 14.641853332519531, + "learning_rate": 9.157096113791727e-06, + "lm_loss": 5.8329, + "loss": 1.5993, + "step": 456, + "text_contrastive_loss": 0.9605, + "train_positive_log_prob": -85.2536, + "train_positive_token_accuracy": 0.074, + "train_positive_token_prob": 0.0276 + }, + { + "contrastive_loss": 0.4901, + "epoch": 1.0316027088036117, + "grad_norm": 13.549795150756836, + "learning_rate": 9.153060301451629e-06, + "lm_loss": 5.9588, + "loss": 1.5299, + "step": 457, + "text_contrastive_loss": 0.8878, + "train_positive_log_prob": -86.7073, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0281 + }, + { + "contrastive_loss": 0.517, + "epoch": 1.0338600451467268, + "grad_norm": 16.06815528869629, + "learning_rate": 9.149015744266759e-06, + "lm_loss": 5.7347, + "loss": 1.6093, + "step": 458, + "text_contrastive_loss": 1.0377, + "train_positive_log_prob": -86.0017, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.4481, + "epoch": 1.036117381489842, + "grad_norm": 16.42928695678711, + "learning_rate": 9.144962450753491e-06, + "lm_loss": 5.9885, + "loss": 1.4851, + "step": 459, + "text_contrastive_loss": 0.8763, + "train_positive_log_prob": -88.0703, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.5313, + "epoch": 1.0383747178329572, + "grad_norm": 16.827699661254883, + "learning_rate": 9.140900429446601e-06, + "lm_loss": 5.8495, + "loss": 1.5206, + "step": 460, + "text_contrastive_loss": 0.8086, + "train_positive_log_prob": -84.1785, + "train_positive_token_accuracy": 0.0782, + "train_positive_token_prob": 0.0273 + }, + { + "contrastive_loss": 0.5778, + "epoch": 1.0406320541760723, + "grad_norm": 14.369438171386719, + "learning_rate": 9.136829688899236e-06, + "lm_loss": 5.8839, + "loss": 1.641, + "step": 461, + "text_contrastive_loss": 0.9495, + "train_positive_log_prob": -86.6483, + "train_positive_token_accuracy": 0.0684, + "train_positive_token_prob": 0.0258 + }, + { + "contrastive_loss": 0.4081, + "epoch": 1.0428893905191874, + "grad_norm": 13.87952709197998, + "learning_rate": 9.132750237682907e-06, + "lm_loss": 5.8483, + "loss": 1.3558, + "step": 462, + "text_contrastive_loss": 0.7257, + "train_positive_log_prob": -87.6557, + "train_positive_token_accuracy": 0.08, + "train_positive_token_prob": 0.028 + }, + { + "contrastive_loss": 0.5475, + "epoch": 1.0451467268623025, + "grad_norm": 16.40715217590332, + "learning_rate": 9.128662084387462e-06, + "lm_loss": 5.8787, + "loss": 1.5305, + "step": 463, + "text_contrastive_loss": 0.7903, + "train_positive_log_prob": -85.792, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.6128, + "epoch": 1.0474040632054176, + "grad_norm": 22.7849063873291, + "learning_rate": 9.12456523762108e-06, + "lm_loss": 5.7487, + "loss": 1.6906, + "step": 464, + "text_contrastive_loss": 1.0059, + "train_positive_log_prob": -82.4837, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.4092, + "epoch": 1.0496613995485327, + "grad_norm": 15.759406089782715, + "learning_rate": 9.120459706010233e-06, + "lm_loss": 5.8403, + "loss": 1.3888, + "step": 465, + "text_contrastive_loss": 0.7912, + "train_positive_log_prob": -86.6266, + "train_positive_token_accuracy": 0.0737, + "train_positive_token_prob": 0.0281 + }, + { + "contrastive_loss": 0.3896, + "epoch": 1.0519187358916477, + "grad_norm": 14.026328086853027, + "learning_rate": 9.116345498199693e-06, + "lm_loss": 5.7976, + "loss": 1.3452, + "step": 466, + "text_contrastive_loss": 0.7516, + "train_positive_log_prob": -85.0623, + "train_positive_token_accuracy": 0.0788, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.4572, + "epoch": 1.054176072234763, + "grad_norm": 14.722611427307129, + "learning_rate": 9.112222622852494e-06, + "lm_loss": 5.7998, + "loss": 1.4623, + "step": 467, + "text_contrastive_loss": 0.8501, + "train_positive_log_prob": -85.3879, + "train_positive_token_accuracy": 0.0748, + "train_positive_token_prob": 0.0279 + }, + { + "contrastive_loss": 0.4256, + "epoch": 1.0564334085778782, + "grad_norm": 13.734708786010742, + "learning_rate": 9.108091088649922e-06, + "lm_loss": 5.7911, + "loss": 1.4313, + "step": 468, + "text_contrastive_loss": 0.8532, + "train_positive_log_prob": -86.4608, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.5696, + "epoch": 1.0586907449209932, + "grad_norm": 15.819417953491211, + "learning_rate": 9.103950904291496e-06, + "lm_loss": 5.717, + "loss": 1.5987, + "step": 469, + "text_contrastive_loss": 0.9148, + "train_positive_log_prob": -85.0369, + "train_positive_token_accuracy": 0.0775, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.4718, + "epoch": 1.0609480812641083, + "grad_norm": 14.002315521240234, + "learning_rate": 9.099802078494947e-06, + "lm_loss": 5.6815, + "loss": 1.4892, + "step": 470, + "text_contrastive_loss": 0.8986, + "train_positive_log_prob": -82.9807, + "train_positive_token_accuracy": 0.0856, + "train_positive_token_prob": 0.0292 + }, + { + "contrastive_loss": 0.515, + "epoch": 1.0632054176072234, + "grad_norm": 14.49440860748291, + "learning_rate": 9.095644619996206e-06, + "lm_loss": 5.9142, + "loss": 1.6303, + "step": 471, + "text_contrastive_loss": 1.0477, + "train_positive_log_prob": -87.7014, + "train_positive_token_accuracy": 0.0753, + "train_positive_token_prob": 0.0272 + }, + { + "contrastive_loss": 0.4708, + "epoch": 1.0654627539503385, + "grad_norm": 14.554546356201172, + "learning_rate": 9.09147853754938e-06, + "lm_loss": 5.8102, + "loss": 1.4567, + "step": 472, + "text_contrastive_loss": 0.8098, + "train_positive_log_prob": -85.3781, + "train_positive_token_accuracy": 0.0755, + "train_positive_token_prob": 0.0279 + }, + { + "contrastive_loss": 0.5137, + "epoch": 1.0677200902934538, + "grad_norm": 15.420254707336426, + "learning_rate": 9.087303839926727e-06, + "lm_loss": 5.8407, + "loss": 1.5252, + "step": 473, + "text_contrastive_loss": 0.855, + "train_positive_log_prob": -86.5128, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0277 + }, + { + "contrastive_loss": 0.5185, + "epoch": 1.069977426636569, + "grad_norm": 14.635124206542969, + "learning_rate": 9.08312053591866e-06, + "lm_loss": 5.8402, + "loss": 1.5589, + "step": 474, + "text_contrastive_loss": 0.9127, + "train_positive_log_prob": -85.3483, + "train_positive_token_accuracy": 0.0775, + "train_positive_token_prob": 0.0277 + }, + { + "contrastive_loss": 0.5677, + "epoch": 1.072234762979684, + "grad_norm": 15.2582368850708, + "learning_rate": 9.0789286343337e-06, + "lm_loss": 5.8084, + "loss": 1.563, + "step": 475, + "text_contrastive_loss": 0.8288, + "train_positive_log_prob": -85.7859, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.0276 + }, + { + "contrastive_loss": 0.4783, + "epoch": 1.074492099322799, + "grad_norm": 15.656068801879883, + "learning_rate": 9.07472814399848e-06, + "lm_loss": 5.7801, + "loss": 1.5328, + "step": 476, + "text_contrastive_loss": 0.953, + "train_positive_log_prob": -86.9935, + "train_positive_token_accuracy": 0.0755, + "train_positive_token_prob": 0.0279 + }, + { + "contrastive_loss": 0.5867, + "epoch": 1.0767494356659142, + "grad_norm": 15.501360893249512, + "learning_rate": 9.070519073757717e-06, + "lm_loss": 5.747, + "loss": 1.5746, + "step": 477, + "text_contrastive_loss": 0.8264, + "train_positive_log_prob": -85.5354, + "train_positive_token_accuracy": 0.0738, + "train_positive_token_prob": 0.0279 + }, + { + "contrastive_loss": 0.5735, + "epoch": 1.0790067720090293, + "grad_norm": 14.747407913208008, + "learning_rate": 9.06630143247419e-06, + "lm_loss": 5.8876, + "loss": 1.649, + "step": 478, + "text_contrastive_loss": 0.9735, + "train_positive_log_prob": -83.9897, + "train_positive_token_accuracy": 0.0745, + "train_positive_token_prob": 0.0267 + }, + { + "contrastive_loss": 0.455, + "epoch": 1.0812641083521444, + "grad_norm": 14.040230751037598, + "learning_rate": 9.062075229028728e-06, + "lm_loss": 5.825, + "loss": 1.4871, + "step": 479, + "text_contrastive_loss": 0.8991, + "train_positive_log_prob": -86.5831, + "train_positive_token_accuracy": 0.0691, + "train_positive_token_prob": 0.0268 + }, + { + "contrastive_loss": 0.5394, + "epoch": 1.0835214446952597, + "grad_norm": 15.170976638793945, + "learning_rate": 9.057840472320192e-06, + "lm_loss": 5.758, + "loss": 1.4797, + "step": 480, + "text_contrastive_loss": 0.729, + "train_positive_log_prob": -85.067, + "train_positive_token_accuracy": 0.0767, + "train_positive_token_prob": 0.0271 + }, + { + "contrastive_loss": 0.4516, + "epoch": 1.0857787810383748, + "grad_norm": 12.979825019836426, + "learning_rate": 9.053597171265447e-06, + "lm_loss": 5.8438, + "loss": 1.4699, + "step": 481, + "text_contrastive_loss": 0.8678, + "train_positive_log_prob": -88.4812, + "train_positive_token_accuracy": 0.0766, + "train_positive_token_prob": 0.0272 + }, + { + "contrastive_loss": 0.4443, + "epoch": 1.0880361173814899, + "grad_norm": 15.909980773925781, + "learning_rate": 9.04934533479935e-06, + "lm_loss": 5.8316, + "loss": 1.4434, + "step": 482, + "text_contrastive_loss": 0.8318, + "train_positive_log_prob": -86.4701, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.0276 + }, + { + "contrastive_loss": 0.492, + "epoch": 1.090293453724605, + "grad_norm": 17.079206466674805, + "learning_rate": 9.045084971874738e-06, + "lm_loss": 5.8496, + "loss": 1.5202, + "step": 483, + "text_contrastive_loss": 0.8865, + "train_positive_log_prob": -88.863, + "train_positive_token_accuracy": 0.0748, + "train_positive_token_prob": 0.0274 + }, + { + "contrastive_loss": 0.4064, + "epoch": 1.09255079006772, + "grad_norm": 16.53702735900879, + "learning_rate": 9.040816091462393e-06, + "lm_loss": 5.9194, + "loss": 1.4017, + "step": 484, + "text_contrastive_loss": 0.8067, + "train_positive_log_prob": -88.1422, + "train_positive_token_accuracy": 0.0787, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.5243, + "epoch": 1.0948081264108351, + "grad_norm": 15.225687026977539, + "learning_rate": 9.036538702551037e-06, + "lm_loss": 5.7128, + "loss": 1.5493, + "step": 485, + "text_contrastive_loss": 0.9074, + "train_positive_log_prob": -82.5884, + "train_positive_token_accuracy": 0.0724, + "train_positive_token_prob": 0.0282 + }, + { + "contrastive_loss": 0.4534, + "epoch": 1.0970654627539504, + "grad_norm": 13.470952987670898, + "learning_rate": 9.032252814147302e-06, + "lm_loss": 5.7989, + "loss": 1.4695, + "step": 486, + "text_contrastive_loss": 0.8724, + "train_positive_log_prob": -85.6214, + "train_positive_token_accuracy": 0.0738, + "train_positive_token_prob": 0.0275 + }, + { + "contrastive_loss": 0.4768, + "epoch": 1.0993227990970655, + "grad_norm": 14.641610145568848, + "learning_rate": 9.027958435275726e-06, + "lm_loss": 5.8596, + "loss": 1.4641, + "step": 487, + "text_contrastive_loss": 0.8026, + "train_positive_log_prob": -87.3528, + "train_positive_token_accuracy": 0.0742, + "train_positive_token_prob": 0.0266 + }, + { + "contrastive_loss": 0.4762, + "epoch": 1.1015801354401806, + "grad_norm": 15.02444076538086, + "learning_rate": 9.023655574978716e-06, + "lm_loss": 5.7585, + "loss": 1.5156, + "step": 488, + "text_contrastive_loss": 0.9271, + "train_positive_log_prob": -88.4278, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.3907, + "epoch": 1.1038374717832957, + "grad_norm": 14.093965530395508, + "learning_rate": 9.019344242316542e-06, + "lm_loss": 5.8088, + "loss": 1.4166, + "step": 489, + "text_contrastive_loss": 0.8902, + "train_positive_log_prob": -86.7681, + "train_positive_token_accuracy": 0.0745, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.5091, + "epoch": 1.1060948081264108, + "grad_norm": 20.027910232543945, + "learning_rate": 9.015024446367315e-06, + "lm_loss": 5.7096, + "loss": 1.5067, + "step": 490, + "text_contrastive_loss": 0.8532, + "train_positive_log_prob": -83.4806, + "train_positive_token_accuracy": 0.086, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.4379, + "epoch": 1.108352144469526, + "grad_norm": 14.882514953613281, + "learning_rate": 9.010696196226963e-06, + "lm_loss": 5.7856, + "loss": 1.5241, + "step": 491, + "text_contrastive_loss": 1.0153, + "train_positive_log_prob": -82.536, + "train_positive_token_accuracy": 0.0728, + "train_positive_token_prob": 0.0279 + }, + { + "contrastive_loss": 0.4813, + "epoch": 1.110609480812641, + "grad_norm": 14.565542221069336, + "learning_rate": 9.00635950100922e-06, + "lm_loss": 5.752, + "loss": 1.5072, + "step": 492, + "text_contrastive_loss": 0.9013, + "train_positive_log_prob": -88.9991, + "train_positive_token_accuracy": 0.0722, + "train_positive_token_prob": 0.0282 + }, + { + "contrastive_loss": 0.5157, + "epoch": 1.1128668171557563, + "grad_norm": 15.493803024291992, + "learning_rate": 9.002014369845592e-06, + "lm_loss": 5.8447, + "loss": 1.5698, + "step": 493, + "text_contrastive_loss": 0.9394, + "train_positive_log_prob": -86.2793, + "train_positive_token_accuracy": 0.0826, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.5211, + "epoch": 1.1151241534988714, + "grad_norm": 16.29731559753418, + "learning_rate": 8.997660811885367e-06, + "lm_loss": 5.8996, + "loss": 1.5655, + "step": 494, + "text_contrastive_loss": 0.9089, + "train_positive_log_prob": -84.4844, + "train_positive_token_accuracy": 0.0753, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.5258, + "epoch": 1.1173814898419865, + "grad_norm": 14.3463134765625, + "learning_rate": 8.993298836295556e-06, + "lm_loss": 5.7836, + "loss": 1.5113, + "step": 495, + "text_contrastive_loss": 0.8144, + "train_positive_log_prob": -83.9034, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.4295, + "epoch": 1.1196388261851016, + "grad_norm": 13.856711387634277, + "learning_rate": 8.988928452260909e-06, + "lm_loss": 5.8693, + "loss": 1.3667, + "step": 496, + "text_contrastive_loss": 0.7005, + "train_positive_log_prob": -88.31, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0285 + }, + { + "contrastive_loss": 0.4648, + "epoch": 1.1218961625282167, + "grad_norm": 15.214653968811035, + "learning_rate": 8.984549668983875e-06, + "lm_loss": 5.8318, + "loss": 1.4636, + "step": 497, + "text_contrastive_loss": 0.8313, + "train_positive_log_prob": -86.9489, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.4833, + "epoch": 1.1241534988713318, + "grad_norm": 15.539227485656738, + "learning_rate": 8.980162495684587e-06, + "lm_loss": 5.6598, + "loss": 1.4292, + "step": 498, + "text_contrastive_loss": 0.7598, + "train_positive_log_prob": -83.8779, + "train_positive_token_accuracy": 0.0764, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.4891, + "epoch": 1.1264108352144468, + "grad_norm": 14.838481903076172, + "learning_rate": 8.975766941600852e-06, + "lm_loss": 5.7019, + "loss": 1.4699, + "step": 499, + "text_contrastive_loss": 0.8214, + "train_positive_log_prob": -81.542, + "train_positive_token_accuracy": 0.0748, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.4984, + "epoch": 1.1286681715575622, + "grad_norm": 14.815726280212402, + "learning_rate": 8.971363015988115e-06, + "lm_loss": 5.7222, + "loss": 1.4426, + "step": 500, + "text_contrastive_loss": 0.744, + "train_positive_log_prob": -86.3017, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.4378, + "epoch": 1.1309255079006773, + "grad_norm": 14.244773864746094, + "learning_rate": 8.966950728119453e-06, + "lm_loss": 5.7203, + "loss": 1.4365, + "step": 501, + "text_contrastive_loss": 0.8533, + "train_positive_log_prob": -86.3226, + "train_positive_token_accuracy": 0.0826, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.5596, + "epoch": 1.1331828442437923, + "grad_norm": 14.889687538146973, + "learning_rate": 8.962530087285552e-06, + "lm_loss": 5.7719, + "loss": 1.537, + "step": 502, + "text_contrastive_loss": 0.8005, + "train_positive_log_prob": -84.6413, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.3819, + "epoch": 1.1354401805869074, + "grad_norm": 12.292415618896484, + "learning_rate": 8.958101102794686e-06, + "lm_loss": 5.7606, + "loss": 1.3988, + "step": 503, + "text_contrastive_loss": 0.8817, + "train_positive_log_prob": -86.0021, + "train_positive_token_accuracy": 0.0822, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.525, + "epoch": 1.1376975169300225, + "grad_norm": 14.934059143066406, + "learning_rate": 8.953663783972692e-06, + "lm_loss": 5.7507, + "loss": 1.5203, + "step": 504, + "text_contrastive_loss": 0.8404, + "train_positive_log_prob": -85.9879, + "train_positive_token_accuracy": 0.0731, + "train_positive_token_prob": 0.0272 + }, + { + "contrastive_loss": 0.385, + "epoch": 1.1399548532731376, + "grad_norm": 14.680123329162598, + "learning_rate": 8.949218140162965e-06, + "lm_loss": 5.7674, + "loss": 1.4086, + "step": 505, + "text_contrastive_loss": 0.8937, + "train_positive_log_prob": -84.6673, + "train_positive_token_accuracy": 0.0745, + "train_positive_token_prob": 0.0275 + }, + { + "contrastive_loss": 0.4543, + "epoch": 1.1422121896162527, + "grad_norm": 15.095152854919434, + "learning_rate": 8.944764180726423e-06, + "lm_loss": 5.7739, + "loss": 1.5434, + "step": 506, + "text_contrastive_loss": 1.0236, + "train_positive_log_prob": -84.7846, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0286 + }, + { + "contrastive_loss": 0.6487, + "epoch": 1.144469525959368, + "grad_norm": 18.035188674926758, + "learning_rate": 8.940301915041496e-06, + "lm_loss": 5.7316, + "loss": 1.6844, + "step": 507, + "text_contrastive_loss": 0.925, + "train_positive_log_prob": -84.2501, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.5164, + "epoch": 1.146726862302483, + "grad_norm": 15.376690864562988, + "learning_rate": 8.935831352504103e-06, + "lm_loss": 5.7625, + "loss": 1.4631, + "step": 508, + "text_contrastive_loss": 0.741, + "train_positive_log_prob": -84.071, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.0285 + }, + { + "contrastive_loss": 0.5285, + "epoch": 1.1489841986455982, + "grad_norm": 17.431364059448242, + "learning_rate": 8.931352502527633e-06, + "lm_loss": 5.7171, + "loss": 1.5349, + "step": 509, + "text_contrastive_loss": 0.8694, + "train_positive_log_prob": -82.9397, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.5439, + "epoch": 1.1512415349887133, + "grad_norm": 16.915279388427734, + "learning_rate": 8.926865374542928e-06, + "lm_loss": 5.6972, + "loss": 1.6579, + "step": 510, + "text_contrastive_loss": 1.0885, + "train_positive_log_prob": -82.3006, + "train_positive_token_accuracy": 0.08, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.5082, + "epoch": 1.1534988713318284, + "grad_norm": 16.22882843017578, + "learning_rate": 8.922369977998257e-06, + "lm_loss": 5.8366, + "loss": 1.6019, + "step": 511, + "text_contrastive_loss": 1.0202, + "train_positive_log_prob": -88.008, + "train_positive_token_accuracy": 0.0821, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.4701, + "epoch": 1.1557562076749435, + "grad_norm": 15.099251747131348, + "learning_rate": 8.917866322359303e-06, + "lm_loss": 5.823, + "loss": 1.4856, + "step": 512, + "text_contrastive_loss": 0.8665, + "train_positive_log_prob": -87.0958, + "train_positive_token_accuracy": 0.075, + "train_positive_token_prob": 0.0274 + }, + { + "contrastive_loss": 0.4683, + "epoch": 1.1580135440180588, + "grad_norm": 14.21667766571045, + "learning_rate": 8.913354417109136e-06, + "lm_loss": 5.7688, + "loss": 1.41, + "step": 513, + "text_contrastive_loss": 0.7296, + "train_positive_log_prob": -84.2089, + "train_positive_token_accuracy": 0.0728, + "train_positive_token_prob": 0.0285 + }, + { + "contrastive_loss": 0.4401, + "epoch": 1.1602708803611739, + "grad_norm": 14.73274040222168, + "learning_rate": 8.908834271748202e-06, + "lm_loss": 5.6971, + "loss": 1.3945, + "step": 514, + "text_contrastive_loss": 0.7693, + "train_positive_log_prob": -85.4097, + "train_positive_token_accuracy": 0.0711, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.4365, + "epoch": 1.162528216704289, + "grad_norm": 14.375649452209473, + "learning_rate": 8.904305895794292e-06, + "lm_loss": 5.781, + "loss": 1.4547, + "step": 515, + "text_contrastive_loss": 0.8803, + "train_positive_log_prob": -86.5361, + "train_positive_token_accuracy": 0.0806, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.608, + "epoch": 1.164785553047404, + "grad_norm": 15.808117866516113, + "learning_rate": 8.899769298782528e-06, + "lm_loss": 5.8422, + "loss": 1.6683, + "step": 516, + "text_contrastive_loss": 0.9521, + "train_positive_log_prob": -87.3811, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.0285 + }, + { + "contrastive_loss": 0.545, + "epoch": 1.1670428893905191, + "grad_norm": 17.832517623901367, + "learning_rate": 8.895224490265346e-06, + "lm_loss": 5.6745, + "loss": 1.5618, + "step": 517, + "text_contrastive_loss": 0.8988, + "train_positive_log_prob": -82.1474, + "train_positive_token_accuracy": 0.0823, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.453, + "epoch": 1.1693002257336342, + "grad_norm": 14.00742244720459, + "learning_rate": 8.890671479812472e-06, + "lm_loss": 5.8005, + "loss": 1.3826, + "step": 518, + "text_contrastive_loss": 0.6992, + "train_positive_log_prob": -84.5601, + "train_positive_token_accuracy": 0.0823, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.532, + "epoch": 1.1715575620767495, + "grad_norm": 14.758885383605957, + "learning_rate": 8.886110277010902e-06, + "lm_loss": 5.7368, + "loss": 1.5059, + "step": 519, + "text_contrastive_loss": 0.8005, + "train_positive_log_prob": -86.0945, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.5874, + "epoch": 1.1738148984198646, + "grad_norm": 16.27832794189453, + "learning_rate": 8.88154089146488e-06, + "lm_loss": 5.6483, + "loss": 1.6038, + "step": 520, + "text_contrastive_loss": 0.9031, + "train_positive_log_prob": -80.9988, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.4337, + "epoch": 1.1760722347629797, + "grad_norm": 13.473103523254395, + "learning_rate": 8.876963332795881e-06, + "lm_loss": 5.7246, + "loss": 1.4329, + "step": 521, + "text_contrastive_loss": 0.8535, + "train_positive_log_prob": -85.4215, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0285 + }, + { + "contrastive_loss": 0.5869, + "epoch": 1.1783295711060948, + "grad_norm": 14.969170570373535, + "learning_rate": 8.87237761064259e-06, + "lm_loss": 5.7173, + "loss": 1.5843, + "step": 522, + "text_contrastive_loss": 0.8513, + "train_positive_log_prob": -83.929, + "train_positive_token_accuracy": 0.0857, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.4317, + "epoch": 1.18058690744921, + "grad_norm": 14.787362098693848, + "learning_rate": 8.867783734660883e-06, + "lm_loss": 5.7335, + "loss": 1.4473, + "step": 523, + "text_contrastive_loss": 0.8844, + "train_positive_log_prob": -82.3088, + "train_positive_token_accuracy": 0.0844, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.4609, + "epoch": 1.182844243792325, + "grad_norm": 14.429769515991211, + "learning_rate": 8.8631817145238e-06, + "lm_loss": 5.8172, + "loss": 1.5025, + "step": 524, + "text_contrastive_loss": 0.9197, + "train_positive_log_prob": -86.6676, + "train_positive_token_accuracy": 0.0829, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.47, + "epoch": 1.18510158013544, + "grad_norm": 14.706186294555664, + "learning_rate": 8.858571559921539e-06, + "lm_loss": 5.6513, + "loss": 1.465, + "step": 525, + "text_contrastive_loss": 0.8598, + "train_positive_log_prob": -83.7151, + "train_positive_token_accuracy": 0.0744, + "train_positive_token_prob": 0.0286 + }, + { + "contrastive_loss": 0.5212, + "epoch": 1.1873589164785554, + "grad_norm": 14.896646499633789, + "learning_rate": 8.853953280561412e-06, + "lm_loss": 5.6576, + "loss": 1.5066, + "step": 526, + "text_contrastive_loss": 0.8394, + "train_positive_log_prob": -81.4381, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.3822, + "epoch": 1.1896162528216705, + "grad_norm": 12.50956916809082, + "learning_rate": 8.849326886167854e-06, + "lm_loss": 5.781, + "loss": 1.3273, + "step": 527, + "text_contrastive_loss": 0.734, + "train_positive_log_prob": -85.8618, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.4379, + "epoch": 1.1918735891647856, + "grad_norm": 14.234749794006348, + "learning_rate": 8.844692386482379e-06, + "lm_loss": 5.6949, + "loss": 1.3826, + "step": 528, + "text_contrastive_loss": 0.7505, + "train_positive_log_prob": -84.6665, + "train_positive_token_accuracy": 0.0846, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.4017, + "epoch": 1.1941309255079007, + "grad_norm": 13.032413482666016, + "learning_rate": 8.840049791263567e-06, + "lm_loss": 5.7709, + "loss": 1.3419, + "step": 529, + "text_contrastive_loss": 0.7263, + "train_positive_log_prob": -84.9181, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.4921, + "epoch": 1.1963882618510158, + "grad_norm": 15.530366897583008, + "learning_rate": 8.835399110287046e-06, + "lm_loss": 5.7254, + "loss": 1.559, + "step": 530, + "text_contrastive_loss": 0.9888, + "train_positive_log_prob": -84.117, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.53, + "epoch": 1.1986455981941309, + "grad_norm": 14.476029396057129, + "learning_rate": 8.830740353345475e-06, + "lm_loss": 5.6928, + "loss": 1.5779, + "step": 531, + "text_contrastive_loss": 0.9572, + "train_positive_log_prob": -83.7219, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.4554, + "epoch": 1.200902934537246, + "grad_norm": 14.346333503723145, + "learning_rate": 8.826073530248508e-06, + "lm_loss": 5.659, + "loss": 1.3771, + "step": 532, + "text_contrastive_loss": 0.7116, + "train_positive_log_prob": -82.5678, + "train_positive_token_accuracy": 0.0811, + "train_positive_token_prob": 0.0292 + }, + { + "contrastive_loss": 0.3841, + "epoch": 1.2031602708803613, + "grad_norm": 12.81987190246582, + "learning_rate": 8.82139865082279e-06, + "lm_loss": 5.7435, + "loss": 1.3075, + "step": 533, + "text_contrastive_loss": 0.6981, + "train_positive_log_prob": -85.3931, + "train_positive_token_accuracy": 0.0723, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.5547, + "epoch": 1.2054176072234764, + "grad_norm": 14.714592933654785, + "learning_rate": 8.81671572491193e-06, + "lm_loss": 5.8595, + "loss": 1.5969, + "step": 534, + "text_contrastive_loss": 0.9126, + "train_positive_log_prob": -86.0575, + "train_positive_token_accuracy": 0.0713, + "train_positive_token_prob": 0.0273 + }, + { + "contrastive_loss": 0.5548, + "epoch": 1.2076749435665914, + "grad_norm": 15.354697227478027, + "learning_rate": 8.812024762376477e-06, + "lm_loss": 5.7331, + "loss": 1.5698, + "step": 535, + "text_contrastive_loss": 0.8833, + "train_positive_log_prob": -82.7032, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.0279 + }, + { + "contrastive_loss": 0.4599, + "epoch": 1.2099322799097065, + "grad_norm": 16.497697830200195, + "learning_rate": 8.807325773093904e-06, + "lm_loss": 5.7642, + "loss": 1.5156, + "step": 536, + "text_contrastive_loss": 0.9587, + "train_positive_log_prob": -84.4365, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.0275 + }, + { + "contrastive_loss": 0.5371, + "epoch": 1.2121896162528216, + "grad_norm": 15.794730186462402, + "learning_rate": 8.802618766958586e-06, + "lm_loss": 5.748, + "loss": 1.5209, + "step": 537, + "text_contrastive_loss": 0.8179, + "train_positive_log_prob": -86.759, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.53, + "epoch": 1.2144469525959367, + "grad_norm": 14.895218849182129, + "learning_rate": 8.797903753881775e-06, + "lm_loss": 5.6988, + "loss": 1.5466, + "step": 538, + "text_contrastive_loss": 0.8936, + "train_positive_log_prob": -84.2733, + "train_positive_token_accuracy": 0.0724, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.4879, + "epoch": 1.2167042889390518, + "grad_norm": 15.272669792175293, + "learning_rate": 8.793180743791587e-06, + "lm_loss": 5.6559, + "loss": 1.4853, + "step": 539, + "text_contrastive_loss": 0.8636, + "train_positive_log_prob": -84.2196, + "train_positive_token_accuracy": 0.085, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.4846, + "epoch": 1.2189616252821671, + "grad_norm": 15.89705753326416, + "learning_rate": 8.788449746632976e-06, + "lm_loss": 5.7469, + "loss": 1.5015, + "step": 540, + "text_contrastive_loss": 0.8843, + "train_positive_log_prob": -84.9917, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.454, + "epoch": 1.2212189616252822, + "grad_norm": 13.411090850830078, + "learning_rate": 8.78371077236771e-06, + "lm_loss": 5.7542, + "loss": 1.4535, + "step": 541, + "text_contrastive_loss": 0.8482, + "train_positive_log_prob": -83.0143, + "train_positive_token_accuracy": 0.0737, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.5016, + "epoch": 1.2234762979683973, + "grad_norm": 15.005658149719238, + "learning_rate": 8.778963830974362e-06, + "lm_loss": 5.8561, + "loss": 1.6098, + "step": 542, + "text_contrastive_loss": 1.045, + "train_positive_log_prob": -86.3997, + "train_positive_token_accuracy": 0.0696, + "train_positive_token_prob": 0.0273 + }, + { + "contrastive_loss": 0.3232, + "epoch": 1.2257336343115124, + "grad_norm": 11.32888126373291, + "learning_rate": 8.77420893244827e-06, + "lm_loss": 5.7101, + "loss": 1.2731, + "step": 543, + "text_contrastive_loss": 0.7577, + "train_positive_log_prob": -84.8507, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.0286 + }, + { + "contrastive_loss": 0.4342, + "epoch": 1.2279909706546275, + "grad_norm": 15.624367713928223, + "learning_rate": 8.769446086801536e-06, + "lm_loss": 5.7126, + "loss": 1.4107, + "step": 544, + "text_contrastive_loss": 0.8105, + "train_positive_log_prob": -85.9818, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.4569, + "epoch": 1.2302483069977426, + "grad_norm": 15.966873168945312, + "learning_rate": 8.764675304062992e-06, + "lm_loss": 5.8162, + "loss": 1.493, + "step": 545, + "text_contrastive_loss": 0.909, + "train_positive_log_prob": -86.5342, + "train_positive_token_accuracy": 0.0683, + "train_positive_token_prob": 0.0271 + }, + { + "contrastive_loss": 0.5854, + "epoch": 1.2325056433408579, + "grad_norm": 16.919601440429688, + "learning_rate": 8.759896594278183e-06, + "lm_loss": 5.7145, + "loss": 1.6254, + "step": 546, + "text_contrastive_loss": 0.9372, + "train_positive_log_prob": -83.5294, + "train_positive_token_accuracy": 0.0812, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.4979, + "epoch": 1.234762979683973, + "grad_norm": 15.050749778747559, + "learning_rate": 8.755109967509345e-06, + "lm_loss": 5.668, + "loss": 1.4536, + "step": 547, + "text_contrastive_loss": 0.7776, + "train_positive_log_prob": -82.9564, + "train_positive_token_accuracy": 0.0787, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.4772, + "epoch": 1.237020316027088, + "grad_norm": 14.770718574523926, + "learning_rate": 8.750315433835387e-06, + "lm_loss": 5.6782, + "loss": 1.5186, + "step": 548, + "text_contrastive_loss": 0.9473, + "train_positive_log_prob": -83.6194, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.028 + }, + { + "contrastive_loss": 0.5056, + "epoch": 1.2392776523702032, + "grad_norm": 16.688947677612305, + "learning_rate": 8.745513003351862e-06, + "lm_loss": 5.7111, + "loss": 1.5077, + "step": 549, + "text_contrastive_loss": 0.8621, + "train_positive_log_prob": -86.1599, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.0286 + }, + { + "contrastive_loss": 0.5524, + "epoch": 1.2415349887133182, + "grad_norm": 18.52263641357422, + "learning_rate": 8.740702686170955e-06, + "lm_loss": 5.8226, + "loss": 1.6603, + "step": 550, + "text_contrastive_loss": 1.0512, + "train_positive_log_prob": -86.1766, + "train_positive_token_accuracy": 0.0728, + "train_positive_token_prob": 0.0264 + }, + { + "contrastive_loss": 0.4038, + "epoch": 1.2437923250564333, + "grad_norm": 16.549697875976562, + "learning_rate": 8.735884492421457e-06, + "lm_loss": 5.6869, + "loss": 1.3436, + "step": 551, + "text_contrastive_loss": 0.7422, + "train_positive_log_prob": -83.1054, + "train_positive_token_accuracy": 0.081, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.518, + "epoch": 1.2460496613995486, + "grad_norm": 18.150911331176758, + "learning_rate": 8.731058432248743e-06, + "lm_loss": 5.7851, + "loss": 1.6201, + "step": 552, + "text_contrastive_loss": 1.0474, + "train_positive_log_prob": -84.7862, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0276 + }, + { + "contrastive_loss": 0.481, + "epoch": 1.2483069977426637, + "grad_norm": 15.780801773071289, + "learning_rate": 8.726224515814752e-06, + "lm_loss": 5.748, + "loss": 1.5469, + "step": 553, + "text_contrastive_loss": 0.9822, + "train_positive_log_prob": -86.1575, + "train_positive_token_accuracy": 0.0766, + "train_positive_token_prob": 0.0276 + }, + { + "contrastive_loss": 0.587, + "epoch": 1.2505643340857788, + "grad_norm": 16.04880142211914, + "learning_rate": 8.721382753297967e-06, + "lm_loss": 5.7894, + "loss": 1.5874, + "step": 554, + "text_contrastive_loss": 0.8431, + "train_positive_log_prob": -84.9223, + "train_positive_token_accuracy": 0.0735, + "train_positive_token_prob": 0.0276 + }, + { + "contrastive_loss": 0.4011, + "epoch": 1.252821670428894, + "grad_norm": 15.039539337158203, + "learning_rate": 8.71653315489339e-06, + "lm_loss": 5.7547, + "loss": 1.408, + "step": 555, + "text_contrastive_loss": 0.8627, + "train_positive_log_prob": -86.407, + "train_positive_token_accuracy": 0.0819, + "train_positive_token_prob": 0.0285 + }, + { + "contrastive_loss": 0.3833, + "epoch": 1.255079006772009, + "grad_norm": 13.640923500061035, + "learning_rate": 8.711675730812522e-06, + "lm_loss": 5.7028, + "loss": 1.3673, + "step": 556, + "text_contrastive_loss": 0.8273, + "train_positive_log_prob": -83.7397, + "train_positive_token_accuracy": 0.0803, + "train_positive_token_prob": 0.0292 + }, + { + "contrastive_loss": 0.5099, + "epoch": 1.257336343115124, + "grad_norm": 15.794052124023438, + "learning_rate": 8.706810491283346e-06, + "lm_loss": 5.7546, + "loss": 1.4729, + "step": 557, + "text_contrastive_loss": 0.7752, + "train_positive_log_prob": -83.6014, + "train_positive_token_accuracy": 0.075, + "train_positive_token_prob": 0.0264 + }, + { + "contrastive_loss": 0.5847, + "epoch": 1.2595936794582392, + "grad_norm": 17.90753173828125, + "learning_rate": 8.701937446550298e-06, + "lm_loss": 5.6646, + "loss": 1.6231, + "step": 558, + "text_contrastive_loss": 0.944, + "train_positive_log_prob": -84.5496, + "train_positive_token_accuracy": 0.0817, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.4804, + "epoch": 1.2618510158013545, + "grad_norm": 14.088818550109863, + "learning_rate": 8.69705660687425e-06, + "lm_loss": 5.7768, + "loss": 1.4574, + "step": 559, + "text_contrastive_loss": 0.7986, + "train_positive_log_prob": -86.2928, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.6334, + "epoch": 1.2641083521444696, + "grad_norm": 16.963218688964844, + "learning_rate": 8.692167982532487e-06, + "lm_loss": 5.7333, + "loss": 1.7436, + "step": 560, + "text_contrastive_loss": 1.0737, + "train_positive_log_prob": -84.0678, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.5061, + "epoch": 1.2663656884875847, + "grad_norm": 15.70823860168457, + "learning_rate": 8.687271583818687e-06, + "lm_loss": 5.6966, + "loss": 1.5194, + "step": 561, + "text_contrastive_loss": 0.8874, + "train_positive_log_prob": -84.5356, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.4712, + "epoch": 1.2686230248306998, + "grad_norm": 14.498242378234863, + "learning_rate": 8.682367421042895e-06, + "lm_loss": 5.7649, + "loss": 1.4344, + "step": 562, + "text_contrastive_loss": 0.7734, + "train_positive_log_prob": -86.6987, + "train_positive_token_accuracy": 0.0842, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.5931, + "epoch": 1.2708803611738149, + "grad_norm": 15.68873119354248, + "learning_rate": 8.677455504531507e-06, + "lm_loss": 5.8184, + "loss": 1.6899, + "step": 563, + "text_contrastive_loss": 1.03, + "train_positive_log_prob": -87.5506, + "train_positive_token_accuracy": 0.0844, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.4186, + "epoch": 1.27313769751693, + "grad_norm": 14.044842720031738, + "learning_rate": 8.672535844627243e-06, + "lm_loss": 5.8392, + "loss": 1.455, + "step": 564, + "text_contrastive_loss": 0.905, + "train_positive_log_prob": -89.1832, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.491, + "epoch": 1.275395033860045, + "grad_norm": 14.954693794250488, + "learning_rate": 8.667608451689135e-06, + "lm_loss": 5.7487, + "loss": 1.4478, + "step": 565, + "text_contrastive_loss": 0.7639, + "train_positive_log_prob": -85.3305, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.5236, + "epoch": 1.2776523702031604, + "grad_norm": 14.860040664672852, + "learning_rate": 8.662673336092487e-06, + "lm_loss": 5.802, + "loss": 1.5874, + "step": 566, + "text_contrastive_loss": 0.9673, + "train_positive_log_prob": -85.9879, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0292 + }, + { + "contrastive_loss": 0.4478, + "epoch": 1.2799097065462754, + "grad_norm": 13.928128242492676, + "learning_rate": 8.657730508228874e-06, + "lm_loss": 5.8576, + "loss": 1.4809, + "step": 567, + "text_contrastive_loss": 0.8947, + "train_positive_log_prob": -87.1361, + "train_positive_token_accuracy": 0.0727, + "train_positive_token_prob": 0.0277 + }, + { + "contrastive_loss": 0.4975, + "epoch": 1.2821670428893905, + "grad_norm": 15.510313034057617, + "learning_rate": 8.652779978506103e-06, + "lm_loss": 5.8604, + "loss": 1.5746, + "step": 568, + "text_contrastive_loss": 0.9822, + "train_positive_log_prob": -87.65, + "train_positive_token_accuracy": 0.075, + "train_positive_token_prob": 0.0281 + }, + { + "contrastive_loss": 0.439, + "epoch": 1.2844243792325056, + "grad_norm": 14.851070404052734, + "learning_rate": 8.647821757348202e-06, + "lm_loss": 5.8687, + "loss": 1.3764, + "step": 569, + "text_contrastive_loss": 0.7012, + "train_positive_log_prob": -88.8985, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0285 + }, + { + "contrastive_loss": 0.5623, + "epoch": 1.2866817155756207, + "grad_norm": 14.462052345275879, + "learning_rate": 8.642855855195394e-06, + "lm_loss": 5.7478, + "loss": 1.5526, + "step": 570, + "text_contrastive_loss": 0.8311, + "train_positive_log_prob": -85.6769, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.4577, + "epoch": 1.2889390519187358, + "grad_norm": 13.05922794342041, + "learning_rate": 8.637882282504075e-06, + "lm_loss": 5.9133, + "loss": 1.4615, + "step": 571, + "text_contrastive_loss": 0.8249, + "train_positive_log_prob": -90.2507, + "train_positive_token_accuracy": 0.0735, + "train_positive_token_prob": 0.028 + }, + { + "contrastive_loss": 0.5146, + "epoch": 1.291196388261851, + "grad_norm": 14.528520584106445, + "learning_rate": 8.632901049746793e-06, + "lm_loss": 5.7245, + "loss": 1.6191, + "step": 572, + "text_contrastive_loss": 1.0641, + "train_positive_log_prob": -84.5484, + "train_positive_token_accuracy": 0.0775, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.5348, + "epoch": 1.2934537246049662, + "grad_norm": 16.75908088684082, + "learning_rate": 8.627912167412222e-06, + "lm_loss": 5.6572, + "loss": 1.5361, + "step": 573, + "text_contrastive_loss": 0.8712, + "train_positive_log_prob": -84.4386, + "train_positive_token_accuracy": 0.0871, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.5063, + "epoch": 1.2957110609480813, + "grad_norm": 16.46881103515625, + "learning_rate": 8.622915646005152e-06, + "lm_loss": 5.7237, + "loss": 1.5235, + "step": 574, + "text_contrastive_loss": 0.8896, + "train_positive_log_prob": -85.2738, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.0279 + }, + { + "contrastive_loss": 0.4242, + "epoch": 1.2979683972911964, + "grad_norm": 15.066044807434082, + "learning_rate": 8.617911496046446e-06, + "lm_loss": 5.7074, + "loss": 1.3531, + "step": 575, + "text_contrastive_loss": 0.7163, + "train_positive_log_prob": -85.5419, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.3851, + "epoch": 1.3002257336343115, + "grad_norm": 13.20824146270752, + "learning_rate": 8.612899728073039e-06, + "lm_loss": 5.7833, + "loss": 1.3576, + "step": 576, + "text_contrastive_loss": 0.7883, + "train_positive_log_prob": -87.5173, + "train_positive_token_accuracy": 0.0803, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.4448, + "epoch": 1.3024830699774266, + "grad_norm": 13.5263090133667, + "learning_rate": 8.607880352637905e-06, + "lm_loss": 5.6824, + "loss": 1.3716, + "step": 577, + "text_contrastive_loss": 0.7171, + "train_positive_log_prob": -81.9375, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4699, + "epoch": 1.304740406320542, + "grad_norm": 15.998811721801758, + "learning_rate": 8.602853380310033e-06, + "lm_loss": 5.7364, + "loss": 1.4334, + "step": 578, + "text_contrastive_loss": 0.7798, + "train_positive_log_prob": -83.5601, + "train_positive_token_accuracy": 0.0783, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.5394, + "epoch": 1.3069977426636568, + "grad_norm": 16.599050521850586, + "learning_rate": 8.59781882167441e-06, + "lm_loss": 5.757, + "loss": 1.5103, + "step": 579, + "text_contrastive_loss": 0.7903, + "train_positive_log_prob": -85.3923, + "train_positive_token_accuracy": 0.0711, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.474, + "epoch": 1.309255079006772, + "grad_norm": 14.849519729614258, + "learning_rate": 8.592776687332003e-06, + "lm_loss": 5.795, + "loss": 1.5491, + "step": 580, + "text_contrastive_loss": 0.9912, + "train_positive_log_prob": -85.046, + "train_positive_token_accuracy": 0.0767, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.4694, + "epoch": 1.3115124153498872, + "grad_norm": 14.33668327331543, + "learning_rate": 8.58772698789972e-06, + "lm_loss": 5.7345, + "loss": 1.4764, + "step": 581, + "text_contrastive_loss": 0.8671, + "train_positive_log_prob": -84.1147, + "train_positive_token_accuracy": 0.0746, + "train_positive_token_prob": 0.0279 + }, + { + "contrastive_loss": 0.4464, + "epoch": 1.3137697516930023, + "grad_norm": 14.263465881347656, + "learning_rate": 8.582669734010407e-06, + "lm_loss": 5.6715, + "loss": 1.4431, + "step": 582, + "text_contrastive_loss": 0.859, + "train_positive_log_prob": -83.7635, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0281 + }, + { + "contrastive_loss": 0.4944, + "epoch": 1.3160270880361173, + "grad_norm": 15.106565475463867, + "learning_rate": 8.577604936312813e-06, + "lm_loss": 5.6653, + "loss": 1.4897, + "step": 583, + "text_contrastive_loss": 0.8574, + "train_positive_log_prob": -82.7656, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.0276 + }, + { + "contrastive_loss": 0.5126, + "epoch": 1.3182844243792324, + "grad_norm": 16.442745208740234, + "learning_rate": 8.572532605471572e-06, + "lm_loss": 5.7601, + "loss": 1.5595, + "step": 584, + "text_contrastive_loss": 0.9418, + "train_positive_log_prob": -85.9525, + "train_positive_token_accuracy": 0.0725, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.5381, + "epoch": 1.3205417607223477, + "grad_norm": 15.679627418518066, + "learning_rate": 8.567452752167183e-06, + "lm_loss": 5.6624, + "loss": 1.5796, + "step": 585, + "text_contrastive_loss": 0.9504, + "train_positive_log_prob": -83.5007, + "train_positive_token_accuracy": 0.0832, + "train_positive_token_prob": 0.0285 + }, + { + "contrastive_loss": 0.425, + "epoch": 1.3227990970654628, + "grad_norm": 13.634334564208984, + "learning_rate": 8.562365387095977e-06, + "lm_loss": 5.636, + "loss": 1.4691, + "step": 586, + "text_contrastive_loss": 0.9611, + "train_positive_log_prob": -82.6454, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.4202, + "epoch": 1.325056433408578, + "grad_norm": 14.67137622833252, + "learning_rate": 8.557270520970111e-06, + "lm_loss": 5.6212, + "loss": 1.3774, + "step": 587, + "text_contrastive_loss": 0.7902, + "train_positive_log_prob": -84.9089, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.3977, + "epoch": 1.327313769751693, + "grad_norm": 12.816587448120117, + "learning_rate": 8.552168164517532e-06, + "lm_loss": 5.6157, + "loss": 1.3217, + "step": 588, + "text_contrastive_loss": 0.7249, + "train_positive_log_prob": -84.7167, + "train_positive_token_accuracy": 0.0751, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.5178, + "epoch": 1.329571106094808, + "grad_norm": 14.338798522949219, + "learning_rate": 8.547058328481959e-06, + "lm_loss": 5.7423, + "loss": 1.5171, + "step": 589, + "text_contrastive_loss": 0.8502, + "train_positive_log_prob": -83.9107, + "train_positive_token_accuracy": 0.0666, + "train_positive_token_prob": 0.0272 + }, + { + "contrastive_loss": 0.4659, + "epoch": 1.3318284424379232, + "grad_norm": 14.707366943359375, + "learning_rate": 8.54194102362286e-06, + "lm_loss": 5.6306, + "loss": 1.4245, + "step": 590, + "text_contrastive_loss": 0.7912, + "train_positive_log_prob": -83.8864, + "train_positive_token_accuracy": 0.0836, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.5681, + "epoch": 1.3340857787810383, + "grad_norm": 16.14177703857422, + "learning_rate": 8.536816260715433e-06, + "lm_loss": 5.8007, + "loss": 1.6138, + "step": 591, + "text_contrastive_loss": 0.9313, + "train_positive_log_prob": -86.6648, + "train_positive_token_accuracy": 0.0776, + "train_positive_token_prob": 0.0282 + }, + { + "contrastive_loss": 0.5292, + "epoch": 1.3363431151241536, + "grad_norm": 14.355437278747559, + "learning_rate": 8.531684050550575e-06, + "lm_loss": 5.6559, + "loss": 1.5654, + "step": 592, + "text_contrastive_loss": 0.9413, + "train_positive_log_prob": -82.7566, + "train_positive_token_accuracy": 0.0806, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.4348, + "epoch": 1.3386004514672687, + "grad_norm": 13.12289047241211, + "learning_rate": 8.526544403934868e-06, + "lm_loss": 5.7249, + "loss": 1.4359, + "step": 593, + "text_contrastive_loss": 0.8573, + "train_positive_log_prob": -85.3804, + "train_positive_token_accuracy": 0.0796, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.429, + "epoch": 1.3408577878103838, + "grad_norm": 12.86688232421875, + "learning_rate": 8.521397331690551e-06, + "lm_loss": 5.6226, + "loss": 1.385, + "step": 594, + "text_contrastive_loss": 0.7875, + "train_positive_log_prob": -83.5659, + "train_positive_token_accuracy": 0.0817, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.4238, + "epoch": 1.3431151241534989, + "grad_norm": 14.663098335266113, + "learning_rate": 8.516242844655498e-06, + "lm_loss": 5.7442, + "loss": 1.3678, + "step": 595, + "text_contrastive_loss": 0.7393, + "train_positive_log_prob": -84.6363, + "train_positive_token_accuracy": 0.0782, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.587, + "epoch": 1.345372460496614, + "grad_norm": 17.255664825439453, + "learning_rate": 8.5110809536832e-06, + "lm_loss": 5.8162, + "loss": 1.6221, + "step": 596, + "text_contrastive_loss": 0.9069, + "train_positive_log_prob": -86.0171, + "train_positive_token_accuracy": 0.0734, + "train_positive_token_prob": 0.028 + }, + { + "contrastive_loss": 0.5174, + "epoch": 1.347629796839729, + "grad_norm": 16.211050033569336, + "learning_rate": 8.50591166964273e-06, + "lm_loss": 5.8307, + "loss": 1.5411, + "step": 597, + "text_contrastive_loss": 0.8813, + "train_positive_log_prob": -87.0938, + "train_positive_token_accuracy": 0.0764, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.5364, + "epoch": 1.3498871331828441, + "grad_norm": 12.976540565490723, + "learning_rate": 8.500735003418734e-06, + "lm_loss": 5.6894, + "loss": 1.596, + "step": 598, + "text_contrastive_loss": 0.9813, + "train_positive_log_prob": -82.5231, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.5375, + "epoch": 1.3521444695259595, + "grad_norm": 16.37051010131836, + "learning_rate": 8.495550965911403e-06, + "lm_loss": 5.7075, + "loss": 1.6446, + "step": 599, + "text_contrastive_loss": 1.0727, + "train_positive_log_prob": -85.6098, + "train_positive_token_accuracy": 0.0734, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.5672, + "epoch": 1.3544018058690745, + "grad_norm": 17.133121490478516, + "learning_rate": 8.490359568036446e-06, + "lm_loss": 5.7303, + "loss": 1.6151, + "step": 600, + "text_contrastive_loss": 0.9497, + "train_positive_log_prob": -86.4432, + "train_positive_token_accuracy": 0.0826, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.5085, + "epoch": 1.3566591422121896, + "grad_norm": 14.417841911315918, + "learning_rate": 8.485160820725073e-06, + "lm_loss": 5.8329, + "loss": 1.4963, + "step": 601, + "text_contrastive_loss": 0.8091, + "train_positive_log_prob": -88.7936, + "train_positive_token_accuracy": 0.0842, + "train_positive_token_prob": 0.0285 + }, + { + "contrastive_loss": 0.4975, + "epoch": 1.3589164785553047, + "grad_norm": 14.838998794555664, + "learning_rate": 8.479954734923967e-06, + "lm_loss": 5.6464, + "loss": 1.4615, + "step": 602, + "text_contrastive_loss": 0.7986, + "train_positive_log_prob": -83.7024, + "train_positive_token_accuracy": 0.0798, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.4185, + "epoch": 1.3611738148984198, + "grad_norm": 13.288290023803711, + "learning_rate": 8.474741321595263e-06, + "lm_loss": 5.7524, + "loss": 1.3916, + "step": 603, + "text_contrastive_loss": 0.7957, + "train_positive_log_prob": -84.735, + "train_positive_token_accuracy": 0.0735, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.5676, + "epoch": 1.363431151241535, + "grad_norm": 17.322425842285156, + "learning_rate": 8.46952059171653e-06, + "lm_loss": 5.8024, + "loss": 1.5585, + "step": 604, + "text_contrastive_loss": 0.8212, + "train_positive_log_prob": -87.2656, + "train_positive_token_accuracy": 0.0777, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.5144, + "epoch": 1.36568848758465, + "grad_norm": 14.924680709838867, + "learning_rate": 8.464292556280734e-06, + "lm_loss": 5.6436, + "loss": 1.4354, + "step": 605, + "text_contrastive_loss": 0.7133, + "train_positive_log_prob": -82.735, + "train_positive_token_accuracy": 0.0853, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4219, + "epoch": 1.3679458239277653, + "grad_norm": 14.398249626159668, + "learning_rate": 8.459057226296232e-06, + "lm_loss": 5.6432, + "loss": 1.3932, + "step": 606, + "text_contrastive_loss": 0.8139, + "train_positive_log_prob": -83.3475, + "train_positive_token_accuracy": 0.0812, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.4475, + "epoch": 1.3702031602708804, + "grad_norm": 14.201505661010742, + "learning_rate": 8.453814612786736e-06, + "lm_loss": 5.6835, + "loss": 1.506, + "step": 607, + "text_contrastive_loss": 0.9801, + "train_positive_log_prob": -85.784, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.5011, + "epoch": 1.3724604966139955, + "grad_norm": 18.383134841918945, + "learning_rate": 8.4485647267913e-06, + "lm_loss": 5.672, + "loss": 1.4904, + "step": 608, + "text_contrastive_loss": 0.8443, + "train_positive_log_prob": -85.1024, + "train_positive_token_accuracy": 0.0812, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.4708, + "epoch": 1.3747178329571106, + "grad_norm": 17.422670364379883, + "learning_rate": 8.443307579364282e-06, + "lm_loss": 5.762, + "loss": 1.4807, + "step": 609, + "text_contrastive_loss": 0.8674, + "train_positive_log_prob": -86.7848, + "train_positive_token_accuracy": 0.0851, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.5097, + "epoch": 1.3769751693002257, + "grad_norm": 16.978023529052734, + "learning_rate": 8.43804318157534e-06, + "lm_loss": 5.7197, + "loss": 1.4968, + "step": 610, + "text_contrastive_loss": 0.8303, + "train_positive_log_prob": -84.6329, + "train_positive_token_accuracy": 0.0739, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.5675, + "epoch": 1.379232505643341, + "grad_norm": 15.62741756439209, + "learning_rate": 8.432771544509395e-06, + "lm_loss": 5.8113, + "loss": 1.6394, + "step": 611, + "text_contrastive_loss": 0.9815, + "train_positive_log_prob": -89.5089, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.4102, + "epoch": 1.3814898419864559, + "grad_norm": 17.065153121948242, + "learning_rate": 8.427492679266605e-06, + "lm_loss": 5.7215, + "loss": 1.3907, + "step": 612, + "text_contrastive_loss": 0.8168, + "train_positive_log_prob": -86.4046, + "train_positive_token_accuracy": 0.0758, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.5305, + "epoch": 1.3837471783295712, + "grad_norm": 14.382501602172852, + "learning_rate": 8.422206596962357e-06, + "lm_loss": 5.6203, + "loss": 1.4797, + "step": 613, + "text_contrastive_loss": 0.7743, + "train_positive_log_prob": -80.3763, + "train_positive_token_accuracy": 0.0809, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.6152, + "epoch": 1.3860045146726863, + "grad_norm": 17.33323097229004, + "learning_rate": 8.416913308727229e-06, + "lm_loss": 5.6527, + "loss": 1.5777, + "step": 614, + "text_contrastive_loss": 0.7946, + "train_positive_log_prob": -84.0208, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.4664, + "epoch": 1.3882618510158014, + "grad_norm": 16.54546546936035, + "learning_rate": 8.411612825706976e-06, + "lm_loss": 5.7262, + "loss": 1.4231, + "step": 615, + "text_contrastive_loss": 0.7682, + "train_positive_log_prob": -85.2425, + "train_positive_token_accuracy": 0.0743, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.5116, + "epoch": 1.3905191873589164, + "grad_norm": 15.819067001342773, + "learning_rate": 8.4063051590625e-06, + "lm_loss": 5.5944, + "loss": 1.5268, + "step": 616, + "text_contrastive_loss": 0.9116, + "train_positive_log_prob": -83.9601, + "train_positive_token_accuracy": 0.083, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3799, + "epoch": 1.3927765237020315, + "grad_norm": 13.957484245300293, + "learning_rate": 8.400990319969829e-06, + "lm_loss": 5.5935, + "loss": 1.3107, + "step": 617, + "text_contrastive_loss": 0.7429, + "train_positive_log_prob": -81.2594, + "train_positive_token_accuracy": 0.0823, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.5722, + "epoch": 1.3950338600451468, + "grad_norm": 15.774820327758789, + "learning_rate": 8.395668319620092e-06, + "lm_loss": 5.5542, + "loss": 1.5603, + "step": 618, + "text_contrastive_loss": 0.8653, + "train_positive_log_prob": -81.0311, + "train_positive_token_accuracy": 0.0846, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4964, + "epoch": 1.3972911963882617, + "grad_norm": 17.090864181518555, + "learning_rate": 8.390339169219504e-06, + "lm_loss": 5.6777, + "loss": 1.4867, + "step": 619, + "text_contrastive_loss": 0.8451, + "train_positive_log_prob": -85.4361, + "train_positive_token_accuracy": 0.081, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.5244, + "epoch": 1.399548532731377, + "grad_norm": 16.13720703125, + "learning_rate": 8.385002879989328e-06, + "lm_loss": 5.7067, + "loss": 1.5402, + "step": 620, + "text_contrastive_loss": 0.8904, + "train_positive_log_prob": -83.3921, + "train_positive_token_accuracy": 0.0724, + "train_positive_token_prob": 0.0285 + }, + { + "contrastive_loss": 0.467, + "epoch": 1.4018058690744921, + "grad_norm": 16.108928680419922, + "learning_rate": 8.37965946316586e-06, + "lm_loss": 5.7693, + "loss": 1.4954, + "step": 621, + "text_contrastive_loss": 0.9029, + "train_positive_log_prob": -83.8387, + "train_positive_token_accuracy": 0.0766, + "train_positive_token_prob": 0.0277 + }, + { + "contrastive_loss": 0.3771, + "epoch": 1.4040632054176072, + "grad_norm": 13.751592636108398, + "learning_rate": 8.37430893000041e-06, + "lm_loss": 5.6708, + "loss": 1.3665, + "step": 622, + "text_contrastive_loss": 0.8446, + "train_positive_log_prob": -84.245, + "train_positive_token_accuracy": 0.0715, + "train_positive_token_prob": 0.0272 + }, + { + "contrastive_loss": 0.5291, + "epoch": 1.4063205417607223, + "grad_norm": 18.190946578979492, + "learning_rate": 8.368951291759264e-06, + "lm_loss": 5.6352, + "loss": 1.5548, + "step": 623, + "text_contrastive_loss": 0.9244, + "train_positive_log_prob": -83.1633, + "train_positive_token_accuracy": 0.0868, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.6423, + "epoch": 1.4085778781038374, + "grad_norm": 16.78770637512207, + "learning_rate": 8.363586559723675e-06, + "lm_loss": 5.6508, + "loss": 1.646, + "step": 624, + "text_contrastive_loss": 0.8771, + "train_positive_log_prob": -83.3269, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0282 + }, + { + "contrastive_loss": 0.4697, + "epoch": 1.4108352144469527, + "grad_norm": 15.0231351852417, + "learning_rate": 8.35821474518983e-06, + "lm_loss": 5.6833, + "loss": 1.4476, + "step": 625, + "text_contrastive_loss": 0.8193, + "train_positive_log_prob": -84.0054, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.4274, + "epoch": 1.4130925507900678, + "grad_norm": 13.26618480682373, + "learning_rate": 8.352835859468829e-06, + "lm_loss": 5.6779, + "loss": 1.4559, + "step": 626, + "text_contrastive_loss": 0.9215, + "train_positive_log_prob": -82.7277, + "train_positive_token_accuracy": 0.0764, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.4691, + "epoch": 1.4153498871331829, + "grad_norm": 15.268219947814941, + "learning_rate": 8.347449913886662e-06, + "lm_loss": 5.5979, + "loss": 1.4997, + "step": 627, + "text_contrastive_loss": 0.9416, + "train_positive_log_prob": -82.7623, + "train_positive_token_accuracy": 0.0856, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.4887, + "epoch": 1.417607223476298, + "grad_norm": 12.510571479797363, + "learning_rate": 8.34205691978419e-06, + "lm_loss": 5.6359, + "loss": 1.5133, + "step": 628, + "text_contrastive_loss": 0.922, + "train_positive_log_prob": -84.6696, + "train_positive_token_accuracy": 0.0749, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.4575, + "epoch": 1.419864559819413, + "grad_norm": 15.527823448181152, + "learning_rate": 8.336656888517103e-06, + "lm_loss": 5.6734, + "loss": 1.4663, + "step": 629, + "text_contrastive_loss": 0.8828, + "train_positive_log_prob": -84.7624, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.4747, + "epoch": 1.4221218961625282, + "grad_norm": 13.94395637512207, + "learning_rate": 8.331249831455921e-06, + "lm_loss": 5.7445, + "loss": 1.48, + "step": 630, + "text_contrastive_loss": 0.8616, + "train_positive_log_prob": -84.3742, + "train_positive_token_accuracy": 0.0753, + "train_positive_token_prob": 0.0292 + }, + { + "contrastive_loss": 0.3821, + "epoch": 1.4243792325056432, + "grad_norm": 11.797964096069336, + "learning_rate": 8.325835759985951e-06, + "lm_loss": 5.7322, + "loss": 1.3203, + "step": 631, + "text_contrastive_loss": 0.73, + "train_positive_log_prob": -84.7315, + "train_positive_token_accuracy": 0.0854, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.4394, + "epoch": 1.4266365688487586, + "grad_norm": 14.449109077453613, + "learning_rate": 8.320414685507272e-06, + "lm_loss": 5.7717, + "loss": 1.3673, + "step": 632, + "text_contrastive_loss": 0.7015, + "train_positive_log_prob": -87.8374, + "train_positive_token_accuracy": 0.077, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.5201, + "epoch": 1.4288939051918736, + "grad_norm": 15.04293441772461, + "learning_rate": 8.31498661943471e-06, + "lm_loss": 5.7345, + "loss": 1.5467, + "step": 633, + "text_contrastive_loss": 0.9064, + "train_positive_log_prob": -85.7974, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.4807, + "epoch": 1.4311512415349887, + "grad_norm": 14.117101669311523, + "learning_rate": 8.309551573197809e-06, + "lm_loss": 5.7371, + "loss": 1.4817, + "step": 634, + "text_contrastive_loss": 0.8546, + "train_positive_log_prob": -86.3923, + "train_positive_token_accuracy": 0.0719, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.3696, + "epoch": 1.4334085778781038, + "grad_norm": 13.330780029296875, + "learning_rate": 8.304109558240817e-06, + "lm_loss": 5.7594, + "loss": 1.29, + "step": 635, + "text_contrastive_loss": 0.689, + "train_positive_log_prob": -85.536, + "train_positive_token_accuracy": 0.0713, + "train_positive_token_prob": 0.028 + }, + { + "contrastive_loss": 0.492, + "epoch": 1.435665914221219, + "grad_norm": 14.88065242767334, + "learning_rate": 8.298660586022646e-06, + "lm_loss": 5.7777, + "loss": 1.5729, + "step": 636, + "text_contrastive_loss": 1.0061, + "train_positive_log_prob": -84.8845, + "train_positive_token_accuracy": 0.0763, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.554, + "epoch": 1.437923250564334, + "grad_norm": 20.81431007385254, + "learning_rate": 8.293204668016867e-06, + "lm_loss": 5.7308, + "loss": 1.6185, + "step": 637, + "text_contrastive_loss": 0.9828, + "train_positive_log_prob": -87.0118, + "train_positive_token_accuracy": 0.0756, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.5157, + "epoch": 1.440180586907449, + "grad_norm": 15.25027847290039, + "learning_rate": 8.287741815711674e-06, + "lm_loss": 5.7778, + "loss": 1.5548, + "step": 638, + "text_contrastive_loss": 0.9227, + "train_positive_log_prob": -85.8069, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.5825, + "epoch": 1.4424379232505644, + "grad_norm": 15.03612995147705, + "learning_rate": 8.282272040609855e-06, + "lm_loss": 5.7508, + "loss": 1.5904, + "step": 639, + "text_contrastive_loss": 0.8656, + "train_positive_log_prob": -85.8813, + "train_positive_token_accuracy": 0.0711, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.5239, + "epoch": 1.4446952595936795, + "grad_norm": 15.463916778564453, + "learning_rate": 8.276795354228785e-06, + "lm_loss": 5.7049, + "loss": 1.5021, + "step": 640, + "text_contrastive_loss": 0.8154, + "train_positive_log_prob": -84.7054, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3737, + "epoch": 1.4469525959367946, + "grad_norm": 11.515005111694336, + "learning_rate": 8.271311768100386e-06, + "lm_loss": 5.53, + "loss": 1.3209, + "step": 641, + "text_contrastive_loss": 0.7886, + "train_positive_log_prob": -82.5808, + "train_positive_token_accuracy": 0.0866, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.5577, + "epoch": 1.4492099322799097, + "grad_norm": 15.942440032958984, + "learning_rate": 8.26582129377111e-06, + "lm_loss": 5.6361, + "loss": 1.5841, + "step": 642, + "text_contrastive_loss": 0.9256, + "train_positive_log_prob": -82.3154, + "train_positive_token_accuracy": 0.0864, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3763, + "epoch": 1.4514672686230248, + "grad_norm": 14.784561157226562, + "learning_rate": 8.26032394280191e-06, + "lm_loss": 5.7662, + "loss": 1.3506, + "step": 643, + "text_contrastive_loss": 0.7954, + "train_positive_log_prob": -86.0615, + "train_positive_token_accuracy": 0.0809, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.5579, + "epoch": 1.45372460496614, + "grad_norm": 15.00743293762207, + "learning_rate": 8.254819726768224e-06, + "lm_loss": 5.645, + "loss": 1.5832, + "step": 644, + "text_contrastive_loss": 0.9215, + "train_positive_log_prob": -84.2969, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.392, + "epoch": 1.455981941309255, + "grad_norm": 14.503917694091797, + "learning_rate": 8.249308657259943e-06, + "lm_loss": 5.6424, + "loss": 1.3551, + "step": 645, + "text_contrastive_loss": 0.7977, + "train_positive_log_prob": -84.6817, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.4541, + "epoch": 1.4582392776523703, + "grad_norm": 14.661574363708496, + "learning_rate": 8.243790745881389e-06, + "lm_loss": 5.6649, + "loss": 1.448, + "step": 646, + "text_contrastive_loss": 0.8549, + "train_positive_log_prob": -83.5575, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.5007, + "epoch": 1.4604966139954854, + "grad_norm": 16.050247192382812, + "learning_rate": 8.238266004251284e-06, + "lm_loss": 5.7432, + "loss": 1.4659, + "step": 647, + "text_contrastive_loss": 0.7819, + "train_positive_log_prob": -85.8399, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.4631, + "epoch": 1.4627539503386005, + "grad_norm": 13.094605445861816, + "learning_rate": 8.232734444002748e-06, + "lm_loss": 5.7808, + "loss": 1.4476, + "step": 648, + "text_contrastive_loss": 0.8129, + "train_positive_log_prob": -86.0128, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.4685, + "epoch": 1.4650112866817155, + "grad_norm": 17.743589401245117, + "learning_rate": 8.22719607678324e-06, + "lm_loss": 5.7041, + "loss": 1.4344, + "step": 649, + "text_contrastive_loss": 0.7911, + "train_positive_log_prob": -84.6504, + "train_positive_token_accuracy": 0.0845, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.4111, + "epoch": 1.4672686230248306, + "grad_norm": 14.644586563110352, + "learning_rate": 8.221650914254566e-06, + "lm_loss": 5.7006, + "loss": 1.3335, + "step": 650, + "text_contrastive_loss": 0.7047, + "train_positive_log_prob": -83.7309, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.3515, + "epoch": 1.469525959367946, + "grad_norm": 12.375837326049805, + "learning_rate": 8.216098968092833e-06, + "lm_loss": 5.6449, + "loss": 1.2718, + "step": 651, + "text_contrastive_loss": 0.7116, + "train_positive_log_prob": -83.8368, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.3966, + "epoch": 1.4717832957110608, + "grad_norm": 16.15328598022461, + "learning_rate": 8.210540249988435e-06, + "lm_loss": 5.6452, + "loss": 1.2726, + "step": 652, + "text_contrastive_loss": 0.6229, + "train_positive_log_prob": -84.1476, + "train_positive_token_accuracy": 0.0844, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4653, + "epoch": 1.4740406320541761, + "grad_norm": 15.987441062927246, + "learning_rate": 8.204974771646023e-06, + "lm_loss": 5.68, + "loss": 1.439, + "step": 653, + "text_contrastive_loss": 0.8114, + "train_positive_log_prob": -84.05, + "train_positive_token_accuracy": 0.0763, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.5352, + "epoch": 1.4762979683972912, + "grad_norm": 16.285802841186523, + "learning_rate": 8.199402544784485e-06, + "lm_loss": 5.6198, + "loss": 1.6268, + "step": 654, + "text_contrastive_loss": 1.0592, + "train_positive_log_prob": -82.9575, + "train_positive_token_accuracy": 0.0828, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.5053, + "epoch": 1.4785553047404063, + "grad_norm": 16.442710876464844, + "learning_rate": 8.193823581136919e-06, + "lm_loss": 5.7476, + "loss": 1.5762, + "step": 655, + "text_contrastive_loss": 0.9924, + "train_positive_log_prob": -84.8666, + "train_positive_token_accuracy": 0.0751, + "train_positive_token_prob": 0.0285 + }, + { + "contrastive_loss": 0.4455, + "epoch": 1.4808126410835214, + "grad_norm": 14.24592113494873, + "learning_rate": 8.188237892450603e-06, + "lm_loss": 5.7624, + "loss": 1.4015, + "step": 656, + "text_contrastive_loss": 0.7596, + "train_positive_log_prob": -85.0196, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.5272, + "epoch": 1.4830699774266365, + "grad_norm": 15.385213851928711, + "learning_rate": 8.182645490486986e-06, + "lm_loss": 5.6447, + "loss": 1.4802, + "step": 657, + "text_contrastive_loss": 0.777, + "train_positive_log_prob": -84.211, + "train_positive_token_accuracy": 0.0723, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.4371, + "epoch": 1.4853273137697518, + "grad_norm": 13.881762504577637, + "learning_rate": 8.177046387021641e-06, + "lm_loss": 5.6705, + "loss": 1.4286, + "step": 658, + "text_contrastive_loss": 0.8489, + "train_positive_log_prob": -84.0849, + "train_positive_token_accuracy": 0.0843, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.5435, + "epoch": 1.487584650112867, + "grad_norm": 17.392467498779297, + "learning_rate": 8.17144059384426e-06, + "lm_loss": 5.6384, + "loss": 1.5087, + "step": 659, + "text_contrastive_loss": 0.8026, + "train_positive_log_prob": -84.641, + "train_positive_token_accuracy": 0.0833, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.4224, + "epoch": 1.489841986455982, + "grad_norm": 13.379764556884766, + "learning_rate": 8.165828122758615e-06, + "lm_loss": 5.7283, + "loss": 1.3672, + "step": 660, + "text_contrastive_loss": 0.744, + "train_positive_log_prob": -87.2672, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.6114, + "epoch": 1.492099322799097, + "grad_norm": 18.610767364501953, + "learning_rate": 8.160208985582547e-06, + "lm_loss": 5.6818, + "loss": 1.6163, + "step": 661, + "text_contrastive_loss": 0.8734, + "train_positive_log_prob": -83.3614, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.3857, + "epoch": 1.4943566591422122, + "grad_norm": 12.912287712097168, + "learning_rate": 8.154583194147929e-06, + "lm_loss": 5.6562, + "loss": 1.2649, + "step": 662, + "text_contrastive_loss": 0.6271, + "train_positive_log_prob": -81.31, + "train_positive_token_accuracy": 0.0755, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.4028, + "epoch": 1.4966139954853273, + "grad_norm": 14.353432655334473, + "learning_rate": 8.148950760300642e-06, + "lm_loss": 5.7631, + "loss": 1.2938, + "step": 663, + "text_contrastive_loss": 0.6293, + "train_positive_log_prob": -86.5396, + "train_positive_token_accuracy": 0.0765, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.5248, + "epoch": 1.4988713318284423, + "grad_norm": 15.900279998779297, + "learning_rate": 8.14331169590056e-06, + "lm_loss": 5.7253, + "loss": 1.5054, + "step": 664, + "text_contrastive_loss": 0.8161, + "train_positive_log_prob": -85.6113, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.0286 + }, + { + "contrastive_loss": 0.364, + "epoch": 1.5011286681715577, + "grad_norm": 14.203046798706055, + "learning_rate": 8.137666012821514e-06, + "lm_loss": 5.733, + "loss": 1.3608, + "step": 665, + "text_contrastive_loss": 0.8471, + "train_positive_log_prob": -83.937, + "train_positive_token_accuracy": 0.0758, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.5346, + "epoch": 1.5033860045146725, + "grad_norm": 16.062105178833008, + "learning_rate": 8.132013722951275e-06, + "lm_loss": 5.6654, + "loss": 1.5098, + "step": 666, + "text_contrastive_loss": 0.8173, + "train_positive_log_prob": -83.3416, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.4487, + "epoch": 1.5056433408577878, + "grad_norm": 16.18758201599121, + "learning_rate": 8.12635483819152e-06, + "lm_loss": 5.7487, + "loss": 1.4029, + "step": 667, + "text_contrastive_loss": 0.7587, + "train_positive_log_prob": -84.252, + "train_positive_token_accuracy": 0.0824, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.478, + "epoch": 1.507900677200903, + "grad_norm": 15.962945938110352, + "learning_rate": 8.12068937045782e-06, + "lm_loss": 5.5818, + "loss": 1.4346, + "step": 668, + "text_contrastive_loss": 0.7967, + "train_positive_log_prob": -83.5142, + "train_positive_token_accuracy": 0.0775, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.4866, + "epoch": 1.510158013544018, + "grad_norm": 15.000810623168945, + "learning_rate": 8.115017331679602e-06, + "lm_loss": 5.7259, + "loss": 1.4781, + "step": 669, + "text_contrastive_loss": 0.8379, + "train_positive_log_prob": -84.9926, + "train_positive_token_accuracy": 0.0726, + "train_positive_token_prob": 0.0286 + }, + { + "contrastive_loss": 0.5004, + "epoch": 1.5124153498871333, + "grad_norm": 16.47449493408203, + "learning_rate": 8.109338733800132e-06, + "lm_loss": 5.6612, + "loss": 1.5289, + "step": 670, + "text_contrastive_loss": 0.9248, + "train_positive_log_prob": -83.8799, + "train_positive_token_accuracy": 0.0743, + "train_positive_token_prob": 0.0281 + }, + { + "contrastive_loss": 0.4206, + "epoch": 1.5146726862302482, + "grad_norm": 14.128972053527832, + "learning_rate": 8.103653588776483e-06, + "lm_loss": 5.605, + "loss": 1.3104, + "step": 671, + "text_contrastive_loss": 0.6584, + "train_positive_log_prob": -81.2949, + "train_positive_token_accuracy": 0.0773, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.3787, + "epoch": 1.5169300225733635, + "grad_norm": 14.067277908325195, + "learning_rate": 8.09796190857952e-06, + "lm_loss": 5.591, + "loss": 1.3993, + "step": 672, + "text_contrastive_loss": 0.9229, + "train_positive_log_prob": -80.6322, + "train_positive_token_accuracy": 0.0729, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.4477, + "epoch": 1.5191873589164786, + "grad_norm": 13.527449607849121, + "learning_rate": 8.09226370519386e-06, + "lm_loss": 5.6159, + "loss": 1.3563, + "step": 673, + "text_contrastive_loss": 0.694, + "train_positive_log_prob": -85.8378, + "train_positive_token_accuracy": 0.0721, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.4705, + "epoch": 1.5214446952595937, + "grad_norm": 15.505599021911621, + "learning_rate": 8.08655899061787e-06, + "lm_loss": 5.8439, + "loss": 1.5065, + "step": 674, + "text_contrastive_loss": 0.9033, + "train_positive_log_prob": -87.5452, + "train_positive_token_accuracy": 0.0738, + "train_positive_token_prob": 0.028 + }, + { + "contrastive_loss": 0.5657, + "epoch": 1.5237020316027088, + "grad_norm": 15.648252487182617, + "learning_rate": 8.080847776863609e-06, + "lm_loss": 5.5688, + "loss": 1.6023, + "step": 675, + "text_contrastive_loss": 0.9594, + "train_positive_log_prob": -82.599, + "train_positive_token_accuracy": 0.0772, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.4264, + "epoch": 1.5259593679458239, + "grad_norm": 12.821239471435547, + "learning_rate": 8.075130075956836e-06, + "lm_loss": 5.7206, + "loss": 1.4074, + "step": 676, + "text_contrastive_loss": 0.818, + "train_positive_log_prob": -84.5158, + "train_positive_token_accuracy": 0.0735, + "train_positive_token_prob": 0.0281 + }, + { + "contrastive_loss": 0.4091, + "epoch": 1.5282167042889392, + "grad_norm": 13.122559547424316, + "learning_rate": 8.069405899936961e-06, + "lm_loss": 5.647, + "loss": 1.3933, + "step": 677, + "text_contrastive_loss": 0.839, + "train_positive_log_prob": -84.2019, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.454, + "epoch": 1.530474040632054, + "grad_norm": 13.56540298461914, + "learning_rate": 8.06367526085703e-06, + "lm_loss": 5.6796, + "loss": 1.3976, + "step": 678, + "text_contrastive_loss": 0.7513, + "train_positive_log_prob": -83.3309, + "train_positive_token_accuracy": 0.0825, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.42, + "epoch": 1.5327313769751694, + "grad_norm": 14.087994575500488, + "learning_rate": 8.057938170783704e-06, + "lm_loss": 5.6418, + "loss": 1.3526, + "step": 679, + "text_contrastive_loss": 0.7367, + "train_positive_log_prob": -83.7651, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.4734, + "epoch": 1.5349887133182845, + "grad_norm": 13.650496482849121, + "learning_rate": 8.052194641797217e-06, + "lm_loss": 5.8091, + "loss": 1.4548, + "step": 680, + "text_contrastive_loss": 0.801, + "train_positive_log_prob": -86.0657, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0285 + }, + { + "contrastive_loss": 0.4921, + "epoch": 1.5372460496613995, + "grad_norm": 16.06494903564453, + "learning_rate": 8.046444685991369e-06, + "lm_loss": 5.5956, + "loss": 1.4576, + "step": 681, + "text_contrastive_loss": 0.812, + "train_positive_log_prob": -83.7828, + "train_positive_token_accuracy": 0.0801, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.4793, + "epoch": 1.5395033860045146, + "grad_norm": 16.24143409729004, + "learning_rate": 8.040688315473489e-06, + "lm_loss": 5.5937, + "loss": 1.4818, + "step": 682, + "text_contrastive_loss": 0.8862, + "train_positive_log_prob": -84.3899, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.5799, + "epoch": 1.5417607223476297, + "grad_norm": 16.767017364501953, + "learning_rate": 8.034925542364412e-06, + "lm_loss": 5.7097, + "loss": 1.5881, + "step": 683, + "text_contrastive_loss": 0.8744, + "train_positive_log_prob": -84.8151, + "train_positive_token_accuracy": 0.0736, + "train_positive_token_prob": 0.0281 + }, + { + "contrastive_loss": 0.4295, + "epoch": 1.544018058690745, + "grad_norm": 14.054061889648438, + "learning_rate": 8.029156378798459e-06, + "lm_loss": 5.7526, + "loss": 1.4195, + "step": 684, + "text_contrastive_loss": 0.8295, + "train_positive_log_prob": -85.5074, + "train_positive_token_accuracy": 0.083, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.5139, + "epoch": 1.54627539503386, + "grad_norm": 15.094547271728516, + "learning_rate": 8.023380836923404e-06, + "lm_loss": 5.7492, + "loss": 1.5277, + "step": 685, + "text_contrastive_loss": 0.8778, + "train_positive_log_prob": -84.4177, + "train_positive_token_accuracy": 0.083, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.47, + "epoch": 1.5485327313769752, + "grad_norm": 13.039653778076172, + "learning_rate": 8.017598928900452e-06, + "lm_loss": 5.5967, + "loss": 1.4238, + "step": 686, + "text_contrastive_loss": 0.7884, + "train_positive_log_prob": -82.5282, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.5264, + "epoch": 1.5507900677200903, + "grad_norm": 14.566529273986816, + "learning_rate": 8.011810666904212e-06, + "lm_loss": 5.6958, + "loss": 1.5157, + "step": 687, + "text_contrastive_loss": 0.8395, + "train_positive_log_prob": -85.0098, + "train_positive_token_accuracy": 0.0756, + "train_positive_token_prob": 0.0276 + }, + { + "contrastive_loss": 0.4454, + "epoch": 1.5530474040632054, + "grad_norm": 15.863831520080566, + "learning_rate": 8.006016063122672e-06, + "lm_loss": 5.6261, + "loss": 1.411, + "step": 688, + "text_contrastive_loss": 0.8059, + "train_positive_log_prob": -81.5353, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.0281 + }, + { + "contrastive_loss": 0.4816, + "epoch": 1.5553047404063205, + "grad_norm": 13.956178665161133, + "learning_rate": 8.000215129757178e-06, + "lm_loss": 5.6006, + "loss": 1.4833, + "step": 689, + "text_contrastive_loss": 0.8834, + "train_positive_log_prob": -82.8911, + "train_positive_token_accuracy": 0.0839, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.3698, + "epoch": 1.5575620767494356, + "grad_norm": 13.523423194885254, + "learning_rate": 7.994407879022397e-06, + "lm_loss": 5.7382, + "loss": 1.3065, + "step": 690, + "text_contrastive_loss": 0.7257, + "train_positive_log_prob": -84.3939, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.5507, + "epoch": 1.559819413092551, + "grad_norm": 14.664929389953613, + "learning_rate": 7.9885943231463e-06, + "lm_loss": 5.7185, + "loss": 1.6127, + "step": 691, + "text_contrastive_loss": 0.9802, + "train_positive_log_prob": -83.6579, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0277 + }, + { + "contrastive_loss": 0.4425, + "epoch": 1.5620767494356658, + "grad_norm": 13.805353164672852, + "learning_rate": 7.98277447437014e-06, + "lm_loss": 5.7303, + "loss": 1.4197, + "step": 692, + "text_contrastive_loss": 0.8082, + "train_positive_log_prob": -87.7182, + "train_positive_token_accuracy": 0.0847, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.5328, + "epoch": 1.564334085778781, + "grad_norm": 15.047657012939453, + "learning_rate": 7.976948344948412e-06, + "lm_loss": 5.7732, + "loss": 1.5557, + "step": 693, + "text_contrastive_loss": 0.8912, + "train_positive_log_prob": -85.3158, + "train_positive_token_accuracy": 0.0719, + "train_positive_token_prob": 0.0273 + }, + { + "contrastive_loss": 0.5185, + "epoch": 1.5665914221218962, + "grad_norm": 13.574178695678711, + "learning_rate": 7.971115947148842e-06, + "lm_loss": 5.5913, + "loss": 1.4799, + "step": 694, + "text_contrastive_loss": 0.8046, + "train_positive_log_prob": -82.2982, + "train_positive_token_accuracy": 0.0738, + "train_positive_token_prob": 0.0277 + }, + { + "contrastive_loss": 0.5766, + "epoch": 1.5688487584650113, + "grad_norm": 17.185392379760742, + "learning_rate": 7.965277293252354e-06, + "lm_loss": 5.5453, + "loss": 1.5794, + "step": 695, + "text_contrastive_loss": 0.8965, + "train_positive_log_prob": -82.7266, + "train_positive_token_accuracy": 0.0697, + "train_positive_token_prob": 0.0279 + }, + { + "contrastive_loss": 0.5021, + "epoch": 1.5711060948081266, + "grad_norm": 14.885881423950195, + "learning_rate": 7.95943239555304e-06, + "lm_loss": 5.6439, + "loss": 1.4632, + "step": 696, + "text_contrastive_loss": 0.7935, + "train_positive_log_prob": -81.9729, + "train_positive_token_accuracy": 0.0686, + "train_positive_token_prob": 0.0275 + }, + { + "contrastive_loss": 0.4813, + "epoch": 1.5733634311512414, + "grad_norm": 16.061283111572266, + "learning_rate": 7.953581266358148e-06, + "lm_loss": 5.683, + "loss": 1.4846, + "step": 697, + "text_contrastive_loss": 0.8699, + "train_positive_log_prob": -84.6637, + "train_positive_token_accuracy": 0.0732, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.4571, + "epoch": 1.5756207674943568, + "grad_norm": 16.301799774169922, + "learning_rate": 7.94772391798804e-06, + "lm_loss": 5.6104, + "loss": 1.2995, + "step": 698, + "text_contrastive_loss": 0.5628, + "train_positive_log_prob": -83.2373, + "train_positive_token_accuracy": 0.0782, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.5282, + "epoch": 1.5778781038374716, + "grad_norm": 15.997379302978516, + "learning_rate": 7.941860362776176e-06, + "lm_loss": 5.5753, + "loss": 1.5329, + "step": 699, + "text_contrastive_loss": 0.8943, + "train_positive_log_prob": -78.5536, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.5032, + "epoch": 1.580135440180587, + "grad_norm": 15.163290977478027, + "learning_rate": 7.935990613069087e-06, + "lm_loss": 5.6094, + "loss": 1.5086, + "step": 700, + "text_contrastive_loss": 0.8889, + "train_positive_log_prob": -81.4709, + "train_positive_token_accuracy": 0.0783, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.423, + "epoch": 1.582392776523702, + "grad_norm": 13.42507266998291, + "learning_rate": 7.930114681226341e-06, + "lm_loss": 5.7168, + "loss": 1.3396, + "step": 701, + "text_contrastive_loss": 0.6898, + "train_positive_log_prob": -82.7148, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.0285 + }, + { + "contrastive_loss": 0.4207, + "epoch": 1.5846501128668171, + "grad_norm": 14.971874237060547, + "learning_rate": 7.924232579620533e-06, + "lm_loss": 5.6897, + "loss": 1.4329, + "step": 702, + "text_contrastive_loss": 0.8866, + "train_positive_log_prob": -83.8101, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0286 + }, + { + "contrastive_loss": 0.3741, + "epoch": 1.5869074492099324, + "grad_norm": 12.66203498840332, + "learning_rate": 7.91834432063724e-06, + "lm_loss": 5.5778, + "loss": 1.2824, + "step": 703, + "text_contrastive_loss": 0.701, + "train_positive_log_prob": -82.8604, + "train_positive_token_accuracy": 0.0726, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.4889, + "epoch": 1.5891647855530473, + "grad_norm": 16.0451717376709, + "learning_rate": 7.912449916675008e-06, + "lm_loss": 5.5718, + "loss": 1.4275, + "step": 704, + "text_contrastive_loss": 0.7628, + "train_positive_log_prob": -83.3331, + "train_positive_token_accuracy": 0.0747, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.5527, + "epoch": 1.5914221218961626, + "grad_norm": 16.90771484375, + "learning_rate": 7.90654938014533e-06, + "lm_loss": 5.6262, + "loss": 1.5266, + "step": 705, + "text_contrastive_loss": 0.8225, + "train_positive_log_prob": -83.3444, + "train_positive_token_accuracy": 0.0799, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.4658, + "epoch": 1.5936794582392777, + "grad_norm": 15.123517990112305, + "learning_rate": 7.900642723472596e-06, + "lm_loss": 5.8234, + "loss": 1.4394, + "step": 706, + "text_contrastive_loss": 0.7824, + "train_positive_log_prob": -88.3992, + "train_positive_token_accuracy": 0.0721, + "train_positive_token_prob": 0.0286 + }, + { + "contrastive_loss": 0.4542, + "epoch": 1.5959367945823928, + "grad_norm": 16.686111450195312, + "learning_rate": 7.894729959094097e-06, + "lm_loss": 5.7093, + "loss": 1.4151, + "step": 707, + "text_contrastive_loss": 0.7799, + "train_positive_log_prob": -83.3886, + "train_positive_token_accuracy": 0.0741, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.5535, + "epoch": 1.5981941309255079, + "grad_norm": 15.97707462310791, + "learning_rate": 7.888811099459974e-06, + "lm_loss": 5.6928, + "loss": 1.5947, + "step": 708, + "text_contrastive_loss": 0.9437, + "train_positive_log_prob": -85.3024, + "train_positive_token_accuracy": 0.0816, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.4852, + "epoch": 1.600451467268623, + "grad_norm": 16.52826690673828, + "learning_rate": 7.882886157033209e-06, + "lm_loss": 5.7066, + "loss": 1.4224, + "step": 709, + "text_contrastive_loss": 0.733, + "train_positive_log_prob": -84.7526, + "train_positive_token_accuracy": 0.0847, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.4855, + "epoch": 1.6027088036117383, + "grad_norm": 15.485867500305176, + "learning_rate": 7.876955144289594e-06, + "lm_loss": 5.7469, + "loss": 1.5005, + "step": 710, + "text_contrastive_loss": 0.8808, + "train_positive_log_prob": -84.7467, + "train_positive_token_accuracy": 0.0723, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.5587, + "epoch": 1.6049661399548532, + "grad_norm": 17.72032356262207, + "learning_rate": 7.871018073717693e-06, + "lm_loss": 5.6785, + "loss": 1.5746, + "step": 711, + "text_contrastive_loss": 0.896, + "train_positive_log_prob": -83.1175, + "train_positive_token_accuracy": 0.0731, + "train_positive_token_prob": 0.0285 + }, + { + "contrastive_loss": 0.5219, + "epoch": 1.6072234762979685, + "grad_norm": 17.101654052734375, + "learning_rate": 7.865074957818839e-06, + "lm_loss": 5.8464, + "loss": 1.4967, + "step": 712, + "text_contrastive_loss": 0.7804, + "train_positive_log_prob": -87.0816, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.4699, + "epoch": 1.6094808126410836, + "grad_norm": 15.60084342956543, + "learning_rate": 7.859125809107082e-06, + "lm_loss": 5.6099, + "loss": 1.4565, + "step": 713, + "text_contrastive_loss": 0.8512, + "train_positive_log_prob": -85.1133, + "train_positive_token_accuracy": 0.0772, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.5304, + "epoch": 1.6117381489841986, + "grad_norm": 17.016775131225586, + "learning_rate": 7.853170640109182e-06, + "lm_loss": 5.7116, + "loss": 1.574, + "step": 714, + "text_contrastive_loss": 0.9449, + "train_positive_log_prob": -86.1658, + "train_positive_token_accuracy": 0.0823, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.5022, + "epoch": 1.6139954853273137, + "grad_norm": 14.960779190063477, + "learning_rate": 7.847209463364574e-06, + "lm_loss": 5.5966, + "loss": 1.4896, + "step": 715, + "text_contrastive_loss": 0.8555, + "train_positive_log_prob": -80.6875, + "train_positive_token_accuracy": 0.0837, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.5147, + "epoch": 1.6162528216704288, + "grad_norm": 16.02543830871582, + "learning_rate": 7.841242291425342e-06, + "lm_loss": 5.5781, + "loss": 1.5167, + "step": 716, + "text_contrastive_loss": 0.8883, + "train_positive_log_prob": -81.9083, + "train_positive_token_accuracy": 0.0809, + "train_positive_token_prob": 0.0292 + }, + { + "contrastive_loss": 0.5836, + "epoch": 1.6185101580135441, + "grad_norm": 16.46441650390625, + "learning_rate": 7.835269136856194e-06, + "lm_loss": 5.6446, + "loss": 1.598, + "step": 717, + "text_contrastive_loss": 0.8999, + "train_positive_log_prob": -81.6667, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.4231, + "epoch": 1.620767494356659, + "grad_norm": 14.927350997924805, + "learning_rate": 7.829290012234438e-06, + "lm_loss": 5.5749, + "loss": 1.3463, + "step": 718, + "text_contrastive_loss": 0.7314, + "train_positive_log_prob": -81.1214, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.458, + "epoch": 1.6230248306997743, + "grad_norm": 14.263801574707031, + "learning_rate": 7.823304930149949e-06, + "lm_loss": 5.5116, + "loss": 1.407, + "step": 719, + "text_contrastive_loss": 0.7957, + "train_positive_log_prob": -79.7248, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.4134, + "epoch": 1.6252821670428894, + "grad_norm": 15.119287490844727, + "learning_rate": 7.817313903205148e-06, + "lm_loss": 5.6889, + "loss": 1.3429, + "step": 720, + "text_contrastive_loss": 0.7212, + "train_positive_log_prob": -85.5753, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.438, + "epoch": 1.6275395033860045, + "grad_norm": 13.916505813598633, + "learning_rate": 7.811316944014974e-06, + "lm_loss": 5.682, + "loss": 1.4345, + "step": 721, + "text_contrastive_loss": 0.8565, + "train_positive_log_prob": -83.2527, + "train_positive_token_accuracy": 0.0741, + "train_positive_token_prob": 0.0279 + }, + { + "contrastive_loss": 0.4637, + "epoch": 1.6297968397291196, + "grad_norm": 15.625614166259766, + "learning_rate": 7.805314065206857e-06, + "lm_loss": 5.5972, + "loss": 1.3984, + "step": 722, + "text_contrastive_loss": 0.75, + "train_positive_log_prob": -81.8847, + "train_positive_token_accuracy": 0.0801, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.4416, + "epoch": 1.6320541760722347, + "grad_norm": 13.980611801147461, + "learning_rate": 7.799305279420691e-06, + "lm_loss": 5.6503, + "loss": 1.4343, + "step": 723, + "text_contrastive_loss": 0.8552, + "train_positive_log_prob": -82.3918, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.4065, + "epoch": 1.63431151241535, + "grad_norm": 13.4432954788208, + "learning_rate": 7.793290599308807e-06, + "lm_loss": 5.6487, + "loss": 1.4368, + "step": 724, + "text_contrastive_loss": 0.931, + "train_positive_log_prob": -83.2098, + "train_positive_token_accuracy": 0.0788, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.5409, + "epoch": 1.6365688487584649, + "grad_norm": 16.960397720336914, + "learning_rate": 7.78727003753595e-06, + "lm_loss": 5.5825, + "loss": 1.571, + "step": 725, + "text_contrastive_loss": 0.9439, + "train_positive_log_prob": -80.888, + "train_positive_token_accuracy": 0.0709, + "train_positive_token_prob": 0.0281 + }, + { + "contrastive_loss": 0.4632, + "epoch": 1.6388261851015802, + "grad_norm": 15.548394203186035, + "learning_rate": 7.78124360677925e-06, + "lm_loss": 5.563, + "loss": 1.4885, + "step": 726, + "text_contrastive_loss": 0.9379, + "train_positive_log_prob": -83.5324, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.4936, + "epoch": 1.6410835214446953, + "grad_norm": 15.020196914672852, + "learning_rate": 7.775211319728191e-06, + "lm_loss": 5.6957, + "loss": 1.4923, + "step": 727, + "text_contrastive_loss": 0.8583, + "train_positive_log_prob": -84.2994, + "train_positive_token_accuracy": 0.0773, + "train_positive_token_prob": 0.0281 + }, + { + "contrastive_loss": 0.4793, + "epoch": 1.6433408577878104, + "grad_norm": 13.576902389526367, + "learning_rate": 7.769173189084589e-06, + "lm_loss": 5.6299, + "loss": 1.503, + "step": 728, + "text_contrastive_loss": 0.9213, + "train_positive_log_prob": -83.7656, + "train_positive_token_accuracy": 0.0751, + "train_positive_token_prob": 0.0292 + }, + { + "contrastive_loss": 0.5622, + "epoch": 1.6455981941309257, + "grad_norm": 15.968917846679688, + "learning_rate": 7.763129227562568e-06, + "lm_loss": 5.6922, + "loss": 1.5674, + "step": 729, + "text_contrastive_loss": 0.872, + "train_positive_log_prob": -84.8933, + "train_positive_token_accuracy": 0.0775, + "train_positive_token_prob": 0.0292 + }, + { + "contrastive_loss": 0.4253, + "epoch": 1.6478555304740405, + "grad_norm": 11.961668014526367, + "learning_rate": 7.757079447888529e-06, + "lm_loss": 5.6106, + "loss": 1.3941, + "step": 730, + "text_contrastive_loss": 0.8155, + "train_positive_log_prob": -83.3698, + "train_positive_token_accuracy": 0.0705, + "train_positive_token_prob": 0.0281 + }, + { + "contrastive_loss": 0.4224, + "epoch": 1.6501128668171559, + "grad_norm": 14.189032554626465, + "learning_rate": 7.75102386280112e-06, + "lm_loss": 5.5594, + "loss": 1.4198, + "step": 731, + "text_contrastive_loss": 0.8829, + "train_positive_log_prob": -82.5264, + "train_positive_token_accuracy": 0.0847, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4987, + "epoch": 1.6523702031602707, + "grad_norm": 16.02800750732422, + "learning_rate": 7.744962485051217e-06, + "lm_loss": 5.6141, + "loss": 1.4237, + "step": 732, + "text_contrastive_loss": 0.7272, + "train_positive_log_prob": -82.6569, + "train_positive_token_accuracy": 0.0746, + "train_positive_token_prob": 0.0292 + }, + { + "contrastive_loss": 0.5574, + "epoch": 1.654627539503386, + "grad_norm": 14.435892105102539, + "learning_rate": 7.738895327401891e-06, + "lm_loss": 5.6596, + "loss": 1.5477, + "step": 733, + "text_contrastive_loss": 0.8486, + "train_positive_log_prob": -84.4114, + "train_positive_token_accuracy": 0.0723, + "train_positive_token_prob": 0.0281 + }, + { + "contrastive_loss": 0.463, + "epoch": 1.6568848758465011, + "grad_norm": 15.091788291931152, + "learning_rate": 7.732822402628385e-06, + "lm_loss": 5.6116, + "loss": 1.4275, + "step": 734, + "text_contrastive_loss": 0.8067, + "train_positive_log_prob": -81.6985, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.5229, + "epoch": 1.6591422121896162, + "grad_norm": 14.150435447692871, + "learning_rate": 7.726743723518087e-06, + "lm_loss": 5.5404, + "loss": 1.4638, + "step": 735, + "text_contrastive_loss": 0.7737, + "train_positive_log_prob": -82.4356, + "train_positive_token_accuracy": 0.0827, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.5449, + "epoch": 1.6613995485327315, + "grad_norm": 14.770322799682617, + "learning_rate": 7.720659302870496e-06, + "lm_loss": 5.6358, + "loss": 1.5641, + "step": 736, + "text_contrastive_loss": 0.9112, + "train_positive_log_prob": -83.3461, + "train_positive_token_accuracy": 0.0851, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.5634, + "epoch": 1.6636568848758464, + "grad_norm": 16.423322677612305, + "learning_rate": 7.714569153497204e-06, + "lm_loss": 5.7256, + "loss": 1.5752, + "step": 737, + "text_contrastive_loss": 0.8784, + "train_positive_log_prob": -84.1377, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.3726, + "epoch": 1.6659142212189617, + "grad_norm": 13.500272750854492, + "learning_rate": 7.708473288221868e-06, + "lm_loss": 5.7883, + "loss": 1.3863, + "step": 738, + "text_contrastive_loss": 0.8698, + "train_positive_log_prob": -85.7638, + "train_positive_token_accuracy": 0.0725, + "train_positive_token_prob": 0.0285 + }, + { + "contrastive_loss": 0.4047, + "epoch": 1.6681715575620768, + "grad_norm": 13.40776252746582, + "learning_rate": 7.702371719880178e-06, + "lm_loss": 5.7828, + "loss": 1.3603, + "step": 739, + "text_contrastive_loss": 0.7548, + "train_positive_log_prob": -84.9105, + "train_positive_token_accuracy": 0.0745, + "train_positive_token_prob": 0.0292 + }, + { + "contrastive_loss": 0.5778, + "epoch": 1.670428893905192, + "grad_norm": 19.666736602783203, + "learning_rate": 7.696264461319831e-06, + "lm_loss": 5.5872, + "loss": 1.6082, + "step": 740, + "text_contrastive_loss": 0.9434, + "train_positive_log_prob": -81.2583, + "train_positive_token_accuracy": 0.0723, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.4538, + "epoch": 1.672686230248307, + "grad_norm": 13.57969856262207, + "learning_rate": 7.69015152540051e-06, + "lm_loss": 5.6104, + "loss": 1.4696, + "step": 741, + "text_contrastive_loss": 0.9095, + "train_positive_log_prob": -82.6663, + "train_positive_token_accuracy": 0.0848, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4191, + "epoch": 1.674943566591422, + "grad_norm": 14.619769096374512, + "learning_rate": 7.684032924993845e-06, + "lm_loss": 5.6756, + "loss": 1.3818, + "step": 742, + "text_contrastive_loss": 0.7902, + "train_positive_log_prob": -83.3139, + "train_positive_token_accuracy": 0.0806, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.3881, + "epoch": 1.6772009029345374, + "grad_norm": 13.681233406066895, + "learning_rate": 7.677908672983404e-06, + "lm_loss": 5.6276, + "loss": 1.3546, + "step": 743, + "text_contrastive_loss": 0.8074, + "train_positive_log_prob": -83.6696, + "train_positive_token_accuracy": 0.0799, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4186, + "epoch": 1.6794582392776523, + "grad_norm": 13.719589233398438, + "learning_rate": 7.671778782264647e-06, + "lm_loss": 5.6167, + "loss": 1.3565, + "step": 744, + "text_contrastive_loss": 0.7524, + "train_positive_log_prob": -81.7958, + "train_positive_token_accuracy": 0.0807, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.5202, + "epoch": 1.6817155756207676, + "grad_norm": 14.650406837463379, + "learning_rate": 7.66564326574491e-06, + "lm_loss": 5.7198, + "loss": 1.5202, + "step": 745, + "text_contrastive_loss": 0.8562, + "train_positive_log_prob": -84.1885, + "train_positive_token_accuracy": 0.0746, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.4809, + "epoch": 1.6839729119638827, + "grad_norm": 14.786684036254883, + "learning_rate": 7.65950213634337e-06, + "lm_loss": 5.6805, + "loss": 1.5209, + "step": 746, + "text_contrastive_loss": 0.9438, + "train_positive_log_prob": -84.0291, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3547, + "epoch": 1.6862302483069977, + "grad_norm": 13.625265121459961, + "learning_rate": 7.653355406991034e-06, + "lm_loss": 5.5719, + "loss": 1.3461, + "step": 747, + "text_contrastive_loss": 0.8685, + "train_positive_log_prob": -82.9902, + "train_positive_token_accuracy": 0.0855, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.494, + "epoch": 1.6884875846501128, + "grad_norm": 16.157793045043945, + "learning_rate": 7.64720309063069e-06, + "lm_loss": 5.6602, + "loss": 1.4636, + "step": 748, + "text_contrastive_loss": 0.8071, + "train_positive_log_prob": -83.7534, + "train_positive_token_accuracy": 0.0847, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4407, + "epoch": 1.690744920993228, + "grad_norm": 12.296791076660156, + "learning_rate": 7.641045200216896e-06, + "lm_loss": 5.5736, + "loss": 1.3939, + "step": 749, + "text_contrastive_loss": 0.7915, + "train_positive_log_prob": -83.2573, + "train_positive_token_accuracy": 0.0696, + "train_positive_token_prob": 0.0281 + }, + { + "contrastive_loss": 0.4045, + "epoch": 1.6930022573363432, + "grad_norm": 14.133316040039062, + "learning_rate": 7.634881748715941e-06, + "lm_loss": 5.8323, + "loss": 1.3474, + "step": 750, + "text_contrastive_loss": 0.7193, + "train_positive_log_prob": -85.3922, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.5063, + "epoch": 1.695259593679458, + "grad_norm": 15.648238182067871, + "learning_rate": 7.628712749105831e-06, + "lm_loss": 5.6789, + "loss": 1.5364, + "step": 751, + "text_contrastive_loss": 0.9244, + "train_positive_log_prob": -83.2288, + "train_positive_token_accuracy": 0.0778, + "train_positive_token_prob": 0.0285 + }, + { + "contrastive_loss": 0.389, + "epoch": 1.6975169300225734, + "grad_norm": 13.445862770080566, + "learning_rate": 7.622538214376248e-06, + "lm_loss": 5.614, + "loss": 1.2759, + "step": 752, + "text_contrastive_loss": 0.6511, + "train_positive_log_prob": -83.7275, + "train_positive_token_accuracy": 0.0796, + "train_positive_token_prob": 0.0285 + }, + { + "contrastive_loss": 0.4324, + "epoch": 1.6997742663656885, + "grad_norm": 13.527403831481934, + "learning_rate": 7.616358157528535e-06, + "lm_loss": 5.5906, + "loss": 1.3935, + "step": 753, + "text_contrastive_loss": 0.804, + "train_positive_log_prob": -81.6582, + "train_positive_token_accuracy": 0.0778, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.4645, + "epoch": 1.7020316027088036, + "grad_norm": 17.295063018798828, + "learning_rate": 7.610172591575656e-06, + "lm_loss": 5.7421, + "loss": 1.4054, + "step": 754, + "text_contrastive_loss": 0.7334, + "train_positive_log_prob": -84.4743, + "train_positive_token_accuracy": 0.0743, + "train_positive_token_prob": 0.027 + }, + { + "contrastive_loss": 0.4783, + "epoch": 1.7042889390519187, + "grad_norm": 14.567025184631348, + "learning_rate": 7.60398152954218e-06, + "lm_loss": 5.6383, + "loss": 1.3756, + "step": 755, + "text_contrastive_loss": 0.6669, + "train_positive_log_prob": -83.1865, + "train_positive_token_accuracy": 0.0725, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.4442, + "epoch": 1.7065462753950338, + "grad_norm": 13.607656478881836, + "learning_rate": 7.597784984464248e-06, + "lm_loss": 5.6439, + "loss": 1.4153, + "step": 756, + "text_contrastive_loss": 0.8135, + "train_positive_log_prob": -84.5278, + "train_positive_token_accuracy": 0.0728, + "train_positive_token_prob": 0.0286 + }, + { + "contrastive_loss": 0.5499, + "epoch": 1.708803611738149, + "grad_norm": 15.9579439163208, + "learning_rate": 7.5915829693895435e-06, + "lm_loss": 5.6153, + "loss": 1.5317, + "step": 757, + "text_contrastive_loss": 0.8405, + "train_positive_log_prob": -81.9036, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.0281 + }, + { + "contrastive_loss": 0.55, + "epoch": 1.711060948081264, + "grad_norm": 15.920516014099121, + "learning_rate": 7.585375497377271e-06, + "lm_loss": 5.6634, + "loss": 1.6203, + "step": 758, + "text_contrastive_loss": 1.0078, + "train_positive_log_prob": -83.6693, + "train_positive_token_accuracy": 0.0714, + "train_positive_token_prob": 0.028 + }, + { + "contrastive_loss": 0.459, + "epoch": 1.7133182844243793, + "grad_norm": 14.685449600219727, + "learning_rate": 7.579162581498125e-06, + "lm_loss": 5.6672, + "loss": 1.4651, + "step": 759, + "text_contrastive_loss": 0.8788, + "train_positive_log_prob": -83.7887, + "train_positive_token_accuracy": 0.0734, + "train_positive_token_prob": 0.0279 + }, + { + "contrastive_loss": 0.547, + "epoch": 1.7155756207674944, + "grad_norm": 16.232683181762695, + "learning_rate": 7.572944234834261e-06, + "lm_loss": 5.5787, + "loss": 1.5259, + "step": 760, + "text_contrastive_loss": 0.8419, + "train_positive_log_prob": -82.2189, + "train_positive_token_accuracy": 0.0739, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.5429, + "epoch": 1.7178329571106095, + "grad_norm": 14.05725383758545, + "learning_rate": 7.5667204704792706e-06, + "lm_loss": 5.5723, + "loss": 1.4872, + "step": 761, + "text_contrastive_loss": 0.7741, + "train_positive_log_prob": -82.9307, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.5645, + "epoch": 1.7200902934537246, + "grad_norm": 15.913080215454102, + "learning_rate": 7.5604913015381535e-06, + "lm_loss": 5.612, + "loss": 1.618, + "step": 762, + "text_contrastive_loss": 0.9847, + "train_positive_log_prob": -82.8652, + "train_positive_token_accuracy": 0.0746, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.5004, + "epoch": 1.7223476297968396, + "grad_norm": 15.01777172088623, + "learning_rate": 7.554256741127291e-06, + "lm_loss": 5.6485, + "loss": 1.5048, + "step": 763, + "text_contrastive_loss": 0.879, + "train_positive_log_prob": -83.8218, + "train_positive_token_accuracy": 0.08, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.4249, + "epoch": 1.724604966139955, + "grad_norm": 13.942124366760254, + "learning_rate": 7.548016802374412e-06, + "lm_loss": 5.5199, + "loss": 1.4004, + "step": 764, + "text_contrastive_loss": 0.847, + "train_positive_log_prob": -79.3699, + "train_positive_token_accuracy": 0.0709, + "train_positive_token_prob": 0.0278 + }, + { + "contrastive_loss": 0.564, + "epoch": 1.7268623024830698, + "grad_norm": 15.48228645324707, + "learning_rate": 7.541771498418575e-06, + "lm_loss": 5.6861, + "loss": 1.586, + "step": 765, + "text_contrastive_loss": 0.9066, + "train_positive_log_prob": -83.9554, + "train_positive_token_accuracy": 0.0714, + "train_positive_token_prob": 0.0282 + }, + { + "contrastive_loss": 0.4756, + "epoch": 1.7291196388261851, + "grad_norm": 14.206538200378418, + "learning_rate": 7.535520842410136e-06, + "lm_loss": 5.5848, + "loss": 1.4707, + "step": 766, + "text_contrastive_loss": 0.8732, + "train_positive_log_prob": -81.5227, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4922, + "epoch": 1.7313769751693002, + "grad_norm": 15.891885757446289, + "learning_rate": 7.529264847510715e-06, + "lm_loss": 5.605, + "loss": 1.4179, + "step": 767, + "text_contrastive_loss": 0.7303, + "train_positive_log_prob": -83.5083, + "train_positive_token_accuracy": 0.0821, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.5116, + "epoch": 1.7336343115124153, + "grad_norm": 14.138011932373047, + "learning_rate": 7.52300352689318e-06, + "lm_loss": 5.6124, + "loss": 1.5538, + "step": 768, + "text_contrastive_loss": 0.9619, + "train_positive_log_prob": -84.2734, + "train_positive_token_accuracy": 0.0739, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.5051, + "epoch": 1.7358916478555306, + "grad_norm": 16.293575286865234, + "learning_rate": 7.516736893741611e-06, + "lm_loss": 5.6102, + "loss": 1.5055, + "step": 769, + "text_contrastive_loss": 0.8788, + "train_positive_log_prob": -84.5274, + "train_positive_token_accuracy": 0.0799, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.4136, + "epoch": 1.7381489841986455, + "grad_norm": 13.28024673461914, + "learning_rate": 7.510464961251271e-06, + "lm_loss": 5.66, + "loss": 1.2926, + "step": 770, + "text_contrastive_loss": 0.6262, + "train_positive_log_prob": -83.7298, + "train_positive_token_accuracy": 0.0776, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.4625, + "epoch": 1.7404063205417608, + "grad_norm": 13.785194396972656, + "learning_rate": 7.5041877426285856e-06, + "lm_loss": 5.5338, + "loss": 1.4152, + "step": 771, + "text_contrastive_loss": 0.7986, + "train_positive_log_prob": -82.5518, + "train_positive_token_accuracy": 0.0817, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.5912, + "epoch": 1.742663656884876, + "grad_norm": 16.37154769897461, + "learning_rate": 7.49790525109111e-06, + "lm_loss": 5.619, + "loss": 1.6494, + "step": 772, + "text_contrastive_loss": 0.9927, + "train_positive_log_prob": -84.0263, + "train_positive_token_accuracy": 0.0862, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3917, + "epoch": 1.744920993227991, + "grad_norm": 13.226980209350586, + "learning_rate": 7.491617499867502e-06, + "lm_loss": 5.5191, + "loss": 1.3084, + "step": 773, + "text_contrastive_loss": 0.7295, + "train_positive_log_prob": -82.3989, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.3858, + "epoch": 1.747178329571106, + "grad_norm": 13.030774116516113, + "learning_rate": 7.485324502197494e-06, + "lm_loss": 5.638, + "loss": 1.2957, + "step": 774, + "text_contrastive_loss": 0.6923, + "train_positive_log_prob": -84.3509, + "train_positive_token_accuracy": 0.0843, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.3829, + "epoch": 1.7494356659142212, + "grad_norm": 13.835314750671387, + "learning_rate": 7.479026271331864e-06, + "lm_loss": 5.5914, + "loss": 1.3019, + "step": 775, + "text_contrastive_loss": 0.7197, + "train_positive_log_prob": -81.8853, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.5479, + "epoch": 1.7516930022573365, + "grad_norm": 15.478888511657715, + "learning_rate": 7.472722820532414e-06, + "lm_loss": 5.6064, + "loss": 1.5398, + "step": 776, + "text_contrastive_loss": 0.8624, + "train_positive_log_prob": -83.042, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.6671, + "epoch": 1.7539503386004514, + "grad_norm": 18.064414978027344, + "learning_rate": 7.466414163071934e-06, + "lm_loss": 5.5698, + "loss": 1.6579, + "step": 777, + "text_contrastive_loss": 0.8676, + "train_positive_log_prob": -82.7657, + "train_positive_token_accuracy": 0.0833, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.5462, + "epoch": 1.7562076749435667, + "grad_norm": 17.70136070251465, + "learning_rate": 7.460100312234176e-06, + "lm_loss": 5.614, + "loss": 1.5893, + "step": 778, + "text_contrastive_loss": 0.9635, + "train_positive_log_prob": -83.7691, + "train_positive_token_accuracy": 0.083, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.4495, + "epoch": 1.7584650112866818, + "grad_norm": 14.237092971801758, + "learning_rate": 7.453781281313831e-06, + "lm_loss": 5.5583, + "loss": 1.3692, + "step": 779, + "text_contrastive_loss": 0.7277, + "train_positive_log_prob": -81.0764, + "train_positive_token_accuracy": 0.0798, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.5176, + "epoch": 1.7607223476297968, + "grad_norm": 14.912921905517578, + "learning_rate": 7.447457083616494e-06, + "lm_loss": 5.5592, + "loss": 1.4947, + "step": 780, + "text_contrastive_loss": 0.8424, + "train_positive_log_prob": -83.3878, + "train_positive_token_accuracy": 0.0862, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.4051, + "epoch": 1.762979683972912, + "grad_norm": 14.96948528289795, + "learning_rate": 7.441127732458642e-06, + "lm_loss": 5.5364, + "loss": 1.3749, + "step": 781, + "text_contrastive_loss": 0.8324, + "train_positive_log_prob": -82.7012, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.464, + "epoch": 1.765237020316027, + "grad_norm": 14.908394813537598, + "learning_rate": 7.434793241167601e-06, + "lm_loss": 5.5138, + "loss": 1.5142, + "step": 782, + "text_contrastive_loss": 0.9976, + "train_positive_log_prob": -81.243, + "train_positive_token_accuracy": 0.0832, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.4923, + "epoch": 1.7674943566591423, + "grad_norm": 14.808036804199219, + "learning_rate": 7.428453623081522e-06, + "lm_loss": 5.5242, + "loss": 1.4992, + "step": 783, + "text_contrastive_loss": 0.9091, + "train_positive_log_prob": -79.4112, + "train_positive_token_accuracy": 0.0764, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.5279, + "epoch": 1.7697516930022572, + "grad_norm": 14.06231689453125, + "learning_rate": 7.422108891549349e-06, + "lm_loss": 5.5644, + "loss": 1.5323, + "step": 784, + "text_contrastive_loss": 0.8958, + "train_positive_log_prob": -83.0977, + "train_positive_token_accuracy": 0.0861, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.4355, + "epoch": 1.7720090293453725, + "grad_norm": 14.778243064880371, + "learning_rate": 7.415759059930799e-06, + "lm_loss": 5.5615, + "loss": 1.3451, + "step": 785, + "text_contrastive_loss": 0.707, + "train_positive_log_prob": -80.6814, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.4089, + "epoch": 1.7742663656884876, + "grad_norm": 14.337993621826172, + "learning_rate": 7.409404141596319e-06, + "lm_loss": 5.552, + "loss": 1.3724, + "step": 786, + "text_contrastive_loss": 0.8166, + "train_positive_log_prob": -81.0286, + "train_positive_token_accuracy": 0.0832, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.423, + "epoch": 1.7765237020316027, + "grad_norm": 15.23420238494873, + "learning_rate": 7.403044149927074e-06, + "lm_loss": 5.486, + "loss": 1.3159, + "step": 787, + "text_contrastive_loss": 0.6885, + "train_positive_log_prob": -78.5884, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.4517, + "epoch": 1.7787810383747178, + "grad_norm": 14.249853134155273, + "learning_rate": 7.396679098314908e-06, + "lm_loss": 5.5283, + "loss": 1.3896, + "step": 788, + "text_contrastive_loss": 0.7702, + "train_positive_log_prob": -82.317, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.5075, + "epoch": 1.7810383747178329, + "grad_norm": 13.771214485168457, + "learning_rate": 7.390309000162321e-06, + "lm_loss": 5.6282, + "loss": 1.5053, + "step": 789, + "text_contrastive_loss": 0.87, + "train_positive_log_prob": -84.1161, + "train_positive_token_accuracy": 0.087, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.4329, + "epoch": 1.7832957110609482, + "grad_norm": 15.034110069274902, + "learning_rate": 7.383933868882438e-06, + "lm_loss": 5.6843, + "loss": 1.4138, + "step": 790, + "text_contrastive_loss": 0.825, + "train_positive_log_prob": -84.4644, + "train_positive_token_accuracy": 0.0736, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.4016, + "epoch": 1.785553047404063, + "grad_norm": 12.467670440673828, + "learning_rate": 7.377553717898983e-06, + "lm_loss": 5.5377, + "loss": 1.3134, + "step": 791, + "text_contrastive_loss": 0.7161, + "train_positive_log_prob": -80.99, + "train_positive_token_accuracy": 0.0756, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.358, + "epoch": 1.7878103837471784, + "grad_norm": 13.146148681640625, + "learning_rate": 7.37116856064625e-06, + "lm_loss": 5.6856, + "loss": 1.3233, + "step": 792, + "text_contrastive_loss": 0.7935, + "train_positive_log_prob": -83.9336, + "train_positive_token_accuracy": 0.077, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.4693, + "epoch": 1.7900677200902935, + "grad_norm": 13.863717079162598, + "learning_rate": 7.364778410569071e-06, + "lm_loss": 5.6756, + "loss": 1.4343, + "step": 793, + "text_contrastive_loss": 0.7948, + "train_positive_log_prob": -85.6033, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.4345, + "epoch": 1.7923250564334086, + "grad_norm": 15.09156322479248, + "learning_rate": 7.358383281122797e-06, + "lm_loss": 5.5968, + "loss": 1.3944, + "step": 794, + "text_contrastive_loss": 0.8006, + "train_positive_log_prob": -81.4811, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.5224, + "epoch": 1.7945823927765236, + "grad_norm": 12.545190811157227, + "learning_rate": 7.351983185773259e-06, + "lm_loss": 5.5227, + "loss": 1.5239, + "step": 795, + "text_contrastive_loss": 0.8986, + "train_positive_log_prob": -81.5267, + "train_positive_token_accuracy": 0.0762, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.5602, + "epoch": 1.7968397291196387, + "grad_norm": 17.57004737854004, + "learning_rate": 7.345578137996745e-06, + "lm_loss": 5.5923, + "loss": 1.5782, + "step": 796, + "text_contrastive_loss": 0.9175, + "train_positive_log_prob": -82.5345, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.5064, + "epoch": 1.799097065462754, + "grad_norm": 15.378622055053711, + "learning_rate": 7.339168151279974e-06, + "lm_loss": 5.6117, + "loss": 1.512, + "step": 797, + "text_contrastive_loss": 0.8888, + "train_positive_log_prob": -84.1719, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.4536, + "epoch": 1.801354401805869, + "grad_norm": 14.970085144042969, + "learning_rate": 7.332753239120061e-06, + "lm_loss": 5.6563, + "loss": 1.4391, + "step": 798, + "text_contrastive_loss": 0.8398, + "train_positive_log_prob": -85.3759, + "train_positive_token_accuracy": 0.0807, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.4578, + "epoch": 1.8036117381489842, + "grad_norm": 14.22152328491211, + "learning_rate": 7.326333415024494e-06, + "lm_loss": 5.6367, + "loss": 1.4757, + "step": 799, + "text_contrastive_loss": 0.9084, + "train_positive_log_prob": -83.9155, + "train_positive_token_accuracy": 0.0845, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.3515, + "epoch": 1.8058690744920993, + "grad_norm": 12.08447551727295, + "learning_rate": 7.319908692511103e-06, + "lm_loss": 5.6908, + "loss": 1.3384, + "step": 800, + "text_contrastive_loss": 0.8356, + "train_positive_log_prob": -87.2971, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.4836, + "epoch": 1.8081264108352144, + "grad_norm": 14.808053970336914, + "learning_rate": 7.313479085108033e-06, + "lm_loss": 5.5513, + "loss": 1.5525, + "step": 801, + "text_contrastive_loss": 1.0275, + "train_positive_log_prob": -81.7091, + "train_positive_token_accuracy": 0.0801, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.5721, + "epoch": 1.8103837471783297, + "grad_norm": 17.425893783569336, + "learning_rate": 7.307044606353715e-06, + "lm_loss": 5.6184, + "loss": 1.6052, + "step": 802, + "text_contrastive_loss": 0.9426, + "train_positive_log_prob": -82.0182, + "train_positive_token_accuracy": 0.0717, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.5907, + "epoch": 1.8126410835214446, + "grad_norm": 15.897844314575195, + "learning_rate": 7.300605269796839e-06, + "lm_loss": 5.6316, + "loss": 1.6192, + "step": 803, + "text_contrastive_loss": 0.9306, + "train_positive_log_prob": -84.0544, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.479, + "epoch": 1.81489841986456, + "grad_norm": 15.763197898864746, + "learning_rate": 7.2941610889963164e-06, + "lm_loss": 5.6244, + "loss": 1.532, + "step": 804, + "text_contrastive_loss": 0.9811, + "train_positive_log_prob": -84.0574, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.5027, + "epoch": 1.8171557562076748, + "grad_norm": 15.428396224975586, + "learning_rate": 7.2877120775212685e-06, + "lm_loss": 5.5224, + "loss": 1.4741, + "step": 805, + "text_contrastive_loss": 0.8383, + "train_positive_log_prob": -80.657, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.4664, + "epoch": 1.81941309255079, + "grad_norm": 13.003288269042969, + "learning_rate": 7.2812582489509844e-06, + "lm_loss": 5.5655, + "loss": 1.4355, + "step": 806, + "text_contrastive_loss": 0.8253, + "train_positive_log_prob": -81.457, + "train_positive_token_accuracy": 0.0819, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.4669, + "epoch": 1.8216704288939052, + "grad_norm": 15.819608688354492, + "learning_rate": 7.2747996168748915e-06, + "lm_loss": 5.7389, + "loss": 1.4654, + "step": 807, + "text_contrastive_loss": 0.8491, + "train_positive_log_prob": -86.9135, + "train_positive_token_accuracy": 0.0833, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3771, + "epoch": 1.8239277652370203, + "grad_norm": 13.820873260498047, + "learning_rate": 7.26833619489254e-06, + "lm_loss": 5.5685, + "loss": 1.2797, + "step": 808, + "text_contrastive_loss": 0.6915, + "train_positive_log_prob": -82.895, + "train_positive_token_accuracy": 0.0821, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.5023, + "epoch": 1.8261851015801356, + "grad_norm": 17.437044143676758, + "learning_rate": 7.261867996613559e-06, + "lm_loss": 5.62, + "loss": 1.5989, + "step": 809, + "text_contrastive_loss": 1.0692, + "train_positive_log_prob": -84.8249, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.3518, + "epoch": 1.8284424379232505, + "grad_norm": 12.64499568939209, + "learning_rate": 7.255395035657639e-06, + "lm_loss": 5.6586, + "loss": 1.3552, + "step": 810, + "text_contrastive_loss": 0.8751, + "train_positive_log_prob": -82.8223, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.3781, + "epoch": 1.8306997742663658, + "grad_norm": 13.45781421661377, + "learning_rate": 7.2489173256544975e-06, + "lm_loss": 5.5488, + "loss": 1.3094, + "step": 811, + "text_contrastive_loss": 0.7528, + "train_positive_log_prob": -83.4927, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.4842, + "epoch": 1.8329571106094809, + "grad_norm": 13.990462303161621, + "learning_rate": 7.242434880243851e-06, + "lm_loss": 5.5488, + "loss": 1.4776, + "step": 812, + "text_contrastive_loss": 0.8769, + "train_positive_log_prob": -83.9268, + "train_positive_token_accuracy": 0.0819, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.419, + "epoch": 1.835214446952596, + "grad_norm": 14.105340003967285, + "learning_rate": 7.235947713075389e-06, + "lm_loss": 5.6479, + "loss": 1.3555, + "step": 813, + "text_contrastive_loss": 0.7433, + "train_positive_log_prob": -83.3754, + "train_positive_token_accuracy": 0.0714, + "train_positive_token_prob": 0.0273 + }, + { + "contrastive_loss": 0.4682, + "epoch": 1.837471783295711, + "grad_norm": 14.975137710571289, + "learning_rate": 7.229455837808741e-06, + "lm_loss": 5.7287, + "loss": 1.4711, + "step": 814, + "text_contrastive_loss": 0.8601, + "train_positive_log_prob": -85.1682, + "train_positive_token_accuracy": 0.0728, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.5217, + "epoch": 1.8397291196388261, + "grad_norm": 16.649938583374023, + "learning_rate": 7.222959268113452e-06, + "lm_loss": 5.5503, + "loss": 1.5125, + "step": 815, + "text_contrastive_loss": 0.8715, + "train_positive_log_prob": -81.9515, + "train_positive_token_accuracy": 0.0823, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.4763, + "epoch": 1.8419864559819414, + "grad_norm": 14.393045425415039, + "learning_rate": 7.216458017668951e-06, + "lm_loss": 5.6168, + "loss": 1.4171, + "step": 816, + "text_contrastive_loss": 0.7581, + "train_positive_log_prob": -83.7497, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.4702, + "epoch": 1.8442437923250563, + "grad_norm": 14.543290138244629, + "learning_rate": 7.2099521001645225e-06, + "lm_loss": 5.7231, + "loss": 1.4471, + "step": 817, + "text_contrastive_loss": 0.8091, + "train_positive_log_prob": -85.1064, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.5249, + "epoch": 1.8465011286681716, + "grad_norm": 15.051460266113281, + "learning_rate": 7.20344152929928e-06, + "lm_loss": 5.5542, + "loss": 1.523, + "step": 818, + "text_contrastive_loss": 0.8853, + "train_positive_log_prob": -82.3788, + "train_positive_token_accuracy": 0.0811, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3858, + "epoch": 1.8487584650112867, + "grad_norm": 13.299602508544922, + "learning_rate": 7.19692631878213e-06, + "lm_loss": 5.7493, + "loss": 1.3207, + "step": 819, + "text_contrastive_loss": 0.7199, + "train_positive_log_prob": -88.4855, + "train_positive_token_accuracy": 0.0728, + "train_positive_token_prob": 0.0277 + }, + { + "contrastive_loss": 0.4634, + "epoch": 1.8510158013544018, + "grad_norm": 14.328387260437012, + "learning_rate": 7.190406482331757e-06, + "lm_loss": 5.5276, + "loss": 1.42, + "step": 820, + "text_contrastive_loss": 0.8075, + "train_positive_log_prob": -80.3671, + "train_positive_token_accuracy": 0.0699, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.5607, + "epoch": 1.853273137697517, + "grad_norm": 15.57853889465332, + "learning_rate": 7.183882033676579e-06, + "lm_loss": 5.53, + "loss": 1.5186, + "step": 821, + "text_contrastive_loss": 0.8099, + "train_positive_log_prob": -82.9777, + "train_positive_token_accuracy": 0.0823, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.5059, + "epoch": 1.855530474040632, + "grad_norm": 16.22122573852539, + "learning_rate": 7.177352986554729e-06, + "lm_loss": 5.5799, + "loss": 1.5496, + "step": 822, + "text_contrastive_loss": 0.9714, + "train_positive_log_prob": -84.1135, + "train_positive_token_accuracy": 0.0767, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.4553, + "epoch": 1.8577878103837473, + "grad_norm": 16.399616241455078, + "learning_rate": 7.1708193547140205e-06, + "lm_loss": 5.729, + "loss": 1.3817, + "step": 823, + "text_contrastive_loss": 0.707, + "train_positive_log_prob": -85.2438, + "train_positive_token_accuracy": 0.0764, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.5053, + "epoch": 1.8600451467268622, + "grad_norm": 16.39391326904297, + "learning_rate": 7.164281151911923e-06, + "lm_loss": 5.602, + "loss": 1.4259, + "step": 824, + "text_contrastive_loss": 0.7207, + "train_positive_log_prob": -84.753, + "train_positive_token_accuracy": 0.0746, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.5906, + "epoch": 1.8623024830699775, + "grad_norm": 15.44497013092041, + "learning_rate": 7.157738391915531e-06, + "lm_loss": 5.5914, + "loss": 1.6382, + "step": 825, + "text_contrastive_loss": 0.9768, + "train_positive_log_prob": -82.2142, + "train_positive_token_accuracy": 0.0863, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3885, + "epoch": 1.8645598194130926, + "grad_norm": 13.059473037719727, + "learning_rate": 7.151191088501531e-06, + "lm_loss": 5.6098, + "loss": 1.3482, + "step": 826, + "text_contrastive_loss": 0.7973, + "train_positive_log_prob": -84.2569, + "train_positive_token_accuracy": 0.0726, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.5635, + "epoch": 1.8668171557562077, + "grad_norm": 15.385315895080566, + "learning_rate": 7.14463925545618e-06, + "lm_loss": 5.677, + "loss": 1.564, + "step": 827, + "text_contrastive_loss": 0.8657, + "train_positive_log_prob": -84.6658, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0292 + }, + { + "contrastive_loss": 0.4413, + "epoch": 1.8690744920993227, + "grad_norm": 14.82215404510498, + "learning_rate": 7.138082906575271e-06, + "lm_loss": 5.562, + "loss": 1.4071, + "step": 828, + "text_contrastive_loss": 0.8192, + "train_positive_log_prob": -83.908, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.4351, + "epoch": 1.8713318284424378, + "grad_norm": 13.686999320983887, + "learning_rate": 7.131522055664109e-06, + "lm_loss": 5.6246, + "loss": 1.4405, + "step": 829, + "text_contrastive_loss": 0.8859, + "train_positive_log_prob": -82.5476, + "train_positive_token_accuracy": 0.0756, + "train_positive_token_prob": 0.0292 + }, + { + "contrastive_loss": 0.3927, + "epoch": 1.8735891647855532, + "grad_norm": 13.37989616394043, + "learning_rate": 7.124956716537471e-06, + "lm_loss": 5.5763, + "loss": 1.3506, + "step": 830, + "text_contrastive_loss": 0.8005, + "train_positive_log_prob": -83.5371, + "train_positive_token_accuracy": 0.0811, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.4601, + "epoch": 1.875846501128668, + "grad_norm": 13.956619262695312, + "learning_rate": 7.118386903019594e-06, + "lm_loss": 5.6951, + "loss": 1.4153, + "step": 831, + "text_contrastive_loss": 0.7714, + "train_positive_log_prob": -81.9084, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.4526, + "epoch": 1.8781038374717833, + "grad_norm": 13.204083442687988, + "learning_rate": 7.111812628944132e-06, + "lm_loss": 5.6366, + "loss": 1.4553, + "step": 832, + "text_contrastive_loss": 0.878, + "train_positive_log_prob": -83.1604, + "train_positive_token_accuracy": 0.0702, + "train_positive_token_prob": 0.0281 + }, + { + "contrastive_loss": 0.5398, + "epoch": 1.8803611738148984, + "grad_norm": 15.896525382995605, + "learning_rate": 7.105233908154128e-06, + "lm_loss": 5.5994, + "loss": 1.5083, + "step": 833, + "text_contrastive_loss": 0.817, + "train_positive_log_prob": -83.0266, + "train_positive_token_accuracy": 0.0806, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.4857, + "epoch": 1.8826185101580135, + "grad_norm": 14.621209144592285, + "learning_rate": 7.098650754501994e-06, + "lm_loss": 5.6352, + "loss": 1.4897, + "step": 834, + "text_contrastive_loss": 0.8809, + "train_positive_log_prob": -83.8209, + "train_positive_token_accuracy": 0.0819, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.4413, + "epoch": 1.8848758465011288, + "grad_norm": 15.942708969116211, + "learning_rate": 7.0920631818494745e-06, + "lm_loss": 5.6217, + "loss": 1.4979, + "step": 835, + "text_contrastive_loss": 0.9888, + "train_positive_log_prob": -83.1856, + "train_positive_token_accuracy": 0.085, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.4802, + "epoch": 1.8871331828442437, + "grad_norm": 16.156293869018555, + "learning_rate": 7.085471204067616e-06, + "lm_loss": 5.5167, + "loss": 1.4624, + "step": 836, + "text_contrastive_loss": 0.8611, + "train_positive_log_prob": -82.4925, + "train_positive_token_accuracy": 0.0807, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3945, + "epoch": 1.889390519187359, + "grad_norm": 13.92788314819336, + "learning_rate": 7.078874835036742e-06, + "lm_loss": 5.6192, + "loss": 1.3384, + "step": 837, + "text_contrastive_loss": 0.7639, + "train_positive_log_prob": -84.0938, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.5487, + "epoch": 1.8916478555304739, + "grad_norm": 17.215038299560547, + "learning_rate": 7.072274088646425e-06, + "lm_loss": 5.666, + "loss": 1.494, + "step": 838, + "text_contrastive_loss": 0.7576, + "train_positive_log_prob": -83.9518, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.4725, + "epoch": 1.8939051918735892, + "grad_norm": 14.875057220458984, + "learning_rate": 7.065668978795449e-06, + "lm_loss": 5.6206, + "loss": 1.4994, + "step": 839, + "text_contrastive_loss": 0.9295, + "train_positive_log_prob": -82.9405, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.3705, + "epoch": 1.8961625282167043, + "grad_norm": 13.532670974731445, + "learning_rate": 7.059059519391794e-06, + "lm_loss": 5.561, + "loss": 1.2806, + "step": 840, + "text_contrastive_loss": 0.708, + "train_positive_log_prob": -84.3467, + "train_positive_token_accuracy": 0.0845, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.4629, + "epoch": 1.8984198645598194, + "grad_norm": 15.141593933105469, + "learning_rate": 7.05244572435259e-06, + "lm_loss": 5.5582, + "loss": 1.4232, + "step": 841, + "text_contrastive_loss": 0.809, + "train_positive_log_prob": -81.7893, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.4663, + "epoch": 1.9006772009029347, + "grad_norm": 16.773216247558594, + "learning_rate": 7.045827607604103e-06, + "lm_loss": 5.5616, + "loss": 1.38, + "step": 842, + "text_contrastive_loss": 0.715, + "train_positive_log_prob": -81.9258, + "train_positive_token_accuracy": 0.0867, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.4645, + "epoch": 1.9029345372460496, + "grad_norm": 14.560948371887207, + "learning_rate": 7.039205183081694e-06, + "lm_loss": 5.5767, + "loss": 1.418, + "step": 843, + "text_contrastive_loss": 0.7916, + "train_positive_log_prob": -83.0209, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4268, + "epoch": 1.9051918735891649, + "grad_norm": 14.007619857788086, + "learning_rate": 7.0325784647298e-06, + "lm_loss": 5.7192, + "loss": 1.4293, + "step": 844, + "text_contrastive_loss": 0.8611, + "train_positive_log_prob": -85.6541, + "train_positive_token_accuracy": 0.0796, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.5428, + "epoch": 1.90744920993228, + "grad_norm": 14.533950805664062, + "learning_rate": 7.0259474665018915e-06, + "lm_loss": 5.5752, + "loss": 1.4701, + "step": 845, + "text_contrastive_loss": 0.7396, + "train_positive_log_prob": -84.3883, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.5016, + "epoch": 1.909706546275395, + "grad_norm": 13.754950523376465, + "learning_rate": 7.019312202360457e-06, + "lm_loss": 5.399, + "loss": 1.4198, + "step": 846, + "text_contrastive_loss": 0.7565, + "train_positive_log_prob": -79.1924, + "train_positive_token_accuracy": 0.0812, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.46, + "epoch": 1.9119638826185101, + "grad_norm": 16.947940826416016, + "learning_rate": 7.012672686276969e-06, + "lm_loss": 5.5083, + "loss": 1.5104, + "step": 847, + "text_contrastive_loss": 0.9991, + "train_positive_log_prob": -80.2429, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.4708, + "epoch": 1.9142212189616252, + "grad_norm": 14.792731285095215, + "learning_rate": 7.006028932231847e-06, + "lm_loss": 5.5341, + "loss": 1.4455, + "step": 848, + "text_contrastive_loss": 0.8425, + "train_positive_log_prob": -81.9765, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.5239, + "epoch": 1.9164785553047405, + "grad_norm": 15.621329307556152, + "learning_rate": 6.999380954214438e-06, + "lm_loss": 5.4574, + "loss": 1.4538, + "step": 849, + "text_contrastive_loss": 0.7685, + "train_positive_log_prob": -78.5588, + "train_positive_token_accuracy": 0.0879, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.4317, + "epoch": 1.9187358916478554, + "grad_norm": 15.163226127624512, + "learning_rate": 6.992728766222982e-06, + "lm_loss": 5.531, + "loss": 1.365, + "step": 850, + "text_contrastive_loss": 0.7604, + "train_positive_log_prob": -82.474, + "train_positive_token_accuracy": 0.087, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.4402, + "epoch": 1.9209932279909707, + "grad_norm": 15.181447982788086, + "learning_rate": 6.9860723822645825e-06, + "lm_loss": 5.6103, + "loss": 1.3989, + "step": 851, + "text_contrastive_loss": 0.7952, + "train_positive_log_prob": -83.0405, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.4504, + "epoch": 1.9232505643340858, + "grad_norm": 13.12296199798584, + "learning_rate": 6.979411816355183e-06, + "lm_loss": 5.5117, + "loss": 1.459, + "step": 852, + "text_contrastive_loss": 0.9149, + "train_positive_log_prob": -81.5778, + "train_positive_token_accuracy": 0.0845, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.583, + "epoch": 1.925507900677201, + "grad_norm": 16.99874496459961, + "learning_rate": 6.972747082519526e-06, + "lm_loss": 5.6497, + "loss": 1.6512, + "step": 853, + "text_contrastive_loss": 1.0065, + "train_positive_log_prob": -85.2778, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.4726, + "epoch": 1.927765237020316, + "grad_norm": 13.545490264892578, + "learning_rate": 6.966078194791133e-06, + "lm_loss": 5.5068, + "loss": 1.3707, + "step": 854, + "text_contrastive_loss": 0.6948, + "train_positive_log_prob": -80.9018, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.5064, + "epoch": 1.930022573363431, + "grad_norm": 15.094696998596191, + "learning_rate": 6.959405167212278e-06, + "lm_loss": 5.6578, + "loss": 1.5452, + "step": 855, + "text_contrastive_loss": 0.9461, + "train_positive_log_prob": -81.7496, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.4564, + "epoch": 1.9322799097065464, + "grad_norm": 14.36962890625, + "learning_rate": 6.952728013833941e-06, + "lm_loss": 5.5843, + "loss": 1.468, + "step": 856, + "text_contrastive_loss": 0.9064, + "train_positive_log_prob": -84.0509, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.5573, + "epoch": 1.9345372460496613, + "grad_norm": 15.610633850097656, + "learning_rate": 6.946046748715796e-06, + "lm_loss": 5.5431, + "loss": 1.5477, + "step": 857, + "text_contrastive_loss": 0.8722, + "train_positive_log_prob": -81.0661, + "train_positive_token_accuracy": 0.0847, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.4875, + "epoch": 1.9367945823927766, + "grad_norm": 15.493638038635254, + "learning_rate": 6.9393613859261755e-06, + "lm_loss": 5.6309, + "loss": 1.4219, + "step": 858, + "text_contrastive_loss": 0.7426, + "train_positive_log_prob": -82.9735, + "train_positive_token_accuracy": 0.0798, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.4801, + "epoch": 1.9390519187358917, + "grad_norm": 14.840836524963379, + "learning_rate": 6.932671939542037e-06, + "lm_loss": 5.5117, + "loss": 1.4978, + "step": 859, + "text_contrastive_loss": 0.9331, + "train_positive_log_prob": -83.0663, + "train_positive_token_accuracy": 0.0824, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.46, + "epoch": 1.9413092550790068, + "grad_norm": 14.81650161743164, + "learning_rate": 6.925978423648941e-06, + "lm_loss": 5.482, + "loss": 1.4621, + "step": 860, + "text_contrastive_loss": 0.9079, + "train_positive_log_prob": -80.0698, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.5035, + "epoch": 1.9435665914221218, + "grad_norm": 14.228053092956543, + "learning_rate": 6.919280852341011e-06, + "lm_loss": 5.4694, + "loss": 1.4845, + "step": 861, + "text_contrastive_loss": 0.8682, + "train_positive_log_prob": -80.179, + "train_positive_token_accuracy": 0.0778, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.4094, + "epoch": 1.945823927765237, + "grad_norm": 13.145322799682617, + "learning_rate": 6.912579239720913e-06, + "lm_loss": 5.5978, + "loss": 1.324, + "step": 862, + "text_contrastive_loss": 0.7096, + "train_positive_log_prob": -81.0623, + "train_positive_token_accuracy": 0.0836, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4709, + "epoch": 1.9480812641083523, + "grad_norm": 15.593461036682129, + "learning_rate": 6.9058735998998224e-06, + "lm_loss": 5.5679, + "loss": 1.4226, + "step": 863, + "text_contrastive_loss": 0.7897, + "train_positive_log_prob": -82.5362, + "train_positive_token_accuracy": 0.0856, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.5678, + "epoch": 1.9503386004514671, + "grad_norm": 14.172186851501465, + "learning_rate": 6.899163946997396e-06, + "lm_loss": 5.5179, + "loss": 1.5015, + "step": 864, + "text_contrastive_loss": 0.7637, + "train_positive_log_prob": -81.4523, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.5079, + "epoch": 1.9525959367945824, + "grad_norm": 20.990461349487305, + "learning_rate": 6.892450295141737e-06, + "lm_loss": 5.5316, + "loss": 1.4109, + "step": 865, + "text_contrastive_loss": 0.6997, + "train_positive_log_prob": -82.1323, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.4982, + "epoch": 1.9548532731376975, + "grad_norm": 14.648655891418457, + "learning_rate": 6.885732658469374e-06, + "lm_loss": 5.6773, + "loss": 1.5554, + "step": 866, + "text_contrastive_loss": 0.9791, + "train_positive_log_prob": -84.3598, + "train_positive_token_accuracy": 0.072, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.432, + "epoch": 1.9571106094808126, + "grad_norm": 15.13239574432373, + "learning_rate": 6.8790110511252195e-06, + "lm_loss": 5.6213, + "loss": 1.3945, + "step": 867, + "text_contrastive_loss": 0.8006, + "train_positive_log_prob": -84.0088, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.577, + "epoch": 1.959367945823928, + "grad_norm": 16.206260681152344, + "learning_rate": 6.872285487262555e-06, + "lm_loss": 5.5932, + "loss": 1.6151, + "step": 868, + "text_contrastive_loss": 0.9576, + "train_positive_log_prob": -82.8912, + "train_positive_token_accuracy": 0.0822, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.5226, + "epoch": 1.9616252821670428, + "grad_norm": 15.38842487335205, + "learning_rate": 6.865555981042983e-06, + "lm_loss": 5.6184, + "loss": 1.5009, + "step": 869, + "text_contrastive_loss": 0.8328, + "train_positive_log_prob": -82.719, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.431, + "epoch": 1.963882618510158, + "grad_norm": 15.43478775024414, + "learning_rate": 6.858822546636417e-06, + "lm_loss": 5.6062, + "loss": 1.4393, + "step": 870, + "text_contrastive_loss": 0.8954, + "train_positive_log_prob": -84.3117, + "train_positive_token_accuracy": 0.0777, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.4174, + "epoch": 1.966139954853273, + "grad_norm": 14.091517448425293, + "learning_rate": 6.852085198221035e-06, + "lm_loss": 5.5341, + "loss": 1.4338, + "step": 871, + "text_contrastive_loss": 0.926, + "train_positive_log_prob": -81.2831, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.5209, + "epoch": 1.9683972911963883, + "grad_norm": 16.09682273864746, + "learning_rate": 6.845343949983258e-06, + "lm_loss": 5.5722, + "loss": 1.5563, + "step": 872, + "text_contrastive_loss": 0.9563, + "train_positive_log_prob": -81.1881, + "train_positive_token_accuracy": 0.0735, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.4173, + "epoch": 1.9706546275395034, + "grad_norm": 14.025125503540039, + "learning_rate": 6.838598816117725e-06, + "lm_loss": 5.6286, + "loss": 1.4377, + "step": 873, + "text_contrastive_loss": 0.9151, + "train_positive_log_prob": -85.091, + "train_positive_token_accuracy": 0.0819, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.4504, + "epoch": 1.9729119638826185, + "grad_norm": 14.221590042114258, + "learning_rate": 6.831849810827247e-06, + "lm_loss": 5.4477, + "loss": 1.4057, + "step": 874, + "text_contrastive_loss": 0.8209, + "train_positive_log_prob": -80.7588, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.5954, + "epoch": 1.9751693002257338, + "grad_norm": 17.675140380859375, + "learning_rate": 6.825096948322791e-06, + "lm_loss": 5.4461, + "loss": 1.6072, + "step": 875, + "text_contrastive_loss": 0.9343, + "train_positive_log_prob": -81.7373, + "train_positive_token_accuracy": 0.0716, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.4878, + "epoch": 1.9774266365688487, + "grad_norm": 13.812328338623047, + "learning_rate": 6.818340242823449e-06, + "lm_loss": 5.5571, + "loss": 1.4228, + "step": 876, + "text_contrastive_loss": 0.7586, + "train_positive_log_prob": -82.4894, + "train_positive_token_accuracy": 0.07, + "train_positive_token_prob": 0.0292 + }, + { + "contrastive_loss": 0.4491, + "epoch": 1.979683972911964, + "grad_norm": 12.950183868408203, + "learning_rate": 6.8115797085564e-06, + "lm_loss": 5.5688, + "loss": 1.345, + "step": 877, + "text_contrastive_loss": 0.678, + "train_positive_log_prob": -82.0546, + "train_positive_token_accuracy": 0.0843, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.4886, + "epoch": 1.981941309255079, + "grad_norm": 16.71753692626953, + "learning_rate": 6.804815359756887e-06, + "lm_loss": 5.6296, + "loss": 1.4671, + "step": 878, + "text_contrastive_loss": 0.8311, + "train_positive_log_prob": -85.982, + "train_positive_token_accuracy": 0.0739, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.3974, + "epoch": 1.9841986455981941, + "grad_norm": 14.750004768371582, + "learning_rate": 6.798047210668185e-06, + "lm_loss": 5.6267, + "loss": 1.3373, + "step": 879, + "text_contrastive_loss": 0.7546, + "train_positive_log_prob": -84.2957, + "train_positive_token_accuracy": 0.0741, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.4041, + "epoch": 1.9864559819413092, + "grad_norm": 14.013243675231934, + "learning_rate": 6.7912752755415716e-06, + "lm_loss": 5.6537, + "loss": 1.4475, + "step": 880, + "text_contrastive_loss": 0.9561, + "train_positive_log_prob": -83.5193, + "train_positive_token_accuracy": 0.0825, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.4919, + "epoch": 1.9887133182844243, + "grad_norm": 14.93449592590332, + "learning_rate": 6.7844995686362955e-06, + "lm_loss": 5.525, + "loss": 1.5078, + "step": 881, + "text_contrastive_loss": 0.9267, + "train_positive_log_prob": -82.0335, + "train_positive_token_accuracy": 0.0755, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3913, + "epoch": 1.9909706546275396, + "grad_norm": 12.556751251220703, + "learning_rate": 6.777720104219548e-06, + "lm_loss": 5.4374, + "loss": 1.3901, + "step": 882, + "text_contrastive_loss": 0.9103, + "train_positive_log_prob": -81.9656, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.5565, + "epoch": 1.9932279909706545, + "grad_norm": 15.738174438476562, + "learning_rate": 6.770936896566434e-06, + "lm_loss": 5.5313, + "loss": 1.5212, + "step": 883, + "text_contrastive_loss": 0.8232, + "train_positive_log_prob": -79.5299, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3763, + "epoch": 1.9954853273137698, + "grad_norm": 13.78351879119873, + "learning_rate": 6.7641499599599355e-06, + "lm_loss": 5.5568, + "loss": 1.296, + "step": 884, + "text_contrastive_loss": 0.7281, + "train_positive_log_prob": -82.6574, + "train_positive_token_accuracy": 0.0832, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.5562, + "epoch": 1.997742663656885, + "grad_norm": 14.878168106079102, + "learning_rate": 6.757359308690889e-06, + "lm_loss": 5.5898, + "loss": 1.5898, + "step": 885, + "text_contrastive_loss": 0.9492, + "train_positive_log_prob": -81.2728, + "train_positive_token_accuracy": 0.0819, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.2543, + "epoch": 2.0, + "grad_norm": 14.165923118591309, + "learning_rate": 6.750564957057958e-06, + "lm_loss": 5.4659, + "loss": 1.0487, + "step": 886, + "text_contrastive_loss": 0.4956, + "train_positive_log_prob": -81.4205, + "train_positive_token_accuracy": 0.084, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.4014, + "epoch": 2.0022573363431153, + "grad_norm": 12.469857215881348, + "learning_rate": 6.743766919367588e-06, + "lm_loss": 5.64, + "loss": 1.3815, + "step": 887, + "text_contrastive_loss": 0.8323, + "train_positive_log_prob": -85.5017, + "train_positive_token_accuracy": 0.085, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.434, + "epoch": 2.00451467268623, + "grad_norm": 13.196414947509766, + "learning_rate": 6.736965209933992e-06, + "lm_loss": 5.4902, + "loss": 1.3573, + "step": 888, + "text_contrastive_loss": 0.7486, + "train_positive_log_prob": -80.9577, + "train_positive_token_accuracy": 0.0728, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.4232, + "epoch": 2.0067720090293455, + "grad_norm": 13.297743797302246, + "learning_rate": 6.730159843079113e-06, + "lm_loss": 5.4933, + "loss": 1.4115, + "step": 889, + "text_contrastive_loss": 0.8781, + "train_positive_log_prob": -82.5661, + "train_positive_token_accuracy": 0.0829, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.5013, + "epoch": 2.0090293453724604, + "grad_norm": 15.293583869934082, + "learning_rate": 6.723350833132596e-06, + "lm_loss": 5.5136, + "loss": 1.485, + "step": 890, + "text_contrastive_loss": 0.8645, + "train_positive_log_prob": -80.3577, + "train_positive_token_accuracy": 0.0801, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3664, + "epoch": 2.0112866817155757, + "grad_norm": 12.71983814239502, + "learning_rate": 6.716538194431754e-06, + "lm_loss": 5.5791, + "loss": 1.2353, + "step": 891, + "text_contrastive_loss": 0.6219, + "train_positive_log_prob": -84.1868, + "train_positive_token_accuracy": 0.0875, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.4512, + "epoch": 2.0135440180586905, + "grad_norm": 13.784418106079102, + "learning_rate": 6.7097219413215474e-06, + "lm_loss": 5.5874, + "loss": 1.4766, + "step": 892, + "text_contrastive_loss": 0.9333, + "train_positive_log_prob": -83.6367, + "train_positive_token_accuracy": 0.0741, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.464, + "epoch": 2.015801354401806, + "grad_norm": 13.814016342163086, + "learning_rate": 6.702902088154539e-06, + "lm_loss": 5.6971, + "loss": 1.473, + "step": 893, + "text_contrastive_loss": 0.8786, + "train_positive_log_prob": -83.9005, + "train_positive_token_accuracy": 0.0822, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.4201, + "epoch": 2.018058690744921, + "grad_norm": 14.438596725463867, + "learning_rate": 6.696078649290878e-06, + "lm_loss": 5.5219, + "loss": 1.3344, + "step": 894, + "text_contrastive_loss": 0.7243, + "train_positive_log_prob": -81.3829, + "train_positive_token_accuracy": 0.0777, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3738, + "epoch": 2.020316027088036, + "grad_norm": 14.425973892211914, + "learning_rate": 6.689251639098261e-06, + "lm_loss": 5.5112, + "loss": 1.2928, + "step": 895, + "text_contrastive_loss": 0.7358, + "train_positive_log_prob": -80.539, + "train_positive_token_accuracy": 0.0855, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.5213, + "epoch": 2.0225733634311513, + "grad_norm": 15.28638744354248, + "learning_rate": 6.682421071951907e-06, + "lm_loss": 5.661, + "loss": 1.5267, + "step": 896, + "text_contrastive_loss": 0.8787, + "train_positive_log_prob": -85.0275, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.3547, + "epoch": 2.024830699774266, + "grad_norm": 10.85566520690918, + "learning_rate": 6.67558696223452e-06, + "lm_loss": 5.557, + "loss": 1.2948, + "step": 897, + "text_contrastive_loss": 0.7687, + "train_positive_log_prob": -81.9218, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3896, + "epoch": 2.0270880361173815, + "grad_norm": 13.134331703186035, + "learning_rate": 6.668749324336268e-06, + "lm_loss": 5.5849, + "loss": 1.3087, + "step": 898, + "text_contrastive_loss": 0.7213, + "train_positive_log_prob": -84.1701, + "train_positive_token_accuracy": 0.0765, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.3909, + "epoch": 2.0293453724604964, + "grad_norm": 15.461824417114258, + "learning_rate": 6.661908172654746e-06, + "lm_loss": 5.5168, + "loss": 1.372, + "step": 899, + "text_contrastive_loss": 0.859, + "train_positive_log_prob": -82.0224, + "train_positive_token_accuracy": 0.0809, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.4188, + "epoch": 2.0316027088036117, + "grad_norm": 13.036766052246094, + "learning_rate": 6.65506352159495e-06, + "lm_loss": 5.4942, + "loss": 1.3947, + "step": 900, + "text_contrastive_loss": 0.8529, + "train_positive_log_prob": -81.7681, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4034, + "epoch": 2.033860045146727, + "grad_norm": 12.675641059875488, + "learning_rate": 6.6482153855692395e-06, + "lm_loss": 5.5938, + "loss": 1.3212, + "step": 901, + "text_contrastive_loss": 0.7167, + "train_positive_log_prob": -82.6862, + "train_positive_token_accuracy": 0.0817, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.4577, + "epoch": 2.036117381489842, + "grad_norm": 13.375115394592285, + "learning_rate": 6.64136377899732e-06, + "lm_loss": 5.4563, + "loss": 1.3828, + "step": 902, + "text_contrastive_loss": 0.7588, + "train_positive_log_prob": -80.3569, + "train_positive_token_accuracy": 0.0777, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.5385, + "epoch": 2.038374717832957, + "grad_norm": 15.63585376739502, + "learning_rate": 6.6345087163061935e-06, + "lm_loss": 5.5635, + "loss": 1.597, + "step": 903, + "text_contrastive_loss": 1.0041, + "train_positive_log_prob": -81.5388, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.4168, + "epoch": 2.040632054176072, + "grad_norm": 13.021448135375977, + "learning_rate": 6.627650211930152e-06, + "lm_loss": 5.6115, + "loss": 1.4543, + "step": 904, + "text_contrastive_loss": 0.9526, + "train_positive_log_prob": -83.1677, + "train_positive_token_accuracy": 0.0675, + "train_positive_token_prob": 0.0292 + }, + { + "contrastive_loss": 0.4712, + "epoch": 2.0428893905191874, + "grad_norm": 13.909318923950195, + "learning_rate": 6.620788280310722e-06, + "lm_loss": 5.5277, + "loss": 1.4534, + "step": 905, + "text_contrastive_loss": 0.8587, + "train_positive_log_prob": -82.8317, + "train_positive_token_accuracy": 0.0842, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3354, + "epoch": 2.0451467268623027, + "grad_norm": 12.445659637451172, + "learning_rate": 6.613922935896659e-06, + "lm_loss": 5.5755, + "loss": 1.246, + "step": 906, + "text_contrastive_loss": 0.7061, + "train_positive_log_prob": -82.6127, + "train_positive_token_accuracy": 0.0836, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4904, + "epoch": 2.0474040632054176, + "grad_norm": 17.49274444580078, + "learning_rate": 6.607054193143894e-06, + "lm_loss": 5.5608, + "loss": 1.5067, + "step": 907, + "text_contrastive_loss": 0.9204, + "train_positive_log_prob": -82.153, + "train_positive_token_accuracy": 0.0762, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3629, + "epoch": 2.049661399548533, + "grad_norm": 13.173979759216309, + "learning_rate": 6.600182066515519e-06, + "lm_loss": 5.5864, + "loss": 1.2993, + "step": 908, + "text_contrastive_loss": 0.7554, + "train_positive_log_prob": -81.8103, + "train_positive_token_accuracy": 0.0736, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3438, + "epoch": 2.0519187358916477, + "grad_norm": 11.536819458007812, + "learning_rate": 6.593306570481751e-06, + "lm_loss": 5.6188, + "loss": 1.2151, + "step": 909, + "text_contrastive_loss": 0.6189, + "train_positive_log_prob": -83.6289, + "train_positive_token_accuracy": 0.0756, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.4957, + "epoch": 2.054176072234763, + "grad_norm": 14.669292449951172, + "learning_rate": 6.586427719519901e-06, + "lm_loss": 5.438, + "loss": 1.5099, + "step": 910, + "text_contrastive_loss": 0.9407, + "train_positive_log_prob": -79.5543, + "train_positive_token_accuracy": 0.0852, + "train_positive_token_prob": 0.0325 + }, + { + "contrastive_loss": 0.4692, + "epoch": 2.056433408577878, + "grad_norm": 13.657344818115234, + "learning_rate": 6.579545528114344e-06, + "lm_loss": 5.549, + "loss": 1.4664, + "step": 911, + "text_contrastive_loss": 0.8847, + "train_positive_log_prob": -81.3501, + "train_positive_token_accuracy": 0.0738, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.4576, + "epoch": 2.0586907449209932, + "grad_norm": 14.083206176757812, + "learning_rate": 6.572660010756489e-06, + "lm_loss": 5.4462, + "loss": 1.3224, + "step": 912, + "text_contrastive_loss": 0.6403, + "train_positive_log_prob": -79.5662, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.4042, + "epoch": 2.0609480812641086, + "grad_norm": 13.646615982055664, + "learning_rate": 6.565771181944747e-06, + "lm_loss": 5.396, + "loss": 1.2911, + "step": 913, + "text_contrastive_loss": 0.6945, + "train_positive_log_prob": -77.4431, + "train_positive_token_accuracy": 0.083, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.4211, + "epoch": 2.0632054176072234, + "grad_norm": 13.751798629760742, + "learning_rate": 6.558879056184505e-06, + "lm_loss": 5.6277, + "loss": 1.3106, + "step": 914, + "text_contrastive_loss": 0.6533, + "train_positive_log_prob": -83.7784, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.4034, + "epoch": 2.0654627539503387, + "grad_norm": 13.624903678894043, + "learning_rate": 6.551983647988089e-06, + "lm_loss": 5.5401, + "loss": 1.3296, + "step": 915, + "text_contrastive_loss": 0.7445, + "train_positive_log_prob": -80.6111, + "train_positive_token_accuracy": 0.0816, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.5206, + "epoch": 2.0677200902934536, + "grad_norm": 14.546343803405762, + "learning_rate": 6.545084971874738e-06, + "lm_loss": 5.5809, + "loss": 1.6038, + "step": 916, + "text_contrastive_loss": 1.0502, + "train_positive_log_prob": -81.5982, + "train_positive_token_accuracy": 0.0788, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3386, + "epoch": 2.069977426636569, + "grad_norm": 13.124034881591797, + "learning_rate": 6.5381830423705714e-06, + "lm_loss": 5.6015, + "loss": 1.236, + "step": 917, + "text_contrastive_loss": 0.6744, + "train_positive_log_prob": -82.5345, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.4812, + "epoch": 2.072234762979684, + "grad_norm": 14.624191284179688, + "learning_rate": 6.531277874008562e-06, + "lm_loss": 5.4844, + "loss": 1.5353, + "step": 918, + "text_contrastive_loss": 1.0114, + "train_positive_log_prob": -81.1695, + "train_positive_token_accuracy": 0.0866, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3539, + "epoch": 2.074492099322799, + "grad_norm": 13.687456130981445, + "learning_rate": 6.5243694813284975e-06, + "lm_loss": 5.7161, + "loss": 1.238, + "step": 919, + "text_contrastive_loss": 0.6248, + "train_positive_log_prob": -84.9158, + "train_positive_token_accuracy": 0.0799, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.3948, + "epoch": 2.0767494356659144, + "grad_norm": 13.144502639770508, + "learning_rate": 6.517457878876958e-06, + "lm_loss": 5.6032, + "loss": 1.4035, + "step": 920, + "text_contrastive_loss": 0.8967, + "train_positive_log_prob": -82.5401, + "train_positive_token_accuracy": 0.0727, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.3949, + "epoch": 2.0790067720090293, + "grad_norm": 13.207449913024902, + "learning_rate": 6.510543081207281e-06, + "lm_loss": 5.5139, + "loss": 1.3689, + "step": 921, + "text_contrastive_loss": 0.8452, + "train_positive_log_prob": -81.8431, + "train_positive_token_accuracy": 0.0832, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.4945, + "epoch": 2.0812641083521446, + "grad_norm": 15.764245986938477, + "learning_rate": 6.503625102879534e-06, + "lm_loss": 5.5832, + "loss": 1.4095, + "step": 922, + "text_contrastive_loss": 0.7134, + "train_positive_log_prob": -83.381, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.3191, + "epoch": 2.0835214446952595, + "grad_norm": 13.758867263793945, + "learning_rate": 6.496703958460479e-06, + "lm_loss": 5.5048, + "loss": 1.2468, + "step": 923, + "text_contrastive_loss": 0.7546, + "train_positive_log_prob": -81.2825, + "train_positive_token_accuracy": 0.0799, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.468, + "epoch": 2.0857787810383748, + "grad_norm": 14.071089744567871, + "learning_rate": 6.489779662523545e-06, + "lm_loss": 5.5622, + "loss": 1.4406, + "step": 924, + "text_contrastive_loss": 0.8328, + "train_positive_log_prob": -83.4136, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.4457, + "epoch": 2.0880361173814896, + "grad_norm": 13.754512786865234, + "learning_rate": 6.4828522296488014e-06, + "lm_loss": 5.5535, + "loss": 1.5173, + "step": 925, + "text_contrastive_loss": 1.0325, + "train_positive_log_prob": -82.2827, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.4365, + "epoch": 2.090293453724605, + "grad_norm": 14.58531379699707, + "learning_rate": 6.475921674422917e-06, + "lm_loss": 5.4567, + "loss": 1.4428, + "step": 926, + "text_contrastive_loss": 0.9213, + "train_positive_log_prob": -80.1927, + "train_positive_token_accuracy": 0.0747, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.2903, + "epoch": 2.0925507900677203, + "grad_norm": 10.97835636138916, + "learning_rate": 6.4689880114391375e-06, + "lm_loss": 5.5145, + "loss": 1.1903, + "step": 927, + "text_contrastive_loss": 0.697, + "train_positive_log_prob": -81.2473, + "train_positive_token_accuracy": 0.0822, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.4226, + "epoch": 2.094808126410835, + "grad_norm": 14.165572166442871, + "learning_rate": 6.462051255297255e-06, + "lm_loss": 5.618, + "loss": 1.4143, + "step": 928, + "text_contrastive_loss": 0.8598, + "train_positive_log_prob": -84.6873, + "train_positive_token_accuracy": 0.0733, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.3467, + "epoch": 2.0970654627539504, + "grad_norm": 11.557136535644531, + "learning_rate": 6.455111420603568e-06, + "lm_loss": 5.5892, + "loss": 1.3139, + "step": 929, + "text_contrastive_loss": 0.8166, + "train_positive_log_prob": -81.9797, + "train_positive_token_accuracy": 0.0762, + "train_positive_token_prob": 0.0292 + }, + { + "contrastive_loss": 0.3628, + "epoch": 2.0993227990970653, + "grad_norm": 13.662775039672852, + "learning_rate": 6.448168521970865e-06, + "lm_loss": 5.5271, + "loss": 1.2832, + "step": 930, + "text_contrastive_loss": 0.7355, + "train_positive_log_prob": -80.5091, + "train_positive_token_accuracy": 0.0829, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.4152, + "epoch": 2.1015801354401806, + "grad_norm": 13.737049102783203, + "learning_rate": 6.441222574018378e-06, + "lm_loss": 5.4806, + "loss": 1.348, + "step": 931, + "text_contrastive_loss": 0.7694, + "train_positive_log_prob": -81.0335, + "train_positive_token_accuracy": 0.0753, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4303, + "epoch": 2.1038374717832955, + "grad_norm": 15.317706108093262, + "learning_rate": 6.434273591371771e-06, + "lm_loss": 5.6195, + "loss": 1.4958, + "step": 932, + "text_contrastive_loss": 1.007, + "train_positive_log_prob": -85.5653, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.4121, + "epoch": 2.106094808126411, + "grad_norm": 14.348837852478027, + "learning_rate": 6.427321588663085e-06, + "lm_loss": 5.5524, + "loss": 1.351, + "step": 933, + "text_contrastive_loss": 0.7673, + "train_positive_log_prob": -83.1556, + "train_positive_token_accuracy": 0.0814, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.4465, + "epoch": 2.108352144469526, + "grad_norm": 13.229959487915039, + "learning_rate": 6.42036658053073e-06, + "lm_loss": 5.5886, + "loss": 1.3958, + "step": 934, + "text_contrastive_loss": 0.7809, + "train_positive_log_prob": -83.986, + "train_positive_token_accuracy": 0.0829, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4145, + "epoch": 2.110609480812641, + "grad_norm": 13.140077590942383, + "learning_rate": 6.41340858161944e-06, + "lm_loss": 5.5015, + "loss": 1.3422, + "step": 935, + "text_contrastive_loss": 0.755, + "train_positive_log_prob": -81.1053, + "train_positive_token_accuracy": 0.0851, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4528, + "epoch": 2.1128668171557563, + "grad_norm": 13.821182250976562, + "learning_rate": 6.406447606580248e-06, + "lm_loss": 5.6049, + "loss": 1.4529, + "step": 936, + "text_contrastive_loss": 0.8792, + "train_positive_log_prob": -84.0983, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3658, + "epoch": 2.115124153498871, + "grad_norm": 13.366264343261719, + "learning_rate": 6.399483670070451e-06, + "lm_loss": 5.5543, + "loss": 1.2832, + "step": 937, + "text_contrastive_loss": 0.724, + "train_positive_log_prob": -83.4673, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.354, + "epoch": 2.1173814898419865, + "grad_norm": 14.258349418640137, + "learning_rate": 6.392516786753586e-06, + "lm_loss": 5.6234, + "loss": 1.297, + "step": 938, + "text_contrastive_loss": 0.7613, + "train_positive_log_prob": -83.5372, + "train_positive_token_accuracy": 0.0652, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.4311, + "epoch": 2.119638826185102, + "grad_norm": 14.100428581237793, + "learning_rate": 6.385546971299389e-06, + "lm_loss": 5.4922, + "loss": 1.3693, + "step": 939, + "text_contrastive_loss": 0.7778, + "train_positive_log_prob": -79.6583, + "train_positive_token_accuracy": 0.0776, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3776, + "epoch": 2.1218961625282167, + "grad_norm": 13.281478881835938, + "learning_rate": 6.378574238383776e-06, + "lm_loss": 5.3444, + "loss": 1.2723, + "step": 940, + "text_contrastive_loss": 0.7204, + "train_positive_log_prob": -79.2491, + "train_positive_token_accuracy": 0.0848, + "train_positive_token_prob": 0.0324 + }, + { + "contrastive_loss": 0.3426, + "epoch": 2.124153498871332, + "grad_norm": 12.80377197265625, + "learning_rate": 6.3715986026888046e-06, + "lm_loss": 5.6403, + "loss": 1.3062, + "step": 941, + "text_contrastive_loss": 0.7991, + "train_positive_log_prob": -83.9209, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.4369, + "epoch": 2.126410835214447, + "grad_norm": 13.716954231262207, + "learning_rate": 6.3646200789026426e-06, + "lm_loss": 5.4998, + "loss": 1.4223, + "step": 942, + "text_contrastive_loss": 0.8708, + "train_positive_log_prob": -81.0337, + "train_positive_token_accuracy": 0.0803, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4166, + "epoch": 2.128668171557562, + "grad_norm": 12.997210502624512, + "learning_rate": 6.35763868171954e-06, + "lm_loss": 5.601, + "loss": 1.3992, + "step": 943, + "text_contrastive_loss": 0.8448, + "train_positive_log_prob": -80.1899, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4015, + "epoch": 2.130925507900677, + "grad_norm": 12.987225532531738, + "learning_rate": 6.350654425839799e-06, + "lm_loss": 5.507, + "loss": 1.4052, + "step": 944, + "text_contrastive_loss": 0.906, + "train_positive_log_prob": -81.2277, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4211, + "epoch": 2.1331828442437923, + "grad_norm": 16.543136596679688, + "learning_rate": 6.343667325969736e-06, + "lm_loss": 5.5159, + "loss": 1.4324, + "step": 945, + "text_contrastive_loss": 0.9193, + "train_positive_log_prob": -81.4643, + "train_positive_token_accuracy": 0.0828, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4419, + "epoch": 2.1354401805869077, + "grad_norm": 13.043792724609375, + "learning_rate": 6.336677396821663e-06, + "lm_loss": 5.4945, + "loss": 1.4553, + "step": 946, + "text_contrastive_loss": 0.928, + "train_positive_log_prob": -80.2524, + "train_positive_token_accuracy": 0.0821, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3598, + "epoch": 2.1376975169300225, + "grad_norm": 13.676464080810547, + "learning_rate": 6.3296846531138445e-06, + "lm_loss": 5.5957, + "loss": 1.3786, + "step": 947, + "text_contrastive_loss": 0.9184, + "train_positive_log_prob": -82.2543, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4613, + "epoch": 2.139954853273138, + "grad_norm": 12.815018653869629, + "learning_rate": 6.322689109570472e-06, + "lm_loss": 5.6794, + "loss": 1.4275, + "step": 948, + "text_contrastive_loss": 0.7965, + "train_positive_log_prob": -85.657, + "train_positive_token_accuracy": 0.0811, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.4738, + "epoch": 2.1422121896162527, + "grad_norm": 13.829718589782715, + "learning_rate": 6.315690780921634e-06, + "lm_loss": 5.5211, + "loss": 1.4375, + "step": 949, + "text_contrastive_loss": 0.8232, + "train_positive_log_prob": -82.0154, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3183, + "epoch": 2.144469525959368, + "grad_norm": 12.133904457092285, + "learning_rate": 6.3086896819032814e-06, + "lm_loss": 5.6061, + "loss": 1.2586, + "step": 950, + "text_contrastive_loss": 0.7592, + "train_positive_log_prob": -84.4395, + "train_positive_token_accuracy": 0.0799, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4167, + "epoch": 2.146726862302483, + "grad_norm": 14.410158157348633, + "learning_rate": 6.301685827257202e-06, + "lm_loss": 5.5752, + "loss": 1.4587, + "step": 951, + "text_contrastive_loss": 0.9691, + "train_positive_log_prob": -82.5616, + "train_positive_token_accuracy": 0.0812, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.4086, + "epoch": 2.148984198645598, + "grad_norm": 12.80140495300293, + "learning_rate": 6.294679231730983e-06, + "lm_loss": 5.5819, + "loss": 1.4097, + "step": 952, + "text_contrastive_loss": 0.8859, + "train_positive_log_prob": -83.4914, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.4471, + "epoch": 2.1512415349887135, + "grad_norm": 15.21994400024414, + "learning_rate": 6.2876699100779815e-06, + "lm_loss": 5.612, + "loss": 1.448, + "step": 953, + "text_contrastive_loss": 0.8794, + "train_positive_log_prob": -83.5872, + "train_positive_token_accuracy": 0.0776, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.3505, + "epoch": 2.1534988713318284, + "grad_norm": 12.049601554870605, + "learning_rate": 6.2806578770573e-06, + "lm_loss": 5.4727, + "loss": 1.2422, + "step": 954, + "text_contrastive_loss": 0.6889, + "train_positive_log_prob": -81.5363, + "train_positive_token_accuracy": 0.081, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3645, + "epoch": 2.1557562076749437, + "grad_norm": 12.560468673706055, + "learning_rate": 6.273643147433743e-06, + "lm_loss": 5.4783, + "loss": 1.2736, + "step": 955, + "text_contrastive_loss": 0.7224, + "train_positive_log_prob": -79.177, + "train_positive_token_accuracy": 0.0832, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3303, + "epoch": 2.1580135440180586, + "grad_norm": 11.354669570922852, + "learning_rate": 6.266625735977802e-06, + "lm_loss": 5.5664, + "loss": 1.1947, + "step": 956, + "text_contrastive_loss": 0.6155, + "train_positive_log_prob": -83.2778, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.5072, + "epoch": 2.160270880361174, + "grad_norm": 15.365479469299316, + "learning_rate": 6.259605657465607e-06, + "lm_loss": 5.5306, + "loss": 1.4901, + "step": 957, + "text_contrastive_loss": 0.8597, + "train_positive_log_prob": -81.5969, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.3784, + "epoch": 2.1625282167042887, + "grad_norm": 12.156998634338379, + "learning_rate": 6.252582926678908e-06, + "lm_loss": 5.6012, + "loss": 1.385, + "step": 958, + "text_contrastive_loss": 0.8929, + "train_positive_log_prob": -85.0688, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0292 + }, + { + "contrastive_loss": 0.3565, + "epoch": 2.164785553047404, + "grad_norm": 13.135516166687012, + "learning_rate": 6.24555755840504e-06, + "lm_loss": 5.6116, + "loss": 1.2822, + "step": 959, + "text_contrastive_loss": 0.729, + "train_positive_log_prob": -83.345, + "train_positive_token_accuracy": 0.0807, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.3736, + "epoch": 2.1670428893905194, + "grad_norm": 12.425690650939941, + "learning_rate": 6.238529567436892e-06, + "lm_loss": 5.4803, + "loss": 1.2525, + "step": 960, + "text_contrastive_loss": 0.6617, + "train_positive_log_prob": -81.8851, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.4144, + "epoch": 2.1693002257336342, + "grad_norm": 15.262665748596191, + "learning_rate": 6.231498968572872e-06, + "lm_loss": 5.5378, + "loss": 1.401, + "step": 961, + "text_contrastive_loss": 0.8657, + "train_positive_log_prob": -84.0811, + "train_positive_token_accuracy": 0.0798, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4175, + "epoch": 2.1715575620767495, + "grad_norm": 13.04207992553711, + "learning_rate": 6.224465776616883e-06, + "lm_loss": 5.5645, + "loss": 1.3254, + "step": 962, + "text_contrastive_loss": 0.7029, + "train_positive_log_prob": -83.2502, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.4205, + "epoch": 2.1738148984198644, + "grad_norm": 12.609440803527832, + "learning_rate": 6.217430006378285e-06, + "lm_loss": 5.7113, + "loss": 1.4096, + "step": 963, + "text_contrastive_loss": 0.836, + "train_positive_log_prob": -84.3713, + "train_positive_token_accuracy": 0.0743, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.4091, + "epoch": 2.1760722347629797, + "grad_norm": 14.255577087402344, + "learning_rate": 6.210391672671869e-06, + "lm_loss": 5.5626, + "loss": 1.2752, + "step": 964, + "text_contrastive_loss": 0.6198, + "train_positive_log_prob": -80.5034, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3836, + "epoch": 2.1783295711060946, + "grad_norm": 13.55477523803711, + "learning_rate": 6.203350790317825e-06, + "lm_loss": 5.5485, + "loss": 1.3682, + "step": 965, + "text_contrastive_loss": 0.8594, + "train_positive_log_prob": -80.256, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.4534, + "epoch": 2.18058690744921, + "grad_norm": 13.079123497009277, + "learning_rate": 6.196307374141707e-06, + "lm_loss": 5.5843, + "loss": 1.4403, + "step": 966, + "text_contrastive_loss": 0.857, + "train_positive_log_prob": -84.4376, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.445, + "epoch": 2.1828442437923252, + "grad_norm": 14.958209037780762, + "learning_rate": 6.189261438974403e-06, + "lm_loss": 5.6398, + "loss": 1.4682, + "step": 967, + "text_contrastive_loss": 0.9185, + "train_positive_log_prob": -82.9384, + "train_positive_token_accuracy": 0.0762, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.4288, + "epoch": 2.18510158013544, + "grad_norm": 14.397655487060547, + "learning_rate": 6.1822129996521105e-06, + "lm_loss": 5.667, + "loss": 1.485, + "step": 968, + "text_contrastive_loss": 0.9791, + "train_positive_log_prob": -82.5489, + "train_positive_token_accuracy": 0.0788, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3313, + "epoch": 2.1873589164785554, + "grad_norm": 12.131510734558105, + "learning_rate": 6.175162071016295e-06, + "lm_loss": 5.4547, + "loss": 1.2975, + "step": 969, + "text_contrastive_loss": 0.8415, + "train_positive_log_prob": -79.9448, + "train_positive_token_accuracy": 0.0866, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.4203, + "epoch": 2.1896162528216703, + "grad_norm": 14.369549751281738, + "learning_rate": 6.168108667913666e-06, + "lm_loss": 5.5365, + "loss": 1.3997, + "step": 970, + "text_contrastive_loss": 0.8515, + "train_positive_log_prob": -81.9656, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3938, + "epoch": 2.1918735891647856, + "grad_norm": 13.196785926818848, + "learning_rate": 6.161052805196141e-06, + "lm_loss": 5.5713, + "loss": 1.3249, + "step": 971, + "text_contrastive_loss": 0.7479, + "train_positive_log_prob": -82.563, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.4097, + "epoch": 2.194130925507901, + "grad_norm": 13.301088333129883, + "learning_rate": 6.15399449772082e-06, + "lm_loss": 5.5817, + "loss": 1.2948, + "step": 972, + "text_contrastive_loss": 0.6539, + "train_positive_log_prob": -83.7657, + "train_positive_token_accuracy": 0.0739, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.2882, + "epoch": 2.1963882618510158, + "grad_norm": 13.367931365966797, + "learning_rate": 6.146933760349947e-06, + "lm_loss": 5.6027, + "loss": 1.1748, + "step": 973, + "text_contrastive_loss": 0.6528, + "train_positive_log_prob": -84.8481, + "train_positive_token_accuracy": 0.0727, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.4458, + "epoch": 2.198645598194131, + "grad_norm": 13.1250581741333, + "learning_rate": 6.139870607950885e-06, + "lm_loss": 5.4991, + "loss": 1.4297, + "step": 974, + "text_contrastive_loss": 0.868, + "train_positive_log_prob": -81.0066, + "train_positive_token_accuracy": 0.0838, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3832, + "epoch": 2.200902934537246, + "grad_norm": 11.806879997253418, + "learning_rate": 6.1328050553960804e-06, + "lm_loss": 5.559, + "loss": 1.2466, + "step": 975, + "text_contrastive_loss": 0.6151, + "train_positive_log_prob": -82.1006, + "train_positive_token_accuracy": 0.0833, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.4709, + "epoch": 2.2031602708803613, + "grad_norm": 13.744738578796387, + "learning_rate": 6.1257371175630375e-06, + "lm_loss": 5.4568, + "loss": 1.451, + "step": 976, + "text_contrastive_loss": 0.8689, + "train_positive_log_prob": -80.4263, + "train_positive_token_accuracy": 0.0851, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.4932, + "epoch": 2.205417607223476, + "grad_norm": 15.910041809082031, + "learning_rate": 6.118666809334277e-06, + "lm_loss": 5.6013, + "loss": 1.5744, + "step": 977, + "text_contrastive_loss": 1.0421, + "train_positive_log_prob": -82.2891, + "train_positive_token_accuracy": 0.0718, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.4284, + "epoch": 2.2076749435665914, + "grad_norm": 13.888679504394531, + "learning_rate": 6.111594145597319e-06, + "lm_loss": 5.4465, + "loss": 1.3826, + "step": 978, + "text_contrastive_loss": 0.8192, + "train_positive_log_prob": -80.5925, + "train_positive_token_accuracy": 0.0821, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.4191, + "epoch": 2.2099322799097068, + "grad_norm": 12.493885040283203, + "learning_rate": 6.104519141244631e-06, + "lm_loss": 5.5223, + "loss": 1.3757, + "step": 979, + "text_contrastive_loss": 0.8088, + "train_positive_log_prob": -80.9319, + "train_positive_token_accuracy": 0.0701, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.2877, + "epoch": 2.2121896162528216, + "grad_norm": 12.375547409057617, + "learning_rate": 6.0974418111736235e-06, + "lm_loss": 5.6429, + "loss": 1.1834, + "step": 980, + "text_contrastive_loss": 0.6629, + "train_positive_log_prob": -84.6881, + "train_positive_token_accuracy": 0.0726, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.4433, + "epoch": 2.214446952595937, + "grad_norm": 14.025139808654785, + "learning_rate": 6.090362170286591e-06, + "lm_loss": 5.5321, + "loss": 1.3116, + "step": 981, + "text_contrastive_loss": 0.6302, + "train_positive_log_prob": -82.1337, + "train_positive_token_accuracy": 0.0823, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4485, + "epoch": 2.216704288939052, + "grad_norm": 13.927783012390137, + "learning_rate": 6.0832802334907044e-06, + "lm_loss": 5.5308, + "loss": 1.3695, + "step": 982, + "text_contrastive_loss": 0.7359, + "train_positive_log_prob": -83.071, + "train_positive_token_accuracy": 0.0736, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.3961, + "epoch": 2.218961625282167, + "grad_norm": 14.23391342163086, + "learning_rate": 6.076196015697963e-06, + "lm_loss": 5.6122, + "loss": 1.3453, + "step": 983, + "text_contrastive_loss": 0.776, + "train_positive_log_prob": -85.3404, + "train_positive_token_accuracy": 0.084, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.4035, + "epoch": 2.221218961625282, + "grad_norm": 17.111347198486328, + "learning_rate": 6.069109531825169e-06, + "lm_loss": 5.5142, + "loss": 1.3439, + "step": 984, + "text_contrastive_loss": 0.778, + "train_positive_log_prob": -79.8006, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4077, + "epoch": 2.2234762979683973, + "grad_norm": 13.886045455932617, + "learning_rate": 6.0620207967939e-06, + "lm_loss": 5.6241, + "loss": 1.4371, + "step": 985, + "text_contrastive_loss": 0.9339, + "train_positive_log_prob": -82.4062, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.4891, + "epoch": 2.2257336343115126, + "grad_norm": 14.835206985473633, + "learning_rate": 6.054929825530469e-06, + "lm_loss": 5.5271, + "loss": 1.479, + "step": 986, + "text_contrastive_loss": 0.8745, + "train_positive_log_prob": -81.9998, + "train_positive_token_accuracy": 0.0758, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.2673, + "epoch": 2.2279909706546275, + "grad_norm": 11.74545669555664, + "learning_rate": 6.047836632965901e-06, + "lm_loss": 5.628, + "loss": 1.1362, + "step": 987, + "text_contrastive_loss": 0.6121, + "train_positive_log_prob": -82.9666, + "train_positive_token_accuracy": 0.0782, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.4179, + "epoch": 2.230248306997743, + "grad_norm": 13.016343116760254, + "learning_rate": 6.040741234035898e-06, + "lm_loss": 5.5476, + "loss": 1.3548, + "step": 988, + "text_contrastive_loss": 0.7642, + "train_positive_log_prob": -81.5885, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.449, + "epoch": 2.2325056433408577, + "grad_norm": 13.917783737182617, + "learning_rate": 6.0336436436808054e-06, + "lm_loss": 5.5768, + "loss": 1.4938, + "step": 989, + "text_contrastive_loss": 0.9744, + "train_positive_log_prob": -82.1898, + "train_positive_token_accuracy": 0.084, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.4146, + "epoch": 2.234762979683973, + "grad_norm": 14.144447326660156, + "learning_rate": 6.026543876845586e-06, + "lm_loss": 5.5888, + "loss": 1.3899, + "step": 990, + "text_contrastive_loss": 0.8328, + "train_positive_log_prob": -83.591, + "train_positive_token_accuracy": 0.0773, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.3853, + "epoch": 2.237020316027088, + "grad_norm": 13.677125930786133, + "learning_rate": 6.019441948479784e-06, + "lm_loss": 5.4485, + "loss": 1.3674, + "step": 991, + "text_contrastive_loss": 0.8745, + "train_positive_log_prob": -80.6227, + "train_positive_token_accuracy": 0.08, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.4032, + "epoch": 2.239277652370203, + "grad_norm": 12.899636268615723, + "learning_rate": 6.012337873537494e-06, + "lm_loss": 5.5187, + "loss": 1.3294, + "step": 992, + "text_contrastive_loss": 0.7487, + "train_positive_log_prob": -82.6968, + "train_positive_token_accuracy": 0.073, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.3392, + "epoch": 2.2415349887133185, + "grad_norm": 11.92052936553955, + "learning_rate": 6.005231666977331e-06, + "lm_loss": 5.5089, + "loss": 1.2696, + "step": 993, + "text_contrastive_loss": 0.7591, + "train_positive_log_prob": -81.514, + "train_positive_token_accuracy": 0.0744, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3499, + "epoch": 2.2437923250564333, + "grad_norm": 12.925131797790527, + "learning_rate": 5.998123343762403e-06, + "lm_loss": 5.4805, + "loss": 1.3415, + "step": 994, + "text_contrastive_loss": 0.8871, + "train_positive_log_prob": -80.4083, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.3273, + "epoch": 2.2460496613995486, + "grad_norm": 12.381352424621582, + "learning_rate": 5.9910129188602665e-06, + "lm_loss": 5.6051, + "loss": 1.3166, + "step": 995, + "text_contrastive_loss": 0.8575, + "train_positive_log_prob": -82.6488, + "train_positive_token_accuracy": 0.0755, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.3697, + "epoch": 2.2483069977426635, + "grad_norm": 12.553468704223633, + "learning_rate": 5.983900407242911e-06, + "lm_loss": 5.6019, + "loss": 1.3365, + "step": 996, + "text_contrastive_loss": 0.8133, + "train_positive_log_prob": -83.2031, + "train_positive_token_accuracy": 0.0739, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.3704, + "epoch": 2.250564334085779, + "grad_norm": 13.373266220092773, + "learning_rate": 5.976785823886713e-06, + "lm_loss": 5.638, + "loss": 1.318, + "step": 997, + "text_contrastive_loss": 0.7676, + "train_positive_log_prob": -83.0714, + "train_positive_token_accuracy": 0.0846, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3749, + "epoch": 2.2528216704288937, + "grad_norm": 12.612702369689941, + "learning_rate": 5.96966918377242e-06, + "lm_loss": 5.4589, + "loss": 1.2995, + "step": 998, + "text_contrastive_loss": 0.7573, + "train_positive_log_prob": -81.9903, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.364, + "epoch": 2.255079006772009, + "grad_norm": 14.332036018371582, + "learning_rate": 5.9625505018851e-06, + "lm_loss": 5.443, + "loss": 1.3177, + "step": 999, + "text_contrastive_loss": 0.8187, + "train_positive_log_prob": -82.1957, + "train_positive_token_accuracy": 0.0859, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3865, + "epoch": 2.2573363431151243, + "grad_norm": 14.457605361938477, + "learning_rate": 5.955429793214129e-06, + "lm_loss": 5.6281, + "loss": 1.3005, + "step": 1000, + "text_contrastive_loss": 0.7023, + "train_positive_log_prob": -82.4421, + "train_positive_token_accuracy": 0.0731, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.4277, + "epoch": 2.259593679458239, + "grad_norm": 14.964315414428711, + "learning_rate": 5.948307072753146e-06, + "lm_loss": 5.4846, + "loss": 1.3767, + "step": 1001, + "text_contrastive_loss": 0.8013, + "train_positive_log_prob": -80.7696, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3103, + "epoch": 2.2618510158013545, + "grad_norm": 12.597579002380371, + "learning_rate": 5.941182355500028e-06, + "lm_loss": 5.4495, + "loss": 1.2341, + "step": 1002, + "text_contrastive_loss": 0.7577, + "train_positive_log_prob": -81.4411, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.399, + "epoch": 2.2641083521444694, + "grad_norm": 13.291341781616211, + "learning_rate": 5.934055656456855e-06, + "lm_loss": 5.42, + "loss": 1.2891, + "step": 1003, + "text_contrastive_loss": 0.6964, + "train_positive_log_prob": -80.4141, + "train_positive_token_accuracy": 0.0866, + "train_positive_token_prob": 0.0329 + }, + { + "contrastive_loss": 0.4211, + "epoch": 2.2663656884875847, + "grad_norm": 13.093559265136719, + "learning_rate": 5.926926990629883e-06, + "lm_loss": 5.4314, + "loss": 1.3203, + "step": 1004, + "text_contrastive_loss": 0.7123, + "train_positive_log_prob": -79.2042, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4546, + "epoch": 2.2686230248307, + "grad_norm": 14.154945373535156, + "learning_rate": 5.919796373029504e-06, + "lm_loss": 5.5635, + "loss": 1.3675, + "step": 1005, + "text_contrastive_loss": 0.713, + "train_positive_log_prob": -83.1069, + "train_positive_token_accuracy": 0.0816, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3096, + "epoch": 2.270880361173815, + "grad_norm": 12.211151123046875, + "learning_rate": 5.912663818670224e-06, + "lm_loss": 5.5052, + "loss": 1.2244, + "step": 1006, + "text_contrastive_loss": 0.7286, + "train_positive_log_prob": -81.8863, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.3444, + "epoch": 2.27313769751693, + "grad_norm": 12.89335823059082, + "learning_rate": 5.905529342570627e-06, + "lm_loss": 5.525, + "loss": 1.2835, + "step": 1007, + "text_contrastive_loss": 0.7732, + "train_positive_log_prob": -80.8033, + "train_positive_token_accuracy": 0.0825, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4244, + "epoch": 2.275395033860045, + "grad_norm": 15.95014762878418, + "learning_rate": 5.898392959753343e-06, + "lm_loss": 5.3849, + "loss": 1.3743, + "step": 1008, + "text_contrastive_loss": 0.8228, + "train_positive_log_prob": -80.0396, + "train_positive_token_accuracy": 0.0875, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.4084, + "epoch": 2.2776523702031604, + "grad_norm": 13.84809684753418, + "learning_rate": 5.8912546852450116e-06, + "lm_loss": 5.5493, + "loss": 1.3388, + "step": 1009, + "text_contrastive_loss": 0.751, + "train_positive_log_prob": -82.204, + "train_positive_token_accuracy": 0.0715, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.4045, + "epoch": 2.2799097065462752, + "grad_norm": 14.218093872070312, + "learning_rate": 5.8841145340762665e-06, + "lm_loss": 5.4525, + "loss": 1.3355, + "step": 1010, + "text_contrastive_loss": 0.7715, + "train_positive_log_prob": -80.3833, + "train_positive_token_accuracy": 0.0803, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.3756, + "epoch": 2.2821670428893905, + "grad_norm": 14.792195320129395, + "learning_rate": 5.876972521281683e-06, + "lm_loss": 5.4787, + "loss": 1.2897, + "step": 1011, + "text_contrastive_loss": 0.7324, + "train_positive_log_prob": -81.1348, + "train_positive_token_accuracy": 0.0902, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.422, + "epoch": 2.2844243792325054, + "grad_norm": 14.615242958068848, + "learning_rate": 5.869828661899761e-06, + "lm_loss": 5.5307, + "loss": 1.4285, + "step": 1012, + "text_contrastive_loss": 0.9069, + "train_positive_log_prob": -79.6997, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.4187, + "epoch": 2.2866817155756207, + "grad_norm": 14.485371589660645, + "learning_rate": 5.862682970972888e-06, + "lm_loss": 5.4987, + "loss": 1.3371, + "step": 1013, + "text_contrastive_loss": 0.737, + "train_positive_log_prob": -80.8279, + "train_positive_token_accuracy": 0.0787, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4542, + "epoch": 2.288939051918736, + "grad_norm": 14.774161338806152, + "learning_rate": 5.855535463547309e-06, + "lm_loss": 5.4593, + "loss": 1.3862, + "step": 1014, + "text_contrastive_loss": 0.7721, + "train_positive_log_prob": -80.0327, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3766, + "epoch": 2.291196388261851, + "grad_norm": 13.07331371307373, + "learning_rate": 5.8483861546730915e-06, + "lm_loss": 5.5848, + "loss": 1.2832, + "step": 1015, + "text_contrastive_loss": 0.6962, + "train_positive_log_prob": -82.2048, + "train_positive_token_accuracy": 0.0783, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4102, + "epoch": 2.293453724604966, + "grad_norm": 13.870847702026367, + "learning_rate": 5.841235059404097e-06, + "lm_loss": 5.4185, + "loss": 1.4639, + "step": 1016, + "text_contrastive_loss": 1.0237, + "train_positive_log_prob": -80.3671, + "train_positive_token_accuracy": 0.0772, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3975, + "epoch": 2.295711060948081, + "grad_norm": 13.475671768188477, + "learning_rate": 5.834082192797948e-06, + "lm_loss": 5.4765, + "loss": 1.3319, + "step": 1017, + "text_contrastive_loss": 0.7735, + "train_positive_log_prob": -79.3442, + "train_positive_token_accuracy": 0.0741, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3731, + "epoch": 2.2979683972911964, + "grad_norm": 13.642584800720215, + "learning_rate": 5.826927569915999e-06, + "lm_loss": 5.7399, + "loss": 1.3258, + "step": 1018, + "text_contrastive_loss": 0.7572, + "train_positive_log_prob": -84.101, + "train_positive_token_accuracy": 0.0674, + "train_positive_token_prob": 0.0274 + }, + { + "contrastive_loss": 0.4306, + "epoch": 2.3002257336343117, + "grad_norm": 14.462185859680176, + "learning_rate": 5.819771205823303e-06, + "lm_loss": 5.6443, + "loss": 1.3692, + "step": 1019, + "text_contrastive_loss": 0.7485, + "train_positive_log_prob": -83.9394, + "train_positive_token_accuracy": 0.0704, + "train_positive_token_prob": 0.0286 + }, + { + "contrastive_loss": 0.4197, + "epoch": 2.3024830699774266, + "grad_norm": 14.147954940795898, + "learning_rate": 5.812613115588575e-06, + "lm_loss": 5.4966, + "loss": 1.3636, + "step": 1020, + "text_contrastive_loss": 0.7884, + "train_positive_log_prob": -79.8075, + "train_positive_token_accuracy": 0.0643, + "train_positive_token_prob": 0.0284 + }, + { + "contrastive_loss": 0.3725, + "epoch": 2.304740406320542, + "grad_norm": 14.429810523986816, + "learning_rate": 5.805453314284168e-06, + "lm_loss": 5.5771, + "loss": 1.2985, + "step": 1021, + "text_contrastive_loss": 0.7367, + "train_positive_log_prob": -83.3511, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3774, + "epoch": 2.3069977426636568, + "grad_norm": 15.471999168395996, + "learning_rate": 5.7982918169860395e-06, + "lm_loss": 5.5091, + "loss": 1.3346, + "step": 1022, + "text_contrastive_loss": 0.8126, + "train_positive_log_prob": -81.344, + "train_positive_token_accuracy": 0.0767, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.3785, + "epoch": 2.309255079006772, + "grad_norm": 14.69314956665039, + "learning_rate": 5.791128638773711e-06, + "lm_loss": 5.5394, + "loss": 1.3352, + "step": 1023, + "text_contrastive_loss": 0.8055, + "train_positive_log_prob": -81.367, + "train_positive_token_accuracy": 0.0854, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.4047, + "epoch": 2.311512415349887, + "grad_norm": 13.294145584106445, + "learning_rate": 5.783963794730254e-06, + "lm_loss": 5.5661, + "loss": 1.3431, + "step": 1024, + "text_contrastive_loss": 0.7636, + "train_positive_log_prob": -82.211, + "train_positive_token_accuracy": 0.0788, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.4087, + "epoch": 2.3137697516930023, + "grad_norm": 13.578989028930664, + "learning_rate": 5.776797299942236e-06, + "lm_loss": 5.6248, + "loss": 1.3411, + "step": 1025, + "text_contrastive_loss": 0.7398, + "train_positive_log_prob": -83.8835, + "train_positive_token_accuracy": 0.0812, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.2892, + "epoch": 2.3160270880361176, + "grad_norm": 12.38988208770752, + "learning_rate": 5.7696291694997105e-06, + "lm_loss": 5.6038, + "loss": 1.2273, + "step": 1026, + "text_contrastive_loss": 0.7555, + "train_positive_log_prob": -83.9048, + "train_positive_token_accuracy": 0.0843, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.4708, + "epoch": 2.3182844243792324, + "grad_norm": 15.33709716796875, + "learning_rate": 5.762459418496169e-06, + "lm_loss": 5.4894, + "loss": 1.4837, + "step": 1027, + "text_contrastive_loss": 0.9281, + "train_positive_log_prob": -80.0691, + "train_positive_token_accuracy": 0.0811, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.4534, + "epoch": 2.3205417607223477, + "grad_norm": 13.43025016784668, + "learning_rate": 5.755288062028519e-06, + "lm_loss": 5.4538, + "loss": 1.404, + "step": 1028, + "text_contrastive_loss": 0.8104, + "train_positive_log_prob": -80.4657, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3802, + "epoch": 2.3227990970654626, + "grad_norm": 12.586274147033691, + "learning_rate": 5.748115115197045e-06, + "lm_loss": 5.5433, + "loss": 1.3214, + "step": 1029, + "text_contrastive_loss": 0.7737, + "train_positive_log_prob": -80.8127, + "train_positive_token_accuracy": 0.0737, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.3511, + "epoch": 2.325056433408578, + "grad_norm": 12.862933158874512, + "learning_rate": 5.740940593105383e-06, + "lm_loss": 5.5364, + "loss": 1.2075, + "step": 1030, + "text_contrastive_loss": 0.6056, + "train_positive_log_prob": -82.0468, + "train_positive_token_accuracy": 0.0756, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.4311, + "epoch": 2.327313769751693, + "grad_norm": 13.449870109558105, + "learning_rate": 5.733764510860482e-06, + "lm_loss": 5.4491, + "loss": 1.3553, + "step": 1031, + "text_contrastive_loss": 0.7587, + "train_positive_log_prob": -79.0788, + "train_positive_token_accuracy": 0.0778, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3812, + "epoch": 2.329571106094808, + "grad_norm": 13.699833869934082, + "learning_rate": 5.726586883572584e-06, + "lm_loss": 5.4856, + "loss": 1.3338, + "step": 1032, + "text_contrastive_loss": 0.808, + "train_positive_log_prob": -82.1777, + "train_positive_token_accuracy": 0.0694, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.4511, + "epoch": 2.3318284424379234, + "grad_norm": 14.140271186828613, + "learning_rate": 5.719407726355174e-06, + "lm_loss": 5.5012, + "loss": 1.4156, + "step": 1033, + "text_contrastive_loss": 0.8286, + "train_positive_log_prob": -82.0444, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.4229, + "epoch": 2.3340857787810383, + "grad_norm": 14.649331092834473, + "learning_rate": 5.712227054324968e-06, + "lm_loss": 5.5919, + "loss": 1.4092, + "step": 1034, + "text_contrastive_loss": 0.8542, + "train_positive_log_prob": -83.851, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.4216, + "epoch": 2.3363431151241536, + "grad_norm": 14.275680541992188, + "learning_rate": 5.705044882601862e-06, + "lm_loss": 5.5034, + "loss": 1.3981, + "step": 1035, + "text_contrastive_loss": 0.8525, + "train_positive_log_prob": -82.83, + "train_positive_token_accuracy": 0.0758, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4434, + "epoch": 2.3386004514672685, + "grad_norm": 13.905360221862793, + "learning_rate": 5.697861226308923e-06, + "lm_loss": 5.6121, + "loss": 1.4741, + "step": 1036, + "text_contrastive_loss": 0.9389, + "train_positive_log_prob": -85.4769, + "train_positive_token_accuracy": 0.0744, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.4153, + "epoch": 2.340857787810384, + "grad_norm": 12.414887428283691, + "learning_rate": 5.69067610057233e-06, + "lm_loss": 5.4648, + "loss": 1.3672, + "step": 1037, + "text_contrastive_loss": 0.8109, + "train_positive_log_prob": -83.1883, + "train_positive_token_accuracy": 0.0736, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3813, + "epoch": 2.343115124153499, + "grad_norm": 12.460741996765137, + "learning_rate": 5.683489520521365e-06, + "lm_loss": 5.554, + "loss": 1.3597, + "step": 1038, + "text_contrastive_loss": 0.8462, + "train_positive_log_prob": -81.7023, + "train_positive_token_accuracy": 0.081, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.4566, + "epoch": 2.345372460496614, + "grad_norm": 16.492595672607422, + "learning_rate": 5.6763015012883686e-06, + "lm_loss": 5.4773, + "loss": 1.4185, + "step": 1039, + "text_contrastive_loss": 0.8284, + "train_positive_log_prob": -81.5847, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3209, + "epoch": 2.3476297968397293, + "grad_norm": 11.588064193725586, + "learning_rate": 5.6691120580087126e-06, + "lm_loss": 5.5496, + "loss": 1.2051, + "step": 1040, + "text_contrastive_loss": 0.6585, + "train_positive_log_prob": -79.3824, + "train_positive_token_accuracy": 0.0868, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3628, + "epoch": 2.349887133182844, + "grad_norm": 11.495614051818848, + "learning_rate": 5.661921205820767e-06, + "lm_loss": 5.491, + "loss": 1.2699, + "step": 1041, + "text_contrastive_loss": 0.716, + "train_positive_log_prob": -80.6935, + "train_positive_token_accuracy": 0.0776, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4982, + "epoch": 2.3521444695259595, + "grad_norm": 16.367971420288086, + "learning_rate": 5.654728959865872e-06, + "lm_loss": 5.6088, + "loss": 1.4526, + "step": 1042, + "text_contrastive_loss": 0.7871, + "train_positive_log_prob": -84.4409, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3574, + "epoch": 2.3544018058690743, + "grad_norm": 11.687899589538574, + "learning_rate": 5.647535335288296e-06, + "lm_loss": 5.5539, + "loss": 1.265, + "step": 1043, + "text_contrastive_loss": 0.7044, + "train_positive_log_prob": -84.0477, + "train_positive_token_accuracy": 0.0751, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3309, + "epoch": 2.3566591422121896, + "grad_norm": 12.096535682678223, + "learning_rate": 5.640340347235215e-06, + "lm_loss": 5.5325, + "loss": 1.3035, + "step": 1044, + "text_contrastive_loss": 0.8387, + "train_positive_log_prob": -81.5821, + "train_positive_token_accuracy": 0.08, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.5239, + "epoch": 2.3589164785553045, + "grad_norm": 14.460906028747559, + "learning_rate": 5.6331440108566735e-06, + "lm_loss": 5.688, + "loss": 1.5885, + "step": 1045, + "text_contrastive_loss": 0.9915, + "train_positive_log_prob": -84.719, + "train_positive_token_accuracy": 0.0745, + "train_positive_token_prob": 0.028 + }, + { + "contrastive_loss": 0.4119, + "epoch": 2.36117381489842, + "grad_norm": 14.186589241027832, + "learning_rate": 5.6259463413055604e-06, + "lm_loss": 5.5774, + "loss": 1.4244, + "step": 1046, + "text_contrastive_loss": 0.9096, + "train_positive_log_prob": -82.0488, + "train_positive_token_accuracy": 0.0767, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.4088, + "epoch": 2.363431151241535, + "grad_norm": 13.958948135375977, + "learning_rate": 5.6187473537375635e-06, + "lm_loss": 5.6674, + "loss": 1.3383, + "step": 1047, + "text_contrastive_loss": 0.7254, + "train_positive_log_prob": -81.8009, + "train_positive_token_accuracy": 0.072, + "train_positive_token_prob": 0.0283 + }, + { + "contrastive_loss": 0.3033, + "epoch": 2.36568848758465, + "grad_norm": 11.371842384338379, + "learning_rate": 5.611547063311152e-06, + "lm_loss": 5.4781, + "loss": 1.2355, + "step": 1048, + "text_contrastive_loss": 0.7687, + "train_positive_log_prob": -80.5247, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.4748, + "epoch": 2.3679458239277653, + "grad_norm": 16.5283203125, + "learning_rate": 5.604345485187535e-06, + "lm_loss": 5.4918, + "loss": 1.4455, + "step": 1049, + "text_contrastive_loss": 0.8429, + "train_positive_log_prob": -83.7695, + "train_positive_token_accuracy": 0.0881, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.4184, + "epoch": 2.37020316027088, + "grad_norm": 13.52514934539795, + "learning_rate": 5.597142634530639e-06, + "lm_loss": 5.4235, + "loss": 1.2884, + "step": 1050, + "text_contrastive_loss": 0.6553, + "train_positive_log_prob": -80.5712, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.4096, + "epoch": 2.3724604966139955, + "grad_norm": 13.995003700256348, + "learning_rate": 5.589938526507059e-06, + "lm_loss": 5.4935, + "loss": 1.3267, + "step": 1051, + "text_contrastive_loss": 0.7355, + "train_positive_log_prob": -80.4922, + "train_positive_token_accuracy": 0.0744, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.4411, + "epoch": 2.374717832957111, + "grad_norm": 15.997383117675781, + "learning_rate": 5.582733176286048e-06, + "lm_loss": 5.4534, + "loss": 1.4606, + "step": 1052, + "text_contrastive_loss": 0.9483, + "train_positive_log_prob": -79.4779, + "train_positive_token_accuracy": 0.0713, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.3544, + "epoch": 2.3769751693002257, + "grad_norm": 14.313887596130371, + "learning_rate": 5.575526599039472e-06, + "lm_loss": 5.3616, + "loss": 1.2752, + "step": 1053, + "text_contrastive_loss": 0.7693, + "train_positive_log_prob": -79.2925, + "train_positive_token_accuracy": 0.0895, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.3037, + "epoch": 2.379232505643341, + "grad_norm": 11.96545124053955, + "learning_rate": 5.568318809941777e-06, + "lm_loss": 5.4406, + "loss": 1.2469, + "step": 1054, + "text_contrastive_loss": 0.7983, + "train_positive_log_prob": -79.7392, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3744, + "epoch": 2.381489841986456, + "grad_norm": 13.176071166992188, + "learning_rate": 5.561109824169962e-06, + "lm_loss": 5.4787, + "loss": 1.288, + "step": 1055, + "text_contrastive_loss": 0.7316, + "train_positive_log_prob": -83.9965, + "train_positive_token_accuracy": 0.0843, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3477, + "epoch": 2.383747178329571, + "grad_norm": 13.640965461730957, + "learning_rate": 5.553899656903552e-06, + "lm_loss": 5.5338, + "loss": 1.2847, + "step": 1056, + "text_contrastive_loss": 0.7672, + "train_positive_log_prob": -84.1766, + "train_positive_token_accuracy": 0.0855, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.4663, + "epoch": 2.386004514672686, + "grad_norm": 15.705275535583496, + "learning_rate": 5.546688323324548e-06, + "lm_loss": 5.5384, + "loss": 1.434, + "step": 1057, + "text_contrastive_loss": 0.8277, + "train_positive_log_prob": -83.1616, + "train_positive_token_accuracy": 0.0726, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.3768, + "epoch": 2.3882618510158014, + "grad_norm": 12.569790840148926, + "learning_rate": 5.53947583861742e-06, + "lm_loss": 5.4783, + "loss": 1.3203, + "step": 1058, + "text_contrastive_loss": 0.7913, + "train_positive_log_prob": -80.1623, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.414, + "epoch": 2.3905191873589167, + "grad_norm": 13.718636512756348, + "learning_rate": 5.5322622179690514e-06, + "lm_loss": 5.462, + "loss": 1.3294, + "step": 1059, + "text_contrastive_loss": 0.7382, + "train_positive_log_prob": -79.114, + "train_positive_token_accuracy": 0.085, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4054, + "epoch": 2.3927765237020315, + "grad_norm": 14.379610061645508, + "learning_rate": 5.525047476568722e-06, + "lm_loss": 5.5497, + "loss": 1.3388, + "step": 1060, + "text_contrastive_loss": 0.7568, + "train_positive_log_prob": -82.7442, + "train_positive_token_accuracy": 0.0726, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.4302, + "epoch": 2.395033860045147, + "grad_norm": 13.15449333190918, + "learning_rate": 5.51783162960807e-06, + "lm_loss": 5.5239, + "loss": 1.3565, + "step": 1061, + "text_contrastive_loss": 0.7478, + "train_positive_log_prob": -83.3524, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.445, + "epoch": 2.3972911963882617, + "grad_norm": 15.589587211608887, + "learning_rate": 5.5106146922810664e-06, + "lm_loss": 5.5588, + "loss": 1.4749, + "step": 1062, + "text_contrastive_loss": 0.948, + "train_positive_log_prob": -82.9687, + "train_positive_token_accuracy": 0.0735, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.4575, + "epoch": 2.399548532731377, + "grad_norm": 16.6496639251709, + "learning_rate": 5.50339667978397e-06, + "lm_loss": 5.5757, + "loss": 1.4905, + "step": 1063, + "text_contrastive_loss": 0.951, + "train_positive_log_prob": -82.6548, + "train_positive_token_accuracy": 0.0733, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.4152, + "epoch": 2.401805869074492, + "grad_norm": 14.359514236450195, + "learning_rate": 5.496177607315312e-06, + "lm_loss": 5.4715, + "loss": 1.4209, + "step": 1064, + "text_contrastive_loss": 0.917, + "train_positive_log_prob": -80.8512, + "train_positive_token_accuracy": 0.0875, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3931, + "epoch": 2.404063205417607, + "grad_norm": 13.47160816192627, + "learning_rate": 5.488957490075846e-06, + "lm_loss": 5.4933, + "loss": 1.3401, + "step": 1065, + "text_contrastive_loss": 0.7953, + "train_positive_log_prob": -80.8319, + "train_positive_token_accuracy": 0.0823, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.4, + "epoch": 2.4063205417607225, + "grad_norm": 14.166399002075195, + "learning_rate": 5.4817363432685355e-06, + "lm_loss": 5.5855, + "loss": 1.3907, + "step": 1066, + "text_contrastive_loss": 0.8642, + "train_positive_log_prob": -83.4994, + "train_positive_token_accuracy": 0.0844, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.405, + "epoch": 2.4085778781038374, + "grad_norm": 12.809855461120605, + "learning_rate": 5.474514182098504e-06, + "lm_loss": 5.4582, + "loss": 1.3871, + "step": 1067, + "text_contrastive_loss": 0.8726, + "train_positive_log_prob": -80.9575, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4355, + "epoch": 2.4108352144469527, + "grad_norm": 13.22983169555664, + "learning_rate": 5.4672910217730155e-06, + "lm_loss": 5.4703, + "loss": 1.3701, + "step": 1068, + "text_contrastive_loss": 0.7753, + "train_positive_log_prob": -81.2887, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.431, + "epoch": 2.4130925507900676, + "grad_norm": 13.735644340515137, + "learning_rate": 5.4600668775014355e-06, + "lm_loss": 5.5753, + "loss": 1.3539, + "step": 1069, + "text_contrastive_loss": 0.7307, + "train_positive_log_prob": -82.871, + "train_positive_token_accuracy": 0.0783, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.5297, + "epoch": 2.415349887133183, + "grad_norm": 14.50279426574707, + "learning_rate": 5.452841764495203e-06, + "lm_loss": 5.4559, + "loss": 1.4491, + "step": 1070, + "text_contrastive_loss": 0.7477, + "train_positive_log_prob": -79.0655, + "train_positive_token_accuracy": 0.0844, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3882, + "epoch": 2.417607223476298, + "grad_norm": 14.254829406738281, + "learning_rate": 5.445615697967797e-06, + "lm_loss": 5.4299, + "loss": 1.2952, + "step": 1071, + "text_contrastive_loss": 0.7282, + "train_positive_log_prob": -81.8976, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.5385, + "epoch": 2.419864559819413, + "grad_norm": 14.705514907836914, + "learning_rate": 5.438388693134702e-06, + "lm_loss": 5.4563, + "loss": 1.544, + "step": 1072, + "text_contrastive_loss": 0.9196, + "train_positive_log_prob": -79.1594, + "train_positive_token_accuracy": 0.0835, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3876, + "epoch": 2.4221218961625284, + "grad_norm": 13.609487533569336, + "learning_rate": 5.431160765213379e-06, + "lm_loss": 5.4411, + "loss": 1.3682, + "step": 1073, + "text_contrastive_loss": 0.873, + "train_positive_log_prob": -79.1474, + "train_positive_token_accuracy": 0.0819, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.2967, + "epoch": 2.4243792325056432, + "grad_norm": 12.19257926940918, + "learning_rate": 5.423931929423235e-06, + "lm_loss": 5.4654, + "loss": 1.1781, + "step": 1074, + "text_contrastive_loss": 0.6697, + "train_positive_log_prob": -80.4232, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3983, + "epoch": 2.4266365688487586, + "grad_norm": 12.440181732177734, + "learning_rate": 5.416702200985585e-06, + "lm_loss": 5.554, + "loss": 1.3957, + "step": 1075, + "text_contrastive_loss": 0.884, + "train_positive_log_prob": -83.4132, + "train_positive_token_accuracy": 0.0819, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3982, + "epoch": 2.4288939051918734, + "grad_norm": 12.483242988586426, + "learning_rate": 5.409471595123628e-06, + "lm_loss": 5.4045, + "loss": 1.3891, + "step": 1076, + "text_contrastive_loss": 0.901, + "train_positive_log_prob": -78.5737, + "train_positive_token_accuracy": 0.0825, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3963, + "epoch": 2.4311512415349887, + "grad_norm": 13.03343391418457, + "learning_rate": 5.4022401270624036e-06, + "lm_loss": 5.4955, + "loss": 1.2496, + "step": 1077, + "text_contrastive_loss": 0.6075, + "train_positive_log_prob": -82.3033, + "train_positive_token_accuracy": 0.0923, + "train_positive_token_prob": 0.0331 + }, + { + "contrastive_loss": 0.4213, + "epoch": 2.4334085778781036, + "grad_norm": 12.747968673706055, + "learning_rate": 5.395007812028775e-06, + "lm_loss": 5.5118, + "loss": 1.3411, + "step": 1078, + "text_contrastive_loss": 0.7373, + "train_positive_log_prob": -80.5047, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.4957, + "epoch": 2.435665914221219, + "grad_norm": 14.808120727539062, + "learning_rate": 5.387774665251385e-06, + "lm_loss": 5.5048, + "loss": 1.4535, + "step": 1079, + "text_contrastive_loss": 0.8145, + "train_positive_log_prob": -83.3273, + "train_positive_token_accuracy": 0.0852, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3311, + "epoch": 2.4379232505643342, + "grad_norm": 12.2733793258667, + "learning_rate": 5.380540701960627e-06, + "lm_loss": 5.4812, + "loss": 1.2274, + "step": 1080, + "text_contrastive_loss": 0.6964, + "train_positive_log_prob": -81.4476, + "train_positive_token_accuracy": 0.081, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3964, + "epoch": 2.440180586907449, + "grad_norm": 13.839046478271484, + "learning_rate": 5.373305937388613e-06, + "lm_loss": 5.6242, + "loss": 1.2956, + "step": 1081, + "text_contrastive_loss": 0.6735, + "train_positive_log_prob": -84.5144, + "train_positive_token_accuracy": 0.0767, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.42, + "epoch": 2.4424379232505644, + "grad_norm": 15.788115501403809, + "learning_rate": 5.3660703867691475e-06, + "lm_loss": 5.4986, + "loss": 1.4655, + "step": 1082, + "text_contrastive_loss": 0.9912, + "train_positive_log_prob": -80.5029, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3968, + "epoch": 2.4446952595936793, + "grad_norm": 13.70380973815918, + "learning_rate": 5.358834065337684e-06, + "lm_loss": 5.613, + "loss": 1.4596, + "step": 1083, + "text_contrastive_loss": 1.0029, + "train_positive_log_prob": -83.2968, + "train_positive_token_accuracy": 0.0816, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3298, + "epoch": 2.4469525959367946, + "grad_norm": 12.720730781555176, + "learning_rate": 5.3515969883313e-06, + "lm_loss": 5.6038, + "loss": 1.2963, + "step": 1084, + "text_contrastive_loss": 0.8123, + "train_positive_log_prob": -83.4635, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.3556, + "epoch": 2.44920993227991, + "grad_norm": 13.116069793701172, + "learning_rate": 5.344359170988668e-06, + "lm_loss": 5.5753, + "loss": 1.2659, + "step": 1085, + "text_contrastive_loss": 0.7055, + "train_positive_log_prob": -81.6709, + "train_positive_token_accuracy": 0.0749, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.4478, + "epoch": 2.4514672686230248, + "grad_norm": 15.64147663116455, + "learning_rate": 5.337120628550016e-06, + "lm_loss": 5.489, + "loss": 1.4801, + "step": 1086, + "text_contrastive_loss": 0.9668, + "train_positive_log_prob": -82.764, + "train_positive_token_accuracy": 0.0845, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3946, + "epoch": 2.45372460496614, + "grad_norm": 12.542115211486816, + "learning_rate": 5.329881376257098e-06, + "lm_loss": 5.4888, + "loss": 1.3452, + "step": 1087, + "text_contrastive_loss": 0.8035, + "train_positive_log_prob": -81.07, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4427, + "epoch": 2.455981941309255, + "grad_norm": 15.055801391601562, + "learning_rate": 5.322641429353167e-06, + "lm_loss": 5.4291, + "loss": 1.4587, + "step": 1088, + "text_contrastive_loss": 0.9461, + "train_positive_log_prob": -81.8182, + "train_positive_token_accuracy": 0.0925, + "train_positive_token_prob": 0.0336 + }, + { + "contrastive_loss": 0.4104, + "epoch": 2.4582392776523703, + "grad_norm": 13.4984769821167, + "learning_rate": 5.315400803082934e-06, + "lm_loss": 5.4306, + "loss": 1.3392, + "step": 1089, + "text_contrastive_loss": 0.7714, + "train_positive_log_prob": -80.143, + "train_positive_token_accuracy": 0.0828, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.4127, + "epoch": 2.460496613995485, + "grad_norm": 14.413800239562988, + "learning_rate": 5.308159512692544e-06, + "lm_loss": 5.5318, + "loss": 1.435, + "step": 1090, + "text_contrastive_loss": 0.9382, + "train_positive_log_prob": -81.3786, + "train_positive_token_accuracy": 0.0728, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.4988, + "epoch": 2.4627539503386005, + "grad_norm": 15.12528133392334, + "learning_rate": 5.300917573429536e-06, + "lm_loss": 5.5342, + "loss": 1.5686, + "step": 1091, + "text_contrastive_loss": 1.0328, + "train_positive_log_prob": -78.8866, + "train_positive_token_accuracy": 0.0817, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4301, + "epoch": 2.4650112866817158, + "grad_norm": 14.577112197875977, + "learning_rate": 5.293675000542822e-06, + "lm_loss": 5.613, + "loss": 1.3923, + "step": 1092, + "text_contrastive_loss": 0.8019, + "train_positive_log_prob": -84.0201, + "train_positive_token_accuracy": 0.0812, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.5108, + "epoch": 2.4672686230248306, + "grad_norm": 15.106910705566406, + "learning_rate": 5.286431809282639e-06, + "lm_loss": 5.4072, + "loss": 1.5167, + "step": 1093, + "text_contrastive_loss": 0.9303, + "train_positive_log_prob": -80.0071, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3982, + "epoch": 2.469525959367946, + "grad_norm": 13.1541166305542, + "learning_rate": 5.279188014900537e-06, + "lm_loss": 5.4555, + "loss": 1.2875, + "step": 1094, + "text_contrastive_loss": 0.6874, + "train_positive_log_prob": -79.1284, + "train_positive_token_accuracy": 0.0848, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.4176, + "epoch": 2.471783295711061, + "grad_norm": 13.657038688659668, + "learning_rate": 5.2719436326493255e-06, + "lm_loss": 5.5483, + "loss": 1.3841, + "step": 1095, + "text_contrastive_loss": 0.8235, + "train_positive_log_prob": -83.332, + "train_positive_token_accuracy": 0.0676, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.4231, + "epoch": 2.474040632054176, + "grad_norm": 13.24435043334961, + "learning_rate": 5.26469867778306e-06, + "lm_loss": 5.5449, + "loss": 1.3485, + "step": 1096, + "text_contrastive_loss": 0.7419, + "train_positive_log_prob": -81.6579, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3658, + "epoch": 2.476297968397291, + "grad_norm": 11.30309009552002, + "learning_rate": 5.257453165556996e-06, + "lm_loss": 5.4812, + "loss": 1.2899, + "step": 1097, + "text_contrastive_loss": 0.752, + "train_positive_log_prob": -81.9436, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0325 + }, + { + "contrastive_loss": 0.3655, + "epoch": 2.4785553047404063, + "grad_norm": 12.997236251831055, + "learning_rate": 5.2502071112275675e-06, + "lm_loss": 5.5637, + "loss": 1.3163, + "step": 1098, + "text_contrastive_loss": 0.7888, + "train_positive_log_prob": -82.4997, + "train_positive_token_accuracy": 0.0827, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3802, + "epoch": 2.4808126410835216, + "grad_norm": 13.113594055175781, + "learning_rate": 5.242960530052344e-06, + "lm_loss": 5.4369, + "loss": 1.3944, + "step": 1099, + "text_contrastive_loss": 0.9408, + "train_positive_log_prob": -78.9868, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3574, + "epoch": 2.4830699774266365, + "grad_norm": 12.079339981079102, + "learning_rate": 5.235713437290012e-06, + "lm_loss": 5.4745, + "loss": 1.3933, + "step": 1100, + "text_contrastive_loss": 0.9769, + "train_positive_log_prob": -81.7971, + "train_positive_token_accuracy": 0.075, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.3884, + "epoch": 2.485327313769752, + "grad_norm": 13.166781425476074, + "learning_rate": 5.228465848200327e-06, + "lm_loss": 5.569, + "loss": 1.2961, + "step": 1101, + "text_contrastive_loss": 0.7016, + "train_positive_log_prob": -79.4835, + "train_positive_token_accuracy": 0.0914, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.522, + "epoch": 2.4875846501128667, + "grad_norm": 16.098726272583008, + "learning_rate": 5.221217778044096e-06, + "lm_loss": 5.4566, + "loss": 1.4602, + "step": 1102, + "text_contrastive_loss": 0.785, + "train_positive_log_prob": -80.1628, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.3885, + "epoch": 2.489841986455982, + "grad_norm": 14.24079418182373, + "learning_rate": 5.2139692420831325e-06, + "lm_loss": 5.6174, + "loss": 1.3326, + "step": 1103, + "text_contrastive_loss": 0.7646, + "train_positive_log_prob": -84.0461, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.4124, + "epoch": 2.4920993227990973, + "grad_norm": 12.878908157348633, + "learning_rate": 5.206720255580241e-06, + "lm_loss": 5.4701, + "loss": 1.3421, + "step": 1104, + "text_contrastive_loss": 0.7653, + "train_positive_log_prob": -80.3627, + "train_positive_token_accuracy": 0.0803, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.482, + "epoch": 2.494356659142212, + "grad_norm": 16.01677131652832, + "learning_rate": 5.199470833799164e-06, + "lm_loss": 5.5926, + "loss": 1.4655, + "step": 1105, + "text_contrastive_loss": 0.8485, + "train_positive_log_prob": -84.4975, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.2894, + "epoch": 2.4966139954853275, + "grad_norm": 10.986605644226074, + "learning_rate": 5.192220992004569e-06, + "lm_loss": 5.5843, + "loss": 1.1929, + "step": 1106, + "text_contrastive_loss": 0.6902, + "train_positive_log_prob": -83.3476, + "train_positive_token_accuracy": 0.073, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.5292, + "epoch": 2.4988713318284423, + "grad_norm": 15.666089057922363, + "learning_rate": 5.184970745461998e-06, + "lm_loss": 5.6315, + "loss": 1.5015, + "step": 1107, + "text_contrastive_loss": 0.8183, + "train_positive_log_prob": -84.0975, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.4, + "epoch": 2.5011286681715577, + "grad_norm": 13.067605018615723, + "learning_rate": 5.177720109437857e-06, + "lm_loss": 5.5512, + "loss": 1.3341, + "step": 1108, + "text_contrastive_loss": 0.7581, + "train_positive_log_prob": -82.9831, + "train_positive_token_accuracy": 0.0807, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3199, + "epoch": 2.5033860045146725, + "grad_norm": 12.18812084197998, + "learning_rate": 5.170469099199363e-06, + "lm_loss": 5.4483, + "loss": 1.2514, + "step": 1109, + "text_contrastive_loss": 0.7735, + "train_positive_log_prob": -78.2362, + "train_positive_token_accuracy": 0.0871, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.39, + "epoch": 2.505643340857788, + "grad_norm": 13.79318904876709, + "learning_rate": 5.1632177300145255e-06, + "lm_loss": 5.4453, + "loss": 1.3423, + "step": 1110, + "text_contrastive_loss": 0.8156, + "train_positive_log_prob": -80.9976, + "train_positive_token_accuracy": 0.0806, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.5187, + "epoch": 2.5079006772009027, + "grad_norm": 14.890632629394531, + "learning_rate": 5.155966017152108e-06, + "lm_loss": 5.3921, + "loss": 1.4984, + "step": 1111, + "text_contrastive_loss": 0.8809, + "train_positive_log_prob": -81.3691, + "train_positive_token_accuracy": 0.0839, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.4743, + "epoch": 2.510158013544018, + "grad_norm": 15.479884147644043, + "learning_rate": 5.148713975881598e-06, + "lm_loss": 5.4656, + "loss": 1.4291, + "step": 1112, + "text_contrastive_loss": 0.8165, + "train_positive_log_prob": -80.7669, + "train_positive_token_accuracy": 0.0801, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.4181, + "epoch": 2.5124153498871333, + "grad_norm": 13.727455139160156, + "learning_rate": 5.141461621473175e-06, + "lm_loss": 5.6069, + "loss": 1.3524, + "step": 1113, + "text_contrastive_loss": 0.7474, + "train_positive_log_prob": -83.4925, + "train_positive_token_accuracy": 0.0737, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.4208, + "epoch": 2.514672686230248, + "grad_norm": 13.012874603271484, + "learning_rate": 5.1342089691976794e-06, + "lm_loss": 5.58, + "loss": 1.419, + "step": 1114, + "text_contrastive_loss": 0.8805, + "train_positive_log_prob": -81.449, + "train_positive_token_accuracy": 0.0741, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3197, + "epoch": 2.5169300225733635, + "grad_norm": 11.326669692993164, + "learning_rate": 5.126956034326573e-06, + "lm_loss": 5.4515, + "loss": 1.2929, + "step": 1115, + "text_contrastive_loss": 0.8562, + "train_positive_log_prob": -80.0263, + "train_positive_token_accuracy": 0.0732, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3155, + "epoch": 2.5191873589164784, + "grad_norm": 14.080465316772461, + "learning_rate": 5.119702832131922e-06, + "lm_loss": 5.5236, + "loss": 1.3119, + "step": 1116, + "text_contrastive_loss": 0.888, + "train_positive_log_prob": -82.1657, + "train_positive_token_accuracy": 0.0872, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.4016, + "epoch": 2.5214446952595937, + "grad_norm": 12.966772079467773, + "learning_rate": 5.112449377886345e-06, + "lm_loss": 5.5214, + "loss": 1.3804, + "step": 1117, + "text_contrastive_loss": 0.8533, + "train_positive_log_prob": -82.4786, + "train_positive_token_accuracy": 0.0819, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.374, + "epoch": 2.523702031602709, + "grad_norm": 12.530630111694336, + "learning_rate": 5.105195686863e-06, + "lm_loss": 5.59, + "loss": 1.3039, + "step": 1118, + "text_contrastive_loss": 0.7418, + "train_positive_log_prob": -82.7347, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3903, + "epoch": 2.525959367945824, + "grad_norm": 13.684558868408203, + "learning_rate": 5.097941774335538e-06, + "lm_loss": 5.4456, + "loss": 1.2989, + "step": 1119, + "text_contrastive_loss": 0.728, + "train_positive_log_prob": -80.5991, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.4472, + "epoch": 2.528216704288939, + "grad_norm": 13.693355560302734, + "learning_rate": 5.090687655578078e-06, + "lm_loss": 5.5877, + "loss": 1.4589, + "step": 1120, + "text_contrastive_loss": 0.9059, + "train_positive_log_prob": -84.034, + "train_positive_token_accuracy": 0.0803, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.465, + "epoch": 2.530474040632054, + "grad_norm": 15.150816917419434, + "learning_rate": 5.083433345865175e-06, + "lm_loss": 5.4881, + "loss": 1.4266, + "step": 1121, + "text_contrastive_loss": 0.8256, + "train_positive_log_prob": -80.0279, + "train_positive_token_accuracy": 0.0812, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.4141, + "epoch": 2.5327313769751694, + "grad_norm": 13.404349327087402, + "learning_rate": 5.076178860471787e-06, + "lm_loss": 5.4936, + "loss": 1.3259, + "step": 1122, + "text_contrastive_loss": 0.7249, + "train_positive_log_prob": -81.142, + "train_positive_token_accuracy": 0.0809, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.4645, + "epoch": 2.5349887133182847, + "grad_norm": 13.493803977966309, + "learning_rate": 5.068924214673234e-06, + "lm_loss": 5.3862, + "loss": 1.4098, + "step": 1123, + "text_contrastive_loss": 0.8135, + "train_positive_log_prob": -78.2877, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.4092, + "epoch": 2.5372460496613995, + "grad_norm": 13.771960258483887, + "learning_rate": 5.061669423745185e-06, + "lm_loss": 5.4045, + "loss": 1.3754, + "step": 1124, + "text_contrastive_loss": 0.8514, + "train_positive_log_prob": -79.1246, + "train_positive_token_accuracy": 0.0824, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.388, + "epoch": 2.5395033860045144, + "grad_norm": 16.861345291137695, + "learning_rate": 5.054414502963605e-06, + "lm_loss": 5.5324, + "loss": 1.3435, + "step": 1125, + "text_contrastive_loss": 0.8044, + "train_positive_log_prob": -82.7506, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.384, + "epoch": 2.5417607223476297, + "grad_norm": 12.91841983795166, + "learning_rate": 5.0471594676047385e-06, + "lm_loss": 5.5547, + "loss": 1.3424, + "step": 1126, + "text_contrastive_loss": 0.8058, + "train_positive_log_prob": -81.9812, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.351, + "epoch": 2.544018058690745, + "grad_norm": 12.802785873413086, + "learning_rate": 5.039904332945069e-06, + "lm_loss": 5.3757, + "loss": 1.2313, + "step": 1127, + "text_contrastive_loss": 0.6855, + "train_positive_log_prob": -77.9244, + "train_positive_token_accuracy": 0.0903, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.4422, + "epoch": 2.54627539503386, + "grad_norm": 13.323243141174316, + "learning_rate": 5.03264911426129e-06, + "lm_loss": 5.3897, + "loss": 1.3628, + "step": 1128, + "text_contrastive_loss": 0.7634, + "train_positive_log_prob": -79.1607, + "train_positive_token_accuracy": 0.0822, + "train_positive_token_prob": 0.0327 + }, + { + "contrastive_loss": 0.4727, + "epoch": 2.5485327313769752, + "grad_norm": 13.163284301757812, + "learning_rate": 5.025393826830267e-06, + "lm_loss": 5.5807, + "loss": 1.4586, + "step": 1129, + "text_contrastive_loss": 0.8558, + "train_positive_log_prob": -82.3239, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.4547, + "epoch": 2.55079006772009, + "grad_norm": 12.482891082763672, + "learning_rate": 5.0181384859290215e-06, + "lm_loss": 5.5216, + "loss": 1.4058, + "step": 1130, + "text_contrastive_loss": 0.798, + "train_positive_log_prob": -81.6846, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.4307, + "epoch": 2.5530474040632054, + "grad_norm": 14.769231796264648, + "learning_rate": 5.010883106834676e-06, + "lm_loss": 5.3878, + "loss": 1.421, + "step": 1131, + "text_contrastive_loss": 0.9031, + "train_positive_log_prob": -80.797, + "train_positive_token_accuracy": 0.0826, + "train_positive_token_prob": 0.0327 + }, + { + "contrastive_loss": 0.4768, + "epoch": 2.5553047404063207, + "grad_norm": 16.11454200744629, + "learning_rate": 5.003627704824438e-06, + "lm_loss": 5.4641, + "loss": 1.4463, + "step": 1132, + "text_contrastive_loss": 0.8461, + "train_positive_log_prob": -79.657, + "train_positive_token_accuracy": 0.0855, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.4315, + "epoch": 2.5575620767494356, + "grad_norm": 14.113448143005371, + "learning_rate": 4.996372295175563e-06, + "lm_loss": 5.4709, + "loss": 1.3318, + "step": 1133, + "text_contrastive_loss": 0.7065, + "train_positive_log_prob": -81.1628, + "train_positive_token_accuracy": 0.08, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3645, + "epoch": 2.559819413092551, + "grad_norm": 12.816051483154297, + "learning_rate": 4.989116893165325e-06, + "lm_loss": 5.4141, + "loss": 1.2573, + "step": 1134, + "text_contrastive_loss": 0.7027, + "train_positive_log_prob": -80.1974, + "train_positive_token_accuracy": 0.0746, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4965, + "epoch": 2.5620767494356658, + "grad_norm": 14.619532585144043, + "learning_rate": 4.981861514070979e-06, + "lm_loss": 5.4826, + "loss": 1.5327, + "step": 1135, + "text_contrastive_loss": 0.9758, + "train_positive_log_prob": -81.0369, + "train_positive_token_accuracy": 0.0809, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.4737, + "epoch": 2.564334085778781, + "grad_norm": 14.578652381896973, + "learning_rate": 4.974606173169733e-06, + "lm_loss": 5.4658, + "loss": 1.4985, + "step": 1136, + "text_contrastive_loss": 0.9563, + "train_positive_log_prob": -81.2174, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.4365, + "epoch": 2.5665914221218964, + "grad_norm": 15.102015495300293, + "learning_rate": 4.9673508857387115e-06, + "lm_loss": 5.3932, + "loss": 1.3598, + "step": 1137, + "text_contrastive_loss": 0.7679, + "train_positive_log_prob": -79.8424, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.4149, + "epoch": 2.5688487584650113, + "grad_norm": 13.778441429138184, + "learning_rate": 4.9600956670549324e-06, + "lm_loss": 5.5632, + "loss": 1.3688, + "step": 1138, + "text_contrastive_loss": 0.795, + "train_positive_log_prob": -83.3824, + "train_positive_token_accuracy": 0.0725, + "train_positive_token_prob": 0.0286 + }, + { + "contrastive_loss": 0.3971, + "epoch": 2.5711060948081266, + "grad_norm": 12.175816535949707, + "learning_rate": 4.952840532395262e-06, + "lm_loss": 5.5423, + "loss": 1.3516, + "step": 1139, + "text_contrastive_loss": 0.8005, + "train_positive_log_prob": -84.2209, + "train_positive_token_accuracy": 0.0787, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4395, + "epoch": 2.5733634311512414, + "grad_norm": 11.704392433166504, + "learning_rate": 4.945585497036396e-06, + "lm_loss": 5.5114, + "loss": 1.3733, + "step": 1140, + "text_contrastive_loss": 0.7652, + "train_positive_log_prob": -79.3102, + "train_positive_token_accuracy": 0.0695, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.4203, + "epoch": 2.5756207674943568, + "grad_norm": 15.361371994018555, + "learning_rate": 4.938330576254817e-06, + "lm_loss": 5.4189, + "loss": 1.3165, + "step": 1141, + "text_contrastive_loss": 0.7086, + "train_positive_log_prob": -82.0675, + "train_positive_token_accuracy": 0.0823, + "train_positive_token_prob": 0.0325 + }, + { + "contrastive_loss": 0.4587, + "epoch": 2.5778781038374716, + "grad_norm": 14.702521324157715, + "learning_rate": 4.931075785326767e-06, + "lm_loss": 5.4207, + "loss": 1.3999, + "step": 1142, + "text_contrastive_loss": 0.7983, + "train_positive_log_prob": -81.4601, + "train_positive_token_accuracy": 0.0762, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.4346, + "epoch": 2.580135440180587, + "grad_norm": 13.952679634094238, + "learning_rate": 4.9238211395282156e-06, + "lm_loss": 5.3906, + "loss": 1.3682, + "step": 1143, + "text_contrastive_loss": 0.7891, + "train_positive_log_prob": -77.574, + "train_positive_token_accuracy": 0.0755, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3194, + "epoch": 2.582392776523702, + "grad_norm": 12.316725730895996, + "learning_rate": 4.9165666541348265e-06, + "lm_loss": 5.5005, + "loss": 1.2573, + "step": 1144, + "text_contrastive_loss": 0.7758, + "train_positive_log_prob": -82.0953, + "train_positive_token_accuracy": 0.0712, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.5058, + "epoch": 2.584650112866817, + "grad_norm": 14.519919395446777, + "learning_rate": 4.909312344421923e-06, + "lm_loss": 5.5282, + "loss": 1.4922, + "step": 1145, + "text_contrastive_loss": 0.8671, + "train_positive_log_prob": -82.5309, + "train_positive_token_accuracy": 0.0748, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.4073, + "epoch": 2.5869074492099324, + "grad_norm": 13.38963794708252, + "learning_rate": 4.902058225664465e-06, + "lm_loss": 5.3844, + "loss": 1.3561, + "step": 1146, + "text_contrastive_loss": 0.8208, + "train_positive_log_prob": -77.3817, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3824, + "epoch": 2.5891647855530473, + "grad_norm": 14.860939025878906, + "learning_rate": 4.8948043131370025e-06, + "lm_loss": 5.5304, + "loss": 1.3665, + "step": 1147, + "text_contrastive_loss": 0.8621, + "train_positive_log_prob": -81.1859, + "train_positive_token_accuracy": 0.0849, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3676, + "epoch": 2.5914221218961626, + "grad_norm": 13.177186965942383, + "learning_rate": 4.887550622113657e-06, + "lm_loss": 5.4172, + "loss": 1.2679, + "step": 1148, + "text_contrastive_loss": 0.717, + "train_positive_log_prob": -78.2341, + "train_positive_token_accuracy": 0.0866, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.3827, + "epoch": 2.5936794582392775, + "grad_norm": 13.161023139953613, + "learning_rate": 4.88029716786808e-06, + "lm_loss": 5.5674, + "loss": 1.3092, + "step": 1149, + "text_contrastive_loss": 0.7397, + "train_positive_log_prob": -84.0758, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4661, + "epoch": 2.595936794582393, + "grad_norm": 14.401485443115234, + "learning_rate": 4.873043965673427e-06, + "lm_loss": 5.4896, + "loss": 1.4969, + "step": 1150, + "text_contrastive_loss": 0.9637, + "train_positive_log_prob": -82.2403, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.5713, + "epoch": 2.598194130925508, + "grad_norm": 16.998815536499023, + "learning_rate": 4.8657910308023205e-06, + "lm_loss": 5.6023, + "loss": 1.6205, + "step": 1151, + "text_contrastive_loss": 0.9779, + "train_positive_log_prob": -82.4184, + "train_positive_token_accuracy": 0.0749, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.3023, + "epoch": 2.600451467268623, + "grad_norm": 12.241743087768555, + "learning_rate": 4.858538378526825e-06, + "lm_loss": 5.4043, + "loss": 1.1452, + "step": 1152, + "text_contrastive_loss": 0.605, + "train_positive_log_prob": -78.5094, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.4277, + "epoch": 2.6027088036117383, + "grad_norm": 13.409485816955566, + "learning_rate": 4.851286024118402e-06, + "lm_loss": 5.4495, + "loss": 1.3611, + "step": 1153, + "text_contrastive_loss": 0.7768, + "train_positive_log_prob": -79.4225, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.4506, + "epoch": 2.604966139954853, + "grad_norm": 13.712029457092285, + "learning_rate": 4.844033982847893e-06, + "lm_loss": 5.5141, + "loss": 1.4575, + "step": 1154, + "text_contrastive_loss": 0.911, + "train_positive_log_prob": -79.9985, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3959, + "epoch": 2.6072234762979685, + "grad_norm": 14.144770622253418, + "learning_rate": 4.836782269985475e-06, + "lm_loss": 5.4278, + "loss": 1.3786, + "step": 1155, + "text_contrastive_loss": 0.8799, + "train_positive_log_prob": -77.1791, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3624, + "epoch": 2.609480812641084, + "grad_norm": 13.455986976623535, + "learning_rate": 4.829530900800638e-06, + "lm_loss": 5.5511, + "loss": 1.3704, + "step": 1156, + "text_contrastive_loss": 0.9056, + "train_positive_log_prob": -81.7055, + "train_positive_token_accuracy": 0.0809, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.4301, + "epoch": 2.6117381489841986, + "grad_norm": 13.076140403747559, + "learning_rate": 4.8222798905621445e-06, + "lm_loss": 5.5047, + "loss": 1.3979, + "step": 1157, + "text_contrastive_loss": 0.8346, + "train_positive_log_prob": -80.441, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3724, + "epoch": 2.6139954853273135, + "grad_norm": 12.173945426940918, + "learning_rate": 4.815029254538003e-06, + "lm_loss": 5.4343, + "loss": 1.2759, + "step": 1158, + "text_contrastive_loss": 0.7201, + "train_positive_log_prob": -80.282, + "train_positive_token_accuracy": 0.0709, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.5704, + "epoch": 2.616252821670429, + "grad_norm": 18.74212074279785, + "learning_rate": 4.807779007995434e-06, + "lm_loss": 5.4498, + "loss": 1.5407, + "step": 1159, + "text_contrastive_loss": 0.8507, + "train_positive_log_prob": -78.973, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.2796, + "epoch": 2.618510158013544, + "grad_norm": 12.043617248535156, + "learning_rate": 4.800529166200837e-06, + "lm_loss": 5.4506, + "loss": 1.2163, + "step": 1160, + "text_contrastive_loss": 0.7833, + "train_positive_log_prob": -80.3766, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.3519, + "epoch": 2.620767494356659, + "grad_norm": 11.911659240722656, + "learning_rate": 4.7932797444197604e-06, + "lm_loss": 5.5581, + "loss": 1.3302, + "step": 1161, + "text_contrastive_loss": 0.8449, + "train_positive_log_prob": -86.5841, + "train_positive_token_accuracy": 0.082, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.4986, + "epoch": 2.6230248306997743, + "grad_norm": 14.69483470916748, + "learning_rate": 4.786030757916868e-06, + "lm_loss": 5.6034, + "loss": 1.5338, + "step": 1162, + "text_contrastive_loss": 0.9499, + "train_positive_log_prob": -83.5707, + "train_positive_token_accuracy": 0.0817, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.4245, + "epoch": 2.625282167042889, + "grad_norm": 14.035123825073242, + "learning_rate": 4.778782221955907e-06, + "lm_loss": 5.4689, + "loss": 1.3534, + "step": 1163, + "text_contrastive_loss": 0.764, + "train_positive_log_prob": -79.9811, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3722, + "epoch": 2.6275395033860045, + "grad_norm": 12.763521194458008, + "learning_rate": 4.771534151799676e-06, + "lm_loss": 5.4592, + "loss": 1.3379, + "step": 1164, + "text_contrastive_loss": 0.8396, + "train_positive_log_prob": -82.5483, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.4304, + "epoch": 2.62979683972912, + "grad_norm": 12.734062194824219, + "learning_rate": 4.76428656270999e-06, + "lm_loss": 5.5827, + "loss": 1.3695, + "step": 1165, + "text_contrastive_loss": 0.7616, + "train_positive_log_prob": -82.6295, + "train_positive_token_accuracy": 0.0866, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.38, + "epoch": 2.6320541760722347, + "grad_norm": 13.003334045410156, + "learning_rate": 4.757039469947658e-06, + "lm_loss": 5.4456, + "loss": 1.3414, + "step": 1166, + "text_contrastive_loss": 0.8335, + "train_positive_log_prob": -82.5373, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3576, + "epoch": 2.63431151241535, + "grad_norm": 12.10800552368164, + "learning_rate": 4.7497928887724325e-06, + "lm_loss": 5.6071, + "loss": 1.3052, + "step": 1167, + "text_contrastive_loss": 0.7739, + "train_positive_log_prob": -83.0529, + "train_positive_token_accuracy": 0.0709, + "train_positive_token_prob": 0.029 + }, + { + "contrastive_loss": 0.3748, + "epoch": 2.636568848758465, + "grad_norm": 13.078047752380371, + "learning_rate": 4.7425468344430035e-06, + "lm_loss": 5.4195, + "loss": 1.2992, + "step": 1168, + "text_contrastive_loss": 0.7648, + "train_positive_log_prob": -78.6478, + "train_positive_token_accuracy": 0.0853, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3788, + "epoch": 2.63882618510158, + "grad_norm": 13.133106231689453, + "learning_rate": 4.73530132221694e-06, + "lm_loss": 5.4447, + "loss": 1.2704, + "step": 1169, + "text_contrastive_loss": 0.6943, + "train_positive_log_prob": -82.1405, + "train_positive_token_accuracy": 0.0935, + "train_positive_token_prob": 0.033 + }, + { + "contrastive_loss": 0.4071, + "epoch": 2.6410835214446955, + "grad_norm": 11.7039213180542, + "learning_rate": 4.7280563673506745e-06, + "lm_loss": 5.5345, + "loss": 1.3117, + "step": 1170, + "text_contrastive_loss": 0.7022, + "train_positive_log_prob": -83.8643, + "train_positive_token_accuracy": 0.0833, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3878, + "epoch": 2.6433408577878104, + "grad_norm": 14.071136474609375, + "learning_rate": 4.720811985099464e-06, + "lm_loss": 5.5199, + "loss": 1.3677, + "step": 1171, + "text_contrastive_loss": 0.8559, + "train_positive_log_prob": -81.6313, + "train_positive_token_accuracy": 0.0703, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.4178, + "epoch": 2.6455981941309257, + "grad_norm": 14.577308654785156, + "learning_rate": 4.713568190717362e-06, + "lm_loss": 5.5622, + "loss": 1.3914, + "step": 1172, + "text_contrastive_loss": 0.8348, + "train_positive_log_prob": -82.2173, + "train_positive_token_accuracy": 0.0806, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3357, + "epoch": 2.6478555304740405, + "grad_norm": 13.793277740478516, + "learning_rate": 4.70632499945718e-06, + "lm_loss": 5.457, + "loss": 1.2923, + "step": 1173, + "text_contrastive_loss": 0.8218, + "train_positive_log_prob": -80.9398, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3966, + "epoch": 2.650112866817156, + "grad_norm": 12.712090492248535, + "learning_rate": 4.699082426570465e-06, + "lm_loss": 5.455, + "loss": 1.4072, + "step": 1174, + "text_contrastive_loss": 0.9301, + "train_positive_log_prob": -79.7999, + "train_positive_token_accuracy": 0.0801, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.4478, + "epoch": 2.6523702031602707, + "grad_norm": 14.293423652648926, + "learning_rate": 4.6918404873074574e-06, + "lm_loss": 5.4744, + "loss": 1.4296, + "step": 1175, + "text_contrastive_loss": 0.8687, + "train_positive_log_prob": -82.2906, + "train_positive_token_accuracy": 0.0821, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3749, + "epoch": 2.654627539503386, + "grad_norm": 13.062413215637207, + "learning_rate": 4.684599196917067e-06, + "lm_loss": 5.5221, + "loss": 1.2666, + "step": 1176, + "text_contrastive_loss": 0.679, + "train_positive_log_prob": -81.818, + "train_positive_token_accuracy": 0.0817, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3527, + "epoch": 2.656884875846501, + "grad_norm": 12.62744140625, + "learning_rate": 4.677358570646834e-06, + "lm_loss": 5.4972, + "loss": 1.2916, + "step": 1177, + "text_contrastive_loss": 0.7783, + "train_positive_log_prob": -81.1665, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.4143, + "epoch": 2.659142212189616, + "grad_norm": 15.637986183166504, + "learning_rate": 4.670118623742904e-06, + "lm_loss": 5.5146, + "loss": 1.3633, + "step": 1178, + "text_contrastive_loss": 0.7952, + "train_positive_log_prob": -81.7256, + "train_positive_token_accuracy": 0.0714, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.3955, + "epoch": 2.6613995485327315, + "grad_norm": 14.06098747253418, + "learning_rate": 4.662879371449987e-06, + "lm_loss": 5.4452, + "loss": 1.3342, + "step": 1179, + "text_contrastive_loss": 0.7884, + "train_positive_log_prob": -80.9345, + "train_positive_token_accuracy": 0.0799, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3519, + "epoch": 2.6636568848758464, + "grad_norm": 12.122421264648438, + "learning_rate": 4.655640829011335e-06, + "lm_loss": 5.4287, + "loss": 1.2841, + "step": 1180, + "text_contrastive_loss": 0.7787, + "train_positive_log_prob": -79.4815, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3221, + "epoch": 2.6659142212189617, + "grad_norm": 13.791107177734375, + "learning_rate": 4.6484030116687014e-06, + "lm_loss": 5.468, + "loss": 1.2268, + "step": 1181, + "text_contrastive_loss": 0.7159, + "train_positive_log_prob": -82.3194, + "train_positive_token_accuracy": 0.0898, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.4163, + "epoch": 2.6681715575620766, + "grad_norm": 13.158355712890625, + "learning_rate": 4.64116593466232e-06, + "lm_loss": 5.4583, + "loss": 1.3262, + "step": 1182, + "text_contrastive_loss": 0.7282, + "train_positive_log_prob": -80.3141, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3014, + "epoch": 2.670428893905192, + "grad_norm": 11.234637260437012, + "learning_rate": 4.633929613230855e-06, + "lm_loss": 5.4815, + "loss": 1.2485, + "step": 1183, + "text_contrastive_loss": 0.798, + "train_positive_log_prob": -81.3262, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3415, + "epoch": 2.672686230248307, + "grad_norm": 13.206541061401367, + "learning_rate": 4.626694062611387e-06, + "lm_loss": 5.4586, + "loss": 1.2562, + "step": 1184, + "text_contrastive_loss": 0.7378, + "train_positive_log_prob": -82.3016, + "train_positive_token_accuracy": 0.0816, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3633, + "epoch": 2.674943566591422, + "grad_norm": 14.855770111083984, + "learning_rate": 4.619459298039373e-06, + "lm_loss": 5.4835, + "loss": 1.2701, + "step": 1185, + "text_contrastive_loss": 0.7169, + "train_positive_log_prob": -81.9799, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3584, + "epoch": 2.6772009029345374, + "grad_norm": 12.548898696899414, + "learning_rate": 4.612225334748616e-06, + "lm_loss": 5.4959, + "loss": 1.2311, + "step": 1186, + "text_contrastive_loss": 0.6463, + "train_positive_log_prob": -84.2821, + "train_positive_token_accuracy": 0.082, + "train_positive_token_prob": 0.0292 + }, + { + "contrastive_loss": 0.3771, + "epoch": 2.6794582392776523, + "grad_norm": 15.054156303405762, + "learning_rate": 4.6049921879712254e-06, + "lm_loss": 5.4524, + "loss": 1.2866, + "step": 1187, + "text_contrastive_loss": 0.7285, + "train_positive_log_prob": -81.2542, + "train_positive_token_accuracy": 0.0828, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.4091, + "epoch": 2.6817155756207676, + "grad_norm": 12.37720775604248, + "learning_rate": 4.597759872937597e-06, + "lm_loss": 5.4666, + "loss": 1.3247, + "step": 1188, + "text_contrastive_loss": 0.738, + "train_positive_log_prob": -79.8199, + "train_positive_token_accuracy": 0.0787, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.4664, + "epoch": 2.683972911963883, + "grad_norm": 14.28339672088623, + "learning_rate": 4.590528404876374e-06, + "lm_loss": 5.4162, + "loss": 1.4403, + "step": 1189, + "text_contrastive_loss": 0.8646, + "train_positive_log_prob": -80.7148, + "train_positive_token_accuracy": 0.0773, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.44, + "epoch": 2.6862302483069977, + "grad_norm": 13.1124906539917, + "learning_rate": 4.5832977990144165e-06, + "lm_loss": 5.4942, + "loss": 1.3982, + "step": 1190, + "text_contrastive_loss": 0.8175, + "train_positive_log_prob": -79.7122, + "train_positive_token_accuracy": 0.0845, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3829, + "epoch": 2.6884875846501126, + "grad_norm": 13.71042251586914, + "learning_rate": 4.5760680705767665e-06, + "lm_loss": 5.3892, + "loss": 1.2428, + "step": 1191, + "text_contrastive_loss": 0.6419, + "train_positive_log_prob": -79.7473, + "train_positive_token_accuracy": 0.0747, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3628, + "epoch": 2.690744920993228, + "grad_norm": 12.924747467041016, + "learning_rate": 4.5688392347866226e-06, + "lm_loss": 5.4922, + "loss": 1.2771, + "step": 1192, + "text_contrastive_loss": 0.7302, + "train_positive_log_prob": -81.8459, + "train_positive_token_accuracy": 0.0744, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.3661, + "epoch": 2.6930022573363432, + "grad_norm": 13.6475830078125, + "learning_rate": 4.561611306865299e-06, + "lm_loss": 5.546, + "loss": 1.2914, + "step": 1193, + "text_contrastive_loss": 0.7414, + "train_positive_log_prob": -83.2363, + "train_positive_token_accuracy": 0.0841, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3617, + "epoch": 2.695259593679458, + "grad_norm": 14.189035415649414, + "learning_rate": 4.554384302032204e-06, + "lm_loss": 5.4608, + "loss": 1.2822, + "step": 1194, + "text_contrastive_loss": 0.7489, + "train_positive_log_prob": -80.2858, + "train_positive_token_accuracy": 0.0749, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.4459, + "epoch": 2.6975169300225734, + "grad_norm": 13.431438446044922, + "learning_rate": 4.547158235504797e-06, + "lm_loss": 5.5341, + "loss": 1.4387, + "step": 1195, + "text_contrastive_loss": 0.8787, + "train_positive_log_prob": -82.2988, + "train_positive_token_accuracy": 0.0799, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.4075, + "epoch": 2.6997742663656883, + "grad_norm": 12.916892051696777, + "learning_rate": 4.539933122498566e-06, + "lm_loss": 5.4545, + "loss": 1.4186, + "step": 1196, + "text_contrastive_loss": 0.9313, + "train_positive_log_prob": -81.626, + "train_positive_token_accuracy": 0.0799, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.2579, + "epoch": 2.7020316027088036, + "grad_norm": 13.460196495056152, + "learning_rate": 4.532708978226987e-06, + "lm_loss": 5.3946, + "loss": 1.144, + "step": 1197, + "text_contrastive_loss": 0.6933, + "train_positive_log_prob": -78.9772, + "train_positive_token_accuracy": 0.0846, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.328, + "epoch": 2.704288939051919, + "grad_norm": 13.811583518981934, + "learning_rate": 4.525485817901499e-06, + "lm_loss": 5.561, + "loss": 1.3221, + "step": 1198, + "text_contrastive_loss": 0.876, + "train_positive_log_prob": -84.3075, + "train_positive_token_accuracy": 0.0742, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.4166, + "epoch": 2.706546275395034, + "grad_norm": 12.972644805908203, + "learning_rate": 4.518263656731468e-06, + "lm_loss": 5.4697, + "loss": 1.4242, + "step": 1199, + "text_contrastive_loss": 0.9213, + "train_positive_log_prob": -81.43, + "train_positive_token_accuracy": 0.0837, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.4648, + "epoch": 2.708803611738149, + "grad_norm": 14.613834381103516, + "learning_rate": 4.511042509924157e-06, + "lm_loss": 5.4514, + "loss": 1.4545, + "step": 1200, + "text_contrastive_loss": 0.8891, + "train_positive_log_prob": -80.963, + "train_positive_token_accuracy": 0.0807, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.4774, + "epoch": 2.711060948081264, + "grad_norm": 14.243812561035156, + "learning_rate": 4.5038223926846905e-06, + "lm_loss": 5.5343, + "loss": 1.4871, + "step": 1201, + "text_contrastive_loss": 0.9125, + "train_positive_log_prob": -81.9726, + "train_positive_token_accuracy": 0.0746, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.4903, + "epoch": 2.7133182844243793, + "grad_norm": 16.595163345336914, + "learning_rate": 4.49660332021603e-06, + "lm_loss": 5.4355, + "loss": 1.5449, + "step": 1202, + "text_contrastive_loss": 1.022, + "train_positive_log_prob": -78.8414, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4398, + "epoch": 2.7155756207674946, + "grad_norm": 13.141173362731934, + "learning_rate": 4.489385307718934e-06, + "lm_loss": 5.5469, + "loss": 1.3926, + "step": 1203, + "text_contrastive_loss": 0.7962, + "train_positive_log_prob": -80.2572, + "train_positive_token_accuracy": 0.0686, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.341, + "epoch": 2.7178329571106095, + "grad_norm": 12.069439888000488, + "learning_rate": 4.482168370391931e-06, + "lm_loss": 5.5183, + "loss": 1.2653, + "step": 1204, + "text_contrastive_loss": 0.745, + "train_positive_log_prob": -81.7843, + "train_positive_token_accuracy": 0.0783, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3944, + "epoch": 2.7200902934537243, + "grad_norm": 13.135692596435547, + "learning_rate": 4.47495252343128e-06, + "lm_loss": 5.5076, + "loss": 1.3911, + "step": 1205, + "text_contrastive_loss": 0.8918, + "train_positive_log_prob": -83.9915, + "train_positive_token_accuracy": 0.083, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3732, + "epoch": 2.7223476297968396, + "grad_norm": 12.680439949035645, + "learning_rate": 4.467737782030951e-06, + "lm_loss": 5.6225, + "loss": 1.3141, + "step": 1206, + "text_contrastive_loss": 0.7573, + "train_positive_log_prob": -84.8032, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.3354, + "epoch": 2.724604966139955, + "grad_norm": 11.35401439666748, + "learning_rate": 4.460524161382582e-06, + "lm_loss": 5.3954, + "loss": 1.2194, + "step": 1207, + "text_contrastive_loss": 0.6888, + "train_positive_log_prob": -80.2996, + "train_positive_token_accuracy": 0.0814, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.3585, + "epoch": 2.72686230248307, + "grad_norm": 11.742429733276367, + "learning_rate": 4.453311676675453e-06, + "lm_loss": 5.4571, + "loss": 1.2834, + "step": 1208, + "text_contrastive_loss": 0.7582, + "train_positive_log_prob": -82.4978, + "train_positive_token_accuracy": 0.0801, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.4639, + "epoch": 2.729119638826185, + "grad_norm": 13.734712600708008, + "learning_rate": 4.44610034309645e-06, + "lm_loss": 5.4073, + "loss": 1.3912, + "step": 1209, + "text_contrastive_loss": 0.7731, + "train_positive_log_prob": -81.0474, + "train_positive_token_accuracy": 0.0873, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3075, + "epoch": 2.7313769751693, + "grad_norm": 12.798529624938965, + "learning_rate": 4.438890175830039e-06, + "lm_loss": 5.6043, + "loss": 1.2357, + "step": 1210, + "text_contrastive_loss": 0.7356, + "train_positive_log_prob": -83.4316, + "train_positive_token_accuracy": 0.0748, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.4449, + "epoch": 2.7336343115124153, + "grad_norm": 14.952999114990234, + "learning_rate": 4.431681190058224e-06, + "lm_loss": 5.4193, + "loss": 1.4794, + "step": 1211, + "text_contrastive_loss": 0.985, + "train_positive_log_prob": -80.3201, + "train_positive_token_accuracy": 0.0809, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3684, + "epoch": 2.7358916478555306, + "grad_norm": 11.61523723602295, + "learning_rate": 4.42447340096053e-06, + "lm_loss": 5.4363, + "loss": 1.2537, + "step": 1212, + "text_contrastive_loss": 0.6834, + "train_positive_log_prob": -81.4823, + "train_positive_token_accuracy": 0.0837, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.4008, + "epoch": 2.7381489841986455, + "grad_norm": 13.392486572265625, + "learning_rate": 4.417266823713953e-06, + "lm_loss": 5.3832, + "loss": 1.253, + "step": 1213, + "text_contrastive_loss": 0.6277, + "train_positive_log_prob": -78.9355, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.2892, + "epoch": 2.740406320541761, + "grad_norm": 12.114153861999512, + "learning_rate": 4.410061473492943e-06, + "lm_loss": 5.4637, + "loss": 1.1259, + "step": 1214, + "text_contrastive_loss": 0.5806, + "train_positive_log_prob": -81.5736, + "train_positive_token_accuracy": 0.0819, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3176, + "epoch": 2.7426636568848757, + "grad_norm": 12.64139175415039, + "learning_rate": 4.402857365469364e-06, + "lm_loss": 5.5269, + "loss": 1.2211, + "step": 1215, + "text_contrastive_loss": 0.7016, + "train_positive_log_prob": -81.6269, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.334, + "epoch": 2.744920993227991, + "grad_norm": 12.226225852966309, + "learning_rate": 4.3956545148124665e-06, + "lm_loss": 5.4629, + "loss": 1.3109, + "step": 1216, + "text_contrastive_loss": 0.8613, + "train_positive_log_prob": -79.0602, + "train_positive_token_accuracy": 0.0756, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4466, + "epoch": 2.7471783295711063, + "grad_norm": 16.149940490722656, + "learning_rate": 4.38845293668885e-06, + "lm_loss": 5.6052, + "loss": 1.4872, + "step": 1217, + "text_contrastive_loss": 0.9602, + "train_positive_log_prob": -82.9654, + "train_positive_token_accuracy": 0.0731, + "train_positive_token_prob": 0.0288 + }, + { + "contrastive_loss": 0.4523, + "epoch": 2.749435665914221, + "grad_norm": 13.595815658569336, + "learning_rate": 4.381252646262437e-06, + "lm_loss": 5.5025, + "loss": 1.4454, + "step": 1218, + "text_contrastive_loss": 0.8859, + "train_positive_log_prob": -81.916, + "train_positive_token_accuracy": 0.0677, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.475, + "epoch": 2.7516930022573365, + "grad_norm": 14.258941650390625, + "learning_rate": 4.37405365869444e-06, + "lm_loss": 5.5606, + "loss": 1.459, + "step": 1219, + "text_contrastive_loss": 0.8557, + "train_positive_log_prob": -83.0389, + "train_positive_token_accuracy": 0.0766, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3934, + "epoch": 2.7539503386004514, + "grad_norm": 12.872740745544434, + "learning_rate": 4.366855989143326e-06, + "lm_loss": 5.4506, + "loss": 1.3675, + "step": 1220, + "text_contrastive_loss": 0.8582, + "train_positive_log_prob": -79.0551, + "train_positive_token_accuracy": 0.0778, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3926, + "epoch": 2.7562076749435667, + "grad_norm": 14.489907264709473, + "learning_rate": 4.359659652764786e-06, + "lm_loss": 5.4683, + "loss": 1.287, + "step": 1221, + "text_contrastive_loss": 0.6952, + "train_positive_log_prob": -83.3088, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.4205, + "epoch": 2.758465011286682, + "grad_norm": 13.774554252624512, + "learning_rate": 4.352464664711706e-06, + "lm_loss": 5.46, + "loss": 1.3908, + "step": 1222, + "text_contrastive_loss": 0.8486, + "train_positive_log_prob": -80.6208, + "train_positive_token_accuracy": 0.0841, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.4246, + "epoch": 2.760722347629797, + "grad_norm": 13.082867622375488, + "learning_rate": 4.345271040134129e-06, + "lm_loss": 5.4653, + "loss": 1.3986, + "step": 1223, + "text_contrastive_loss": 0.8549, + "train_positive_log_prob": -80.3831, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.4586, + "epoch": 2.7629796839729117, + "grad_norm": 15.413365364074707, + "learning_rate": 4.338078794179234e-06, + "lm_loss": 5.5129, + "loss": 1.3643, + "step": 1224, + "text_contrastive_loss": 0.7087, + "train_positive_log_prob": -81.1451, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.5464, + "epoch": 2.765237020316027, + "grad_norm": 17.120615005493164, + "learning_rate": 4.330887941991288e-06, + "lm_loss": 5.5425, + "loss": 1.4789, + "step": 1225, + "text_contrastive_loss": 0.7565, + "train_positive_log_prob": -83.4885, + "train_positive_token_accuracy": 0.0762, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.428, + "epoch": 2.7674943566591423, + "grad_norm": 13.84285831451416, + "learning_rate": 4.323698498711634e-06, + "lm_loss": 5.5542, + "loss": 1.4, + "step": 1226, + "text_contrastive_loss": 0.8331, + "train_positive_log_prob": -78.6072, + "train_positive_token_accuracy": 0.072, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.3547, + "epoch": 2.769751693002257, + "grad_norm": 12.189234733581543, + "learning_rate": 4.316510479478636e-06, + "lm_loss": 5.338, + "loss": 1.2366, + "step": 1227, + "text_contrastive_loss": 0.6962, + "train_positive_log_prob": -77.7541, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.439, + "epoch": 2.7720090293453725, + "grad_norm": 14.967228889465332, + "learning_rate": 4.309323899427671e-06, + "lm_loss": 5.4157, + "loss": 1.3515, + "step": 1228, + "text_contrastive_loss": 0.7419, + "train_positive_log_prob": -79.2481, + "train_positive_token_accuracy": 0.0822, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3639, + "epoch": 2.7742663656884874, + "grad_norm": 12.279850959777832, + "learning_rate": 4.302138773691079e-06, + "lm_loss": 5.488, + "loss": 1.2803, + "step": 1229, + "text_contrastive_loss": 0.7352, + "train_positive_log_prob": -80.778, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3847, + "epoch": 2.7765237020316027, + "grad_norm": 13.546014785766602, + "learning_rate": 4.294955117398139e-06, + "lm_loss": 5.5062, + "loss": 1.3418, + "step": 1230, + "text_contrastive_loss": 0.813, + "train_positive_log_prob": -79.0854, + "train_positive_token_accuracy": 0.0702, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.3354, + "epoch": 2.778781038374718, + "grad_norm": 12.121713638305664, + "learning_rate": 4.287772945675035e-06, + "lm_loss": 5.5482, + "loss": 1.2586, + "step": 1231, + "text_contrastive_loss": 0.7369, + "train_positive_log_prob": -82.8771, + "train_positive_token_accuracy": 0.0758, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.3367, + "epoch": 2.781038374717833, + "grad_norm": 13.65939712524414, + "learning_rate": 4.280592273644829e-06, + "lm_loss": 5.4965, + "loss": 1.2315, + "step": 1232, + "text_contrastive_loss": 0.6903, + "train_positive_log_prob": -81.4805, + "train_positive_token_accuracy": 0.0756, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3851, + "epoch": 2.783295711060948, + "grad_norm": 12.37153148651123, + "learning_rate": 4.273413116427419e-06, + "lm_loss": 5.5467, + "loss": 1.2923, + "step": 1233, + "text_contrastive_loss": 0.705, + "train_positive_log_prob": -81.5341, + "train_positive_token_accuracy": 0.0821, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3845, + "epoch": 2.785553047404063, + "grad_norm": 14.038813591003418, + "learning_rate": 4.26623548913952e-06, + "lm_loss": 5.4681, + "loss": 1.3206, + "step": 1234, + "text_contrastive_loss": 0.7786, + "train_positive_log_prob": -80.1246, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.4412, + "epoch": 2.7878103837471784, + "grad_norm": 13.503236770629883, + "learning_rate": 4.259059406894619e-06, + "lm_loss": 5.4573, + "loss": 1.4028, + "step": 1235, + "text_contrastive_loss": 0.8317, + "train_positive_log_prob": -81.8819, + "train_positive_token_accuracy": 0.0742, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.4692, + "epoch": 2.7900677200902937, + "grad_norm": 16.749441146850586, + "learning_rate": 4.251884884802956e-06, + "lm_loss": 5.3643, + "loss": 1.3972, + "step": 1236, + "text_contrastive_loss": 0.783, + "train_positive_log_prob": -77.9532, + "train_positive_token_accuracy": 0.0723, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3936, + "epoch": 2.7923250564334086, + "grad_norm": 14.661371231079102, + "learning_rate": 4.2447119379714805e-06, + "lm_loss": 5.5732, + "loss": 1.3699, + "step": 1237, + "text_contrastive_loss": 0.8378, + "train_positive_log_prob": -84.3168, + "train_positive_token_accuracy": 0.0857, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3977, + "epoch": 2.7945823927765234, + "grad_norm": 13.179463386535645, + "learning_rate": 4.237540581503831e-06, + "lm_loss": 5.5071, + "loss": 1.3262, + "step": 1238, + "text_contrastive_loss": 0.7554, + "train_positive_log_prob": -82.1534, + "train_positive_token_accuracy": 0.0814, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.3354, + "epoch": 2.7968397291196387, + "grad_norm": 12.80728816986084, + "learning_rate": 4.23037083050029e-06, + "lm_loss": 5.566, + "loss": 1.2505, + "step": 1239, + "text_contrastive_loss": 0.7171, + "train_positive_log_prob": -80.3418, + "train_positive_token_accuracy": 0.0744, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.3678, + "epoch": 2.799097065462754, + "grad_norm": 13.287580490112305, + "learning_rate": 4.223202700057765e-06, + "lm_loss": 5.4992, + "loss": 1.3411, + "step": 1240, + "text_contrastive_loss": 0.8468, + "train_positive_log_prob": -82.6773, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.3574, + "epoch": 2.801354401805869, + "grad_norm": 13.351524353027344, + "learning_rate": 4.216036205269748e-06, + "lm_loss": 5.4983, + "loss": 1.3677, + "step": 1241, + "text_contrastive_loss": 0.9208, + "train_positive_log_prob": -81.0326, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.394, + "epoch": 2.8036117381489842, + "grad_norm": 14.171607971191406, + "learning_rate": 4.20887136122629e-06, + "lm_loss": 5.4616, + "loss": 1.3209, + "step": 1242, + "text_contrastive_loss": 0.7614, + "train_positive_log_prob": -82.3064, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.5489, + "epoch": 2.805869074492099, + "grad_norm": 15.250497817993164, + "learning_rate": 4.201708183013963e-06, + "lm_loss": 5.5027, + "loss": 1.5929, + "step": 1243, + "text_contrastive_loss": 0.9875, + "train_positive_log_prob": -80.3633, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.3295, + "epoch": 2.8081264108352144, + "grad_norm": 12.793886184692383, + "learning_rate": 4.1945466857158336e-06, + "lm_loss": 5.4844, + "loss": 1.2274, + "step": 1244, + "text_contrastive_loss": 0.699, + "train_positive_log_prob": -81.6917, + "train_positive_token_accuracy": 0.0658, + "train_positive_token_prob": 0.028 + }, + { + "contrastive_loss": 0.4198, + "epoch": 2.8103837471783297, + "grad_norm": 14.77787971496582, + "learning_rate": 4.187386884411426e-06, + "lm_loss": 5.5448, + "loss": 1.3699, + "step": 1245, + "text_contrastive_loss": 0.7912, + "train_positive_log_prob": -82.1742, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3821, + "epoch": 2.8126410835214446, + "grad_norm": 13.060454368591309, + "learning_rate": 4.1802287941767e-06, + "lm_loss": 5.4751, + "loss": 1.3942, + "step": 1246, + "text_contrastive_loss": 0.9293, + "train_positive_log_prob": -84.0278, + "train_positive_token_accuracy": 0.0817, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4047, + "epoch": 2.81489841986456, + "grad_norm": 12.218612670898438, + "learning_rate": 4.173072430084002e-06, + "lm_loss": 5.531, + "loss": 1.428, + "step": 1247, + "text_contrastive_loss": 0.9404, + "train_positive_log_prob": -81.6481, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3456, + "epoch": 2.8171557562076748, + "grad_norm": 13.53044319152832, + "learning_rate": 4.165917807202055e-06, + "lm_loss": 5.4821, + "loss": 1.1878, + "step": 1248, + "text_contrastive_loss": 0.588, + "train_positive_log_prob": -82.2579, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3582, + "epoch": 2.81941309255079, + "grad_norm": 14.458516120910645, + "learning_rate": 4.1587649405959065e-06, + "lm_loss": 5.4468, + "loss": 1.2858, + "step": 1249, + "text_contrastive_loss": 0.7659, + "train_positive_log_prob": -81.0041, + "train_positive_token_accuracy": 0.0838, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.4015, + "epoch": 2.8216704288939054, + "grad_norm": 14.125541687011719, + "learning_rate": 4.151613845326912e-06, + "lm_loss": 5.5291, + "loss": 1.3817, + "step": 1250, + "text_contrastive_loss": 0.8545, + "train_positive_log_prob": -81.6426, + "train_positive_token_accuracy": 0.0743, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.4077, + "epoch": 2.8239277652370203, + "grad_norm": 13.437987327575684, + "learning_rate": 4.144464536452693e-06, + "lm_loss": 5.5424, + "loss": 1.3305, + "step": 1251, + "text_contrastive_loss": 0.7373, + "train_positive_log_prob": -81.533, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3857, + "epoch": 2.8261851015801356, + "grad_norm": 13.264026641845703, + "learning_rate": 4.137317029027111e-06, + "lm_loss": 5.3547, + "loss": 1.3395, + "step": 1252, + "text_contrastive_loss": 0.8366, + "train_positive_log_prob": -78.3161, + "train_positive_token_accuracy": 0.0881, + "train_positive_token_prob": 0.0325 + }, + { + "contrastive_loss": 0.3836, + "epoch": 2.8284424379232505, + "grad_norm": 13.584129333496094, + "learning_rate": 4.1301713381002394e-06, + "lm_loss": 5.3982, + "loss": 1.3275, + "step": 1253, + "text_contrastive_loss": 0.8081, + "train_positive_log_prob": -78.4375, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.4128, + "epoch": 2.8306997742663658, + "grad_norm": 12.125100135803223, + "learning_rate": 4.123027478718318e-06, + "lm_loss": 5.4539, + "loss": 1.2583, + "step": 1254, + "text_contrastive_loss": 0.6002, + "train_positive_log_prob": -80.9361, + "train_positive_token_accuracy": 0.0763, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4589, + "epoch": 2.832957110609481, + "grad_norm": 16.804636001586914, + "learning_rate": 4.115885465923734e-06, + "lm_loss": 5.4491, + "loss": 1.4734, + "step": 1255, + "text_contrastive_loss": 0.9393, + "train_positive_log_prob": -79.9051, + "train_positive_token_accuracy": 0.0751, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.4256, + "epoch": 2.835214446952596, + "grad_norm": 12.986384391784668, + "learning_rate": 4.108745314754989e-06, + "lm_loss": 5.4318, + "loss": 1.352, + "step": 1256, + "text_contrastive_loss": 0.7665, + "train_positive_log_prob": -79.4645, + "train_positive_token_accuracy": 0.0806, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3784, + "epoch": 2.837471783295711, + "grad_norm": 14.071839332580566, + "learning_rate": 4.101607040246659e-06, + "lm_loss": 5.4539, + "loss": 1.3541, + "step": 1257, + "text_contrastive_loss": 0.8607, + "train_positive_log_prob": -83.0044, + "train_positive_token_accuracy": 0.0719, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3411, + "epoch": 2.839729119638826, + "grad_norm": 12.389908790588379, + "learning_rate": 4.094470657429374e-06, + "lm_loss": 5.5395, + "loss": 1.3404, + "step": 1258, + "text_contrastive_loss": 0.8907, + "train_positive_log_prob": -81.8363, + "train_positive_token_accuracy": 0.0772, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.3981, + "epoch": 2.8419864559819414, + "grad_norm": 13.694198608398438, + "learning_rate": 4.087336181329777e-06, + "lm_loss": 5.4662, + "loss": 1.3692, + "step": 1259, + "text_contrastive_loss": 0.8489, + "train_positive_log_prob": -80.4553, + "train_positive_token_accuracy": 0.0828, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3603, + "epoch": 2.8442437923250563, + "grad_norm": 14.327945709228516, + "learning_rate": 4.080203626970498e-06, + "lm_loss": 5.5023, + "loss": 1.3561, + "step": 1260, + "text_contrastive_loss": 0.8912, + "train_positive_log_prob": -79.8023, + "train_positive_token_accuracy": 0.0719, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.3818, + "epoch": 2.8465011286681716, + "grad_norm": 12.298961639404297, + "learning_rate": 4.0730730093701185e-06, + "lm_loss": 5.4488, + "loss": 1.3219, + "step": 1261, + "text_contrastive_loss": 0.7905, + "train_positive_log_prob": -81.7123, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3949, + "epoch": 2.8487584650112865, + "grad_norm": 13.795992851257324, + "learning_rate": 4.065944343543146e-06, + "lm_loss": 5.561, + "loss": 1.3324, + "step": 1262, + "text_contrastive_loss": 0.7628, + "train_positive_log_prob": -84.1086, + "train_positive_token_accuracy": 0.0788, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.4803, + "epoch": 2.851015801354402, + "grad_norm": 15.573410987854004, + "learning_rate": 4.058817644499973e-06, + "lm_loss": 5.521, + "loss": 1.479, + "step": 1263, + "text_contrastive_loss": 0.8933, + "train_positive_log_prob": -81.6057, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.3554, + "epoch": 2.853273137697517, + "grad_norm": 13.085721969604492, + "learning_rate": 4.051692927246857e-06, + "lm_loss": 5.4411, + "loss": 1.2233, + "step": 1264, + "text_contrastive_loss": 0.6476, + "train_positive_log_prob": -80.7222, + "train_positive_token_accuracy": 0.0753, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.304, + "epoch": 2.855530474040632, + "grad_norm": 17.32183265686035, + "learning_rate": 4.044570206785874e-06, + "lm_loss": 5.4219, + "loss": 1.2153, + "step": 1265, + "text_contrastive_loss": 0.7382, + "train_positive_log_prob": -80.7784, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3492, + "epoch": 2.8577878103837473, + "grad_norm": 12.01098346710205, + "learning_rate": 4.037449498114903e-06, + "lm_loss": 5.5361, + "loss": 1.2929, + "step": 1266, + "text_contrastive_loss": 0.7802, + "train_positive_log_prob": -80.722, + "train_positive_token_accuracy": 0.0852, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.3955, + "epoch": 2.860045146726862, + "grad_norm": 12.943686485290527, + "learning_rate": 4.0303308162275835e-06, + "lm_loss": 5.5212, + "loss": 1.2776, + "step": 1267, + "text_contrastive_loss": 0.6598, + "train_positive_log_prob": -83.9147, + "train_positive_token_accuracy": 0.0725, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3757, + "epoch": 2.8623024830699775, + "grad_norm": 12.986416816711426, + "learning_rate": 4.0232141761132894e-06, + "lm_loss": 5.3889, + "loss": 1.309, + "step": 1268, + "text_contrastive_loss": 0.7888, + "train_positive_log_prob": -78.9602, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4601, + "epoch": 2.864559819413093, + "grad_norm": 14.226264953613281, + "learning_rate": 4.016099592757091e-06, + "lm_loss": 5.4152, + "loss": 1.3697, + "step": 1269, + "text_contrastive_loss": 0.7361, + "train_positive_log_prob": -80.383, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.4098, + "epoch": 2.8668171557562077, + "grad_norm": 14.365626335144043, + "learning_rate": 4.008987081139734e-06, + "lm_loss": 5.5127, + "loss": 1.2774, + "step": 1270, + "text_contrastive_loss": 0.6326, + "train_positive_log_prob": -83.4631, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.4353, + "epoch": 2.8690744920993225, + "grad_norm": 13.842521667480469, + "learning_rate": 4.0018766562375984e-06, + "lm_loss": 5.3928, + "loss": 1.3914, + "step": 1271, + "text_contrastive_loss": 0.8336, + "train_positive_log_prob": -79.1022, + "train_positive_token_accuracy": 0.0783, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3305, + "epoch": 2.871331828442438, + "grad_norm": 15.21019458770752, + "learning_rate": 3.994768333022669e-06, + "lm_loss": 5.5213, + "loss": 1.2291, + "step": 1272, + "text_contrastive_loss": 0.6929, + "train_positive_log_prob": -83.2307, + "train_positive_token_accuracy": 0.0868, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4826, + "epoch": 2.873589164785553, + "grad_norm": 13.257640838623047, + "learning_rate": 3.987662126462507e-06, + "lm_loss": 5.4774, + "loss": 1.4646, + "step": 1273, + "text_contrastive_loss": 0.8685, + "train_positive_log_prob": -82.6097, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.3511, + "epoch": 2.875846501128668, + "grad_norm": 12.017789840698242, + "learning_rate": 3.980558051520218e-06, + "lm_loss": 5.4417, + "loss": 1.2802, + "step": 1274, + "text_contrastive_loss": 0.7697, + "train_positive_log_prob": -80.2013, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.4262, + "epoch": 2.8781038374717833, + "grad_norm": 14.977031707763672, + "learning_rate": 3.973456123154415e-06, + "lm_loss": 5.472, + "loss": 1.3107, + "step": 1275, + "text_contrastive_loss": 0.6745, + "train_positive_log_prob": -80.3956, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.367, + "epoch": 2.880361173814898, + "grad_norm": 12.36252498626709, + "learning_rate": 3.966356356319196e-06, + "lm_loss": 5.5039, + "loss": 1.3216, + "step": 1276, + "text_contrastive_loss": 0.8084, + "train_positive_log_prob": -81.0976, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.2982, + "epoch": 2.8826185101580135, + "grad_norm": 13.103068351745605, + "learning_rate": 3.959258765964104e-06, + "lm_loss": 5.4056, + "loss": 1.2542, + "step": 1277, + "text_contrastive_loss": 0.8307, + "train_positive_log_prob": -80.0709, + "train_positive_token_accuracy": 0.0841, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.379, + "epoch": 2.884875846501129, + "grad_norm": 14.575030326843262, + "learning_rate": 3.9521633670341005e-06, + "lm_loss": 5.3843, + "loss": 1.3148, + "step": 1278, + "text_contrastive_loss": 0.7947, + "train_positive_log_prob": -78.5992, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.4275, + "epoch": 2.8871331828442437, + "grad_norm": 13.459734916687012, + "learning_rate": 3.9450701744695325e-06, + "lm_loss": 5.4906, + "loss": 1.3713, + "step": 1279, + "text_contrastive_loss": 0.7895, + "train_positive_log_prob": -80.9843, + "train_positive_token_accuracy": 0.0718, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.3488, + "epoch": 2.889390519187359, + "grad_norm": 12.976869583129883, + "learning_rate": 3.937979203206103e-06, + "lm_loss": 5.4831, + "loss": 1.2806, + "step": 1280, + "text_contrastive_loss": 0.7669, + "train_positive_log_prob": -83.8328, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.365, + "epoch": 2.891647855530474, + "grad_norm": 13.6196928024292, + "learning_rate": 3.930890468174833e-06, + "lm_loss": 5.3646, + "loss": 1.3295, + "step": 1281, + "text_contrastive_loss": 0.8561, + "train_positive_log_prob": -78.794, + "train_positive_token_accuracy": 0.0806, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3555, + "epoch": 2.893905191873589, + "grad_norm": 13.19919490814209, + "learning_rate": 3.92380398430204e-06, + "lm_loss": 5.4429, + "loss": 1.2727, + "step": 1282, + "text_contrastive_loss": 0.7459, + "train_positive_log_prob": -81.9848, + "train_positive_token_accuracy": 0.0856, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.5386, + "epoch": 2.8961625282167045, + "grad_norm": 20.455081939697266, + "learning_rate": 3.916719766509297e-06, + "lm_loss": 5.4365, + "loss": 1.5134, + "step": 1283, + "text_contrastive_loss": 0.8624, + "train_positive_log_prob": -81.4561, + "train_positive_token_accuracy": 0.08, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.4399, + "epoch": 2.8984198645598194, + "grad_norm": 13.278593063354492, + "learning_rate": 3.9096378297134115e-06, + "lm_loss": 5.4567, + "loss": 1.3845, + "step": 1284, + "text_contrastive_loss": 0.7978, + "train_positive_log_prob": -82.1473, + "train_positive_token_accuracy": 0.0742, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.4015, + "epoch": 2.9006772009029347, + "grad_norm": 14.169156074523926, + "learning_rate": 3.90255818882638e-06, + "lm_loss": 5.5246, + "loss": 1.367, + "step": 1285, + "text_contrastive_loss": 0.8261, + "train_positive_log_prob": -82.6749, + "train_positive_token_accuracy": 0.0822, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3776, + "epoch": 2.9029345372460496, + "grad_norm": 13.180066108703613, + "learning_rate": 3.89548085875537e-06, + "lm_loss": 5.4845, + "loss": 1.3327, + "step": 1286, + "text_contrastive_loss": 0.8133, + "train_positive_log_prob": -81.6495, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.34, + "epoch": 2.905191873589165, + "grad_norm": 12.781220436096191, + "learning_rate": 3.888405854402684e-06, + "lm_loss": 5.436, + "loss": 1.3331, + "step": 1287, + "text_contrastive_loss": 0.8989, + "train_positive_log_prob": -80.4524, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3441, + "epoch": 2.90744920993228, + "grad_norm": 13.240119934082031, + "learning_rate": 3.881333190665723e-06, + "lm_loss": 5.3537, + "loss": 1.2719, + "step": 1288, + "text_contrastive_loss": 0.7849, + "train_positive_log_prob": -78.0686, + "train_positive_token_accuracy": 0.0742, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4681, + "epoch": 2.909706546275395, + "grad_norm": 14.622413635253906, + "learning_rate": 3.8742628824369624e-06, + "lm_loss": 5.4398, + "loss": 1.4701, + "step": 1289, + "text_contrastive_loss": 0.916, + "train_positive_log_prob": -79.6385, + "train_positive_token_accuracy": 0.0798, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3483, + "epoch": 2.91196388261851, + "grad_norm": 13.131986618041992, + "learning_rate": 3.86719494460392e-06, + "lm_loss": 5.577, + "loss": 1.3505, + "step": 1290, + "text_contrastive_loss": 0.8889, + "train_positive_log_prob": -85.4209, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4108, + "epoch": 2.9142212189616252, + "grad_norm": 13.652870178222656, + "learning_rate": 3.8601293920491165e-06, + "lm_loss": 5.5282, + "loss": 1.422, + "step": 1291, + "text_contrastive_loss": 0.9166, + "train_positive_log_prob": -81.6933, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4169, + "epoch": 2.9164785553047405, + "grad_norm": 15.644908905029297, + "learning_rate": 3.853066239650055e-06, + "lm_loss": 5.4556, + "loss": 1.3805, + "step": 1292, + "text_contrastive_loss": 0.836, + "train_positive_log_prob": -79.1293, + "train_positive_token_accuracy": 0.0762, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.4195, + "epoch": 2.9187358916478554, + "grad_norm": 12.787084579467773, + "learning_rate": 3.846005502279182e-06, + "lm_loss": 5.4533, + "loss": 1.3338, + "step": 1293, + "text_contrastive_loss": 0.7379, + "train_positive_log_prob": -80.0308, + "train_positive_token_accuracy": 0.0764, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3992, + "epoch": 2.9209932279909707, + "grad_norm": 14.623241424560547, + "learning_rate": 3.83894719480386e-06, + "lm_loss": 5.45, + "loss": 1.4298, + "step": 1294, + "text_contrastive_loss": 0.9713, + "train_positive_log_prob": -79.854, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.3769, + "epoch": 2.9232505643340856, + "grad_norm": 13.713342666625977, + "learning_rate": 3.8318913320863355e-06, + "lm_loss": 5.5111, + "loss": 1.3329, + "step": 1295, + "text_contrastive_loss": 0.8099, + "train_positive_log_prob": -81.2435, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.4176, + "epoch": 2.925507900677201, + "grad_norm": 13.57674503326416, + "learning_rate": 3.8248379289837065e-06, + "lm_loss": 5.3196, + "loss": 1.3974, + "step": 1296, + "text_contrastive_loss": 0.8959, + "train_positive_log_prob": -79.1695, + "train_positive_token_accuracy": 0.0724, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.4527, + "epoch": 2.927765237020316, + "grad_norm": 18.093708038330078, + "learning_rate": 3.81778700034789e-06, + "lm_loss": 5.3918, + "loss": 1.4383, + "step": 1297, + "text_contrastive_loss": 0.8929, + "train_positive_log_prob": -80.5847, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3659, + "epoch": 2.930022573363431, + "grad_norm": 12.559804916381836, + "learning_rate": 3.810738561025599e-06, + "lm_loss": 5.4819, + "loss": 1.2375, + "step": 1298, + "text_contrastive_loss": 0.6468, + "train_positive_log_prob": -81.8204, + "train_positive_token_accuracy": 0.0756, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4496, + "epoch": 2.9322799097065464, + "grad_norm": 15.355415344238281, + "learning_rate": 3.803692625858295e-06, + "lm_loss": 5.4101, + "loss": 1.4349, + "step": 1299, + "text_contrastive_loss": 0.8887, + "train_positive_log_prob": -78.8509, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3853, + "epoch": 2.9345372460496613, + "grad_norm": 13.496744155883789, + "learning_rate": 3.7966492096821773e-06, + "lm_loss": 5.3833, + "loss": 1.3453, + "step": 1300, + "text_contrastive_loss": 0.8432, + "train_positive_log_prob": -79.4463, + "train_positive_token_accuracy": 0.0868, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.3289, + "epoch": 2.9367945823927766, + "grad_norm": 12.369037628173828, + "learning_rate": 3.7896083273281324e-06, + "lm_loss": 5.5349, + "loss": 1.2606, + "step": 1301, + "text_contrastive_loss": 0.7564, + "train_positive_log_prob": -83.2156, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3581, + "epoch": 2.939051918735892, + "grad_norm": 12.391029357910156, + "learning_rate": 3.7825699936217183e-06, + "lm_loss": 5.3921, + "loss": 1.2643, + "step": 1302, + "text_contrastive_loss": 0.734, + "train_positive_log_prob": -81.575, + "train_positive_token_accuracy": 0.0816, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.4868, + "epoch": 2.9413092550790068, + "grad_norm": 15.004427909851074, + "learning_rate": 3.7755342233831188e-06, + "lm_loss": 5.3904, + "loss": 1.4411, + "step": 1303, + "text_contrastive_loss": 0.8306, + "train_positive_log_prob": -77.2772, + "train_positive_token_accuracy": 0.0856, + "train_positive_token_prob": 0.0326 + }, + { + "contrastive_loss": 0.4082, + "epoch": 2.9435665914221216, + "grad_norm": 13.540632247924805, + "learning_rate": 3.7685010314271287e-06, + "lm_loss": 5.5017, + "loss": 1.4352, + "step": 1304, + "text_contrastive_loss": 0.9537, + "train_positive_log_prob": -80.7594, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.394, + "epoch": 2.945823927765237, + "grad_norm": 13.745838165283203, + "learning_rate": 3.761470432563109e-06, + "lm_loss": 5.4681, + "loss": 1.3038, + "step": 1305, + "text_contrastive_loss": 0.726, + "train_positive_log_prob": -78.7091, + "train_positive_token_accuracy": 0.0817, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.4273, + "epoch": 2.9480812641083523, + "grad_norm": 15.11688232421875, + "learning_rate": 3.75444244159496e-06, + "lm_loss": 5.4962, + "loss": 1.3409, + "step": 1306, + "text_contrastive_loss": 0.7279, + "train_positive_log_prob": -82.0817, + "train_positive_token_accuracy": 0.0816, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.438, + "epoch": 2.950338600451467, + "grad_norm": 14.449092864990234, + "learning_rate": 3.747417073321092e-06, + "lm_loss": 5.522, + "loss": 1.3789, + "step": 1307, + "text_contrastive_loss": 0.7775, + "train_positive_log_prob": -83.4332, + "train_positive_token_accuracy": 0.0807, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3452, + "epoch": 2.9525959367945824, + "grad_norm": 11.618966102600098, + "learning_rate": 3.740394342534394e-06, + "lm_loss": 5.5287, + "loss": 1.236, + "step": 1308, + "text_contrastive_loss": 0.6757, + "train_positive_log_prob": -81.7225, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.4305, + "epoch": 2.9548532731376973, + "grad_norm": 13.585977554321289, + "learning_rate": 3.7333742640221994e-06, + "lm_loss": 5.4511, + "loss": 1.3591, + "step": 1309, + "text_contrastive_loss": 0.767, + "train_positive_log_prob": -79.0654, + "train_positive_token_accuracy": 0.0765, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.4364, + "epoch": 2.9571106094808126, + "grad_norm": 13.712849617004395, + "learning_rate": 3.7263568525662574e-06, + "lm_loss": 5.4976, + "loss": 1.4304, + "step": 1310, + "text_contrastive_loss": 0.8883, + "train_positive_log_prob": -81.3067, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.4006, + "epoch": 2.959367945823928, + "grad_norm": 13.779935836791992, + "learning_rate": 3.7193421229427017e-06, + "lm_loss": 5.4281, + "loss": 1.355, + "step": 1311, + "text_contrastive_loss": 0.8231, + "train_positive_log_prob": -80.109, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.4113, + "epoch": 2.961625282167043, + "grad_norm": 12.519417762756348, + "learning_rate": 3.7123300899220193e-06, + "lm_loss": 5.4806, + "loss": 1.4206, + "step": 1312, + "text_contrastive_loss": 0.9225, + "train_positive_log_prob": -80.6872, + "train_positive_token_accuracy": 0.0836, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3422, + "epoch": 2.963882618510158, + "grad_norm": 13.868303298950195, + "learning_rate": 3.7053207682690184e-06, + "lm_loss": 5.5162, + "loss": 1.2648, + "step": 1313, + "text_contrastive_loss": 0.7419, + "train_positive_log_prob": -81.4699, + "train_positive_token_accuracy": 0.0772, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.3755, + "epoch": 2.966139954853273, + "grad_norm": 13.880483627319336, + "learning_rate": 3.698314172742799e-06, + "lm_loss": 5.5326, + "loss": 1.3141, + "step": 1314, + "text_contrastive_loss": 0.7707, + "train_positive_log_prob": -83.8562, + "train_positive_token_accuracy": 0.0773, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3887, + "epoch": 2.9683972911963883, + "grad_norm": 13.846237182617188, + "learning_rate": 3.691310318096719e-06, + "lm_loss": 5.469, + "loss": 1.3067, + "step": 1315, + "text_contrastive_loss": 0.7422, + "train_positive_log_prob": -81.0574, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.4065, + "epoch": 2.9706546275395036, + "grad_norm": 14.637057304382324, + "learning_rate": 3.684309219078368e-06, + "lm_loss": 5.4182, + "loss": 1.3516, + "step": 1316, + "text_contrastive_loss": 0.8065, + "train_positive_log_prob": -79.894, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3741, + "epoch": 2.9729119638826185, + "grad_norm": 12.735899925231934, + "learning_rate": 3.6773108904295294e-06, + "lm_loss": 5.4962, + "loss": 1.3511, + "step": 1317, + "text_contrastive_loss": 0.8548, + "train_positive_log_prob": -81.2652, + "train_positive_token_accuracy": 0.0801, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.4949, + "epoch": 2.975169300225734, + "grad_norm": 15.224711418151855, + "learning_rate": 3.6703153468861585e-06, + "lm_loss": 5.5098, + "loss": 1.4685, + "step": 1318, + "text_contrastive_loss": 0.8454, + "train_positive_log_prob": -81.1835, + "train_positive_token_accuracy": 0.0745, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.3728, + "epoch": 2.9774266365688487, + "grad_norm": 14.48100757598877, + "learning_rate": 3.663322603178339e-06, + "lm_loss": 5.4335, + "loss": 1.3286, + "step": 1319, + "text_contrastive_loss": 0.8248, + "train_positive_log_prob": -79.5752, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.4183, + "epoch": 2.979683972911964, + "grad_norm": 13.197893142700195, + "learning_rate": 3.6563326740302664e-06, + "lm_loss": 5.3573, + "loss": 1.3472, + "step": 1320, + "text_contrastive_loss": 0.7864, + "train_positive_log_prob": -78.1643, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.5046, + "epoch": 2.9819413092550793, + "grad_norm": 14.0831937789917, + "learning_rate": 3.6493455741602035e-06, + "lm_loss": 5.2914, + "loss": 1.4381, + "step": 1321, + "text_contrastive_loss": 0.8086, + "train_positive_log_prob": -76.2327, + "train_positive_token_accuracy": 0.0835, + "train_positive_token_prob": 0.0327 + }, + { + "contrastive_loss": 0.3164, + "epoch": 2.984198645598194, + "grad_norm": 12.877252578735352, + "learning_rate": 3.642361318280461e-06, + "lm_loss": 5.3257, + "loss": 1.1353, + "step": 1322, + "text_contrastive_loss": 0.5726, + "train_positive_log_prob": -77.8467, + "train_positive_token_accuracy": 0.0772, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3554, + "epoch": 2.986455981941309, + "grad_norm": 14.354303359985352, + "learning_rate": 3.635379921097359e-06, + "lm_loss": 5.5237, + "loss": 1.3098, + "step": 1323, + "text_contrastive_loss": 0.804, + "train_positive_log_prob": -82.8738, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.4128, + "epoch": 2.9887133182844243, + "grad_norm": 13.451807022094727, + "learning_rate": 3.6284013973111962e-06, + "lm_loss": 5.5031, + "loss": 1.4306, + "step": 1324, + "text_contrastive_loss": 0.935, + "train_positive_log_prob": -83.8793, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3971, + "epoch": 2.9909706546275396, + "grad_norm": 12.980441093444824, + "learning_rate": 3.621425761616224e-06, + "lm_loss": 5.4759, + "loss": 1.4187, + "step": 1325, + "text_contrastive_loss": 0.9481, + "train_positive_log_prob": -79.8069, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3338, + "epoch": 2.9932279909706545, + "grad_norm": 12.780158996582031, + "learning_rate": 3.614453028700613e-06, + "lm_loss": 5.4292, + "loss": 1.2398, + "step": 1326, + "text_contrastive_loss": 0.7262, + "train_positive_log_prob": -81.7174, + "train_positive_token_accuracy": 0.0828, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.4255, + "epoch": 2.99548532731377, + "grad_norm": 16.075681686401367, + "learning_rate": 3.6074832132464165e-06, + "lm_loss": 5.5013, + "loss": 1.4036, + "step": 1327, + "text_contrastive_loss": 0.856, + "train_positive_log_prob": -80.8002, + "train_positive_token_accuracy": 0.075, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4442, + "epoch": 2.9977426636568847, + "grad_norm": 14.092156410217285, + "learning_rate": 3.600516329929551e-06, + "lm_loss": 5.5716, + "loss": 1.4952, + "step": 1328, + "text_contrastive_loss": 0.9877, + "train_positive_log_prob": -82.2675, + "train_positive_token_accuracy": 0.073, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.2231, + "epoch": 3.0, + "grad_norm": 16.064529418945312, + "learning_rate": 3.5935523934197537e-06, + "lm_loss": 5.719, + "loss": 1.1461, + "step": 1329, + "text_contrastive_loss": 0.7022, + "train_positive_log_prob": -85.5764, + "train_positive_token_accuracy": 0.0762, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.3547, + "epoch": 3.0022573363431153, + "grad_norm": 11.884598731994629, + "learning_rate": 3.5865914183805606e-06, + "lm_loss": 5.5123, + "loss": 1.2711, + "step": 1330, + "text_contrastive_loss": 0.7304, + "train_positive_log_prob": -81.4788, + "train_positive_token_accuracy": 0.0846, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3687, + "epoch": 3.00451467268623, + "grad_norm": 12.612887382507324, + "learning_rate": 3.5796334194692704e-06, + "lm_loss": 5.5413, + "loss": 1.3579, + "step": 1331, + "text_contrastive_loss": 0.8701, + "train_positive_log_prob": -80.9544, + "train_positive_token_accuracy": 0.0767, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3545, + "epoch": 3.0067720090293455, + "grad_norm": 13.504070281982422, + "learning_rate": 3.572678411336916e-06, + "lm_loss": 5.493, + "loss": 1.3106, + "step": 1332, + "text_contrastive_loss": 0.8135, + "train_positive_log_prob": -83.619, + "train_positive_token_accuracy": 0.0788, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3618, + "epoch": 3.0090293453724604, + "grad_norm": 11.488712310791016, + "learning_rate": 3.5657264086282317e-06, + "lm_loss": 5.4872, + "loss": 1.3147, + "step": 1333, + "text_contrastive_loss": 0.8083, + "train_positive_log_prob": -80.0121, + "train_positive_token_accuracy": 0.0817, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3631, + "epoch": 3.0112866817155757, + "grad_norm": 12.847582817077637, + "learning_rate": 3.5587774259816234e-06, + "lm_loss": 5.5109, + "loss": 1.2204, + "step": 1334, + "text_contrastive_loss": 0.6125, + "train_positive_log_prob": -83.7421, + "train_positive_token_accuracy": 0.0746, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4774, + "epoch": 3.0135440180586905, + "grad_norm": 13.98725700378418, + "learning_rate": 3.5518314780291384e-06, + "lm_loss": 5.3793, + "loss": 1.4602, + "step": 1335, + "text_contrastive_loss": 0.8898, + "train_positive_log_prob": -78.109, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3958, + "epoch": 3.015801354401806, + "grad_norm": 14.58008098602295, + "learning_rate": 3.544888579396435e-06, + "lm_loss": 5.534, + "loss": 1.4079, + "step": 1336, + "text_contrastive_loss": 0.9173, + "train_positive_log_prob": -82.4684, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.46, + "epoch": 3.018058690744921, + "grad_norm": 13.23354434967041, + "learning_rate": 3.5379487447027483e-06, + "lm_loss": 5.3697, + "loss": 1.3933, + "step": 1337, + "text_contrastive_loss": 0.7926, + "train_positive_log_prob": -78.9531, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3309, + "epoch": 3.020316027088036, + "grad_norm": 13.347737312316895, + "learning_rate": 3.5310119885608625e-06, + "lm_loss": 5.4498, + "loss": 1.2104, + "step": 1338, + "text_contrastive_loss": 0.669, + "train_positive_log_prob": -81.9145, + "train_positive_token_accuracy": 0.0741, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.3263, + "epoch": 3.0225733634311513, + "grad_norm": 12.796170234680176, + "learning_rate": 3.524078325577084e-06, + "lm_loss": 5.4564, + "loss": 1.2675, + "step": 1339, + "text_contrastive_loss": 0.7912, + "train_positive_log_prob": -79.8241, + "train_positive_token_accuracy": 0.0748, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.3942, + "epoch": 3.024830699774266, + "grad_norm": 12.41689395904541, + "learning_rate": 3.517147770351199e-06, + "lm_loss": 5.4538, + "loss": 1.2649, + "step": 1340, + "text_contrastive_loss": 0.6508, + "train_positive_log_prob": -82.8949, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0325 + }, + { + "contrastive_loss": 0.3995, + "epoch": 3.0270880361173815, + "grad_norm": 13.999566078186035, + "learning_rate": 3.5102203374764555e-06, + "lm_loss": 5.4543, + "loss": 1.3525, + "step": 1341, + "text_contrastive_loss": 0.8151, + "train_positive_log_prob": -81.156, + "train_positive_token_accuracy": 0.0879, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.377, + "epoch": 3.0293453724604964, + "grad_norm": 12.973153114318848, + "learning_rate": 3.503296041539522e-06, + "lm_loss": 5.4542, + "loss": 1.3335, + "step": 1342, + "text_contrastive_loss": 0.8222, + "train_positive_log_prob": -79.0614, + "train_positive_token_accuracy": 0.0763, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3815, + "epoch": 3.0316027088036117, + "grad_norm": 14.240755081176758, + "learning_rate": 3.496374897120467e-06, + "lm_loss": 5.3709, + "loss": 1.384, + "step": 1343, + "text_contrastive_loss": 0.9308, + "train_positive_log_prob": -79.1377, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3455, + "epoch": 3.033860045146727, + "grad_norm": 13.63241958618164, + "learning_rate": 3.4894569187927204e-06, + "lm_loss": 5.4512, + "loss": 1.2524, + "step": 1344, + "text_contrastive_loss": 0.7237, + "train_positive_log_prob": -81.2586, + "train_positive_token_accuracy": 0.0773, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3542, + "epoch": 3.036117381489842, + "grad_norm": 12.142577171325684, + "learning_rate": 3.4825421211230437e-06, + "lm_loss": 5.5704, + "loss": 1.2646, + "step": 1345, + "text_contrastive_loss": 0.7067, + "train_positive_log_prob": -81.6859, + "train_positive_token_accuracy": 0.0788, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3582, + "epoch": 3.038374717832957, + "grad_norm": 13.192216873168945, + "learning_rate": 3.4756305186715046e-06, + "lm_loss": 5.6655, + "loss": 1.3647, + "step": 1346, + "text_contrastive_loss": 0.8799, + "train_positive_log_prob": -83.0106, + "train_positive_token_accuracy": 0.0773, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.3866, + "epoch": 3.040632054176072, + "grad_norm": 12.73764705657959, + "learning_rate": 3.4687221259914394e-06, + "lm_loss": 5.3491, + "loss": 1.2811, + "step": 1347, + "text_contrastive_loss": 0.7192, + "train_positive_log_prob": -79.9447, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.358, + "epoch": 3.0428893905191874, + "grad_norm": 12.56615924835205, + "learning_rate": 3.461816957629429e-06, + "lm_loss": 5.4639, + "loss": 1.3029, + "step": 1348, + "text_contrastive_loss": 0.797, + "train_positive_log_prob": -80.0368, + "train_positive_token_accuracy": 0.0845, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.381, + "epoch": 3.0451467268623027, + "grad_norm": 13.645767211914062, + "learning_rate": 3.4549150281252635e-06, + "lm_loss": 5.4089, + "loss": 1.2741, + "step": 1349, + "text_contrastive_loss": 0.7046, + "train_positive_log_prob": -82.0843, + "train_positive_token_accuracy": 0.0739, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.4424, + "epoch": 3.0474040632054176, + "grad_norm": 14.364620208740234, + "learning_rate": 3.448016352011914e-06, + "lm_loss": 5.4528, + "loss": 1.3461, + "step": 1350, + "text_contrastive_loss": 0.7169, + "train_positive_log_prob": -82.152, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3626, + "epoch": 3.049661399548533, + "grad_norm": 12.785921096801758, + "learning_rate": 3.441120943815497e-06, + "lm_loss": 5.5062, + "loss": 1.2834, + "step": 1351, + "text_contrastive_loss": 0.7404, + "train_positive_log_prob": -81.1732, + "train_positive_token_accuracy": 0.0739, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3503, + "epoch": 3.0519187358916477, + "grad_norm": 12.212498664855957, + "learning_rate": 3.4342288180552556e-06, + "lm_loss": 5.4898, + "loss": 1.2805, + "step": 1352, + "text_contrastive_loss": 0.7625, + "train_positive_log_prob": -82.6052, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3162, + "epoch": 3.054176072234763, + "grad_norm": 13.934971809387207, + "learning_rate": 3.427339989243514e-06, + "lm_loss": 5.5303, + "loss": 1.2084, + "step": 1353, + "text_contrastive_loss": 0.6784, + "train_positive_log_prob": -82.0693, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3111, + "epoch": 3.056433408577878, + "grad_norm": 12.673248291015625, + "learning_rate": 3.420454471885659e-06, + "lm_loss": 5.3798, + "loss": 1.2045, + "step": 1354, + "text_contrastive_loss": 0.711, + "train_positive_log_prob": -80.0263, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3344, + "epoch": 3.0586907449209932, + "grad_norm": 12.789094924926758, + "learning_rate": 3.4135722804801004e-06, + "lm_loss": 5.517, + "loss": 1.3191, + "step": 1355, + "text_contrastive_loss": 0.8659, + "train_positive_log_prob": -83.2513, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3905, + "epoch": 3.0609480812641086, + "grad_norm": 13.032346725463867, + "learning_rate": 3.4066934295182496e-06, + "lm_loss": 5.4757, + "loss": 1.3458, + "step": 1356, + "text_contrastive_loss": 0.8153, + "train_positive_log_prob": -80.8074, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.2831, + "epoch": 3.0632054176072234, + "grad_norm": 11.06863784790039, + "learning_rate": 3.3998179334844823e-06, + "lm_loss": 5.4576, + "loss": 1.1903, + "step": 1357, + "text_contrastive_loss": 0.7229, + "train_positive_log_prob": -80.1163, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3206, + "epoch": 3.0654627539503387, + "grad_norm": 12.664710998535156, + "learning_rate": 3.3929458068561073e-06, + "lm_loss": 5.4483, + "loss": 1.2464, + "step": 1358, + "text_contrastive_loss": 0.762, + "train_positive_log_prob": -80.3236, + "train_positive_token_accuracy": 0.0776, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.2519, + "epoch": 3.0677200902934536, + "grad_norm": 9.251008033752441, + "learning_rate": 3.3860770641033417e-06, + "lm_loss": 5.4511, + "loss": 1.1152, + "step": 1359, + "text_contrastive_loss": 0.6364, + "train_positive_log_prob": -80.7667, + "train_positive_token_accuracy": 0.0767, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3224, + "epoch": 3.069977426636569, + "grad_norm": 12.022947311401367, + "learning_rate": 3.379211719689278e-06, + "lm_loss": 5.5976, + "loss": 1.24, + "step": 1360, + "text_contrastive_loss": 0.7156, + "train_positive_log_prob": -82.9597, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.3535, + "epoch": 3.072234762979684, + "grad_norm": 10.767943382263184, + "learning_rate": 3.37234978806985e-06, + "lm_loss": 5.3906, + "loss": 1.2823, + "step": 1361, + "text_contrastive_loss": 0.7794, + "train_positive_log_prob": -79.5335, + "train_positive_token_accuracy": 0.0747, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3851, + "epoch": 3.074492099322799, + "grad_norm": 13.653422355651855, + "learning_rate": 3.365491283693807e-06, + "lm_loss": 5.3721, + "loss": 1.3238, + "step": 1362, + "text_contrastive_loss": 0.803, + "train_positive_log_prob": -79.4047, + "train_positive_token_accuracy": 0.083, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3092, + "epoch": 3.0767494356659144, + "grad_norm": 12.80960464477539, + "learning_rate": 3.358636221002682e-06, + "lm_loss": 5.642, + "loss": 1.2283, + "step": 1363, + "text_contrastive_loss": 0.7098, + "train_positive_log_prob": -82.1669, + "train_positive_token_accuracy": 0.0762, + "train_positive_token_prob": 0.0287 + }, + { + "contrastive_loss": 0.3021, + "epoch": 3.0790067720090293, + "grad_norm": 12.5709810256958, + "learning_rate": 3.351784614430761e-06, + "lm_loss": 5.4796, + "loss": 1.1417, + "step": 1364, + "text_contrastive_loss": 0.5834, + "train_positive_log_prob": -80.7625, + "train_positive_token_accuracy": 0.0822, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3602, + "epoch": 3.0812641083521446, + "grad_norm": 12.859617233276367, + "learning_rate": 3.3449364784050515e-06, + "lm_loss": 5.3469, + "loss": 1.3123, + "step": 1365, + "text_contrastive_loss": 0.8348, + "train_positive_log_prob": -77.1957, + "train_positive_token_accuracy": 0.0693, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.3148, + "epoch": 3.0835214446952595, + "grad_norm": 11.97329330444336, + "learning_rate": 3.3380918273452557e-06, + "lm_loss": 5.5413, + "loss": 1.2396, + "step": 1366, + "text_contrastive_loss": 0.7414, + "train_positive_log_prob": -80.0452, + "train_positive_token_accuracy": 0.0814, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.3514, + "epoch": 3.0857787810383748, + "grad_norm": 13.488192558288574, + "learning_rate": 3.3312506756637343e-06, + "lm_loss": 5.4303, + "loss": 1.2478, + "step": 1367, + "text_contrastive_loss": 0.7068, + "train_positive_log_prob": -79.7598, + "train_positive_token_accuracy": 0.0856, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3439, + "epoch": 3.0880361173814896, + "grad_norm": 11.758933067321777, + "learning_rate": 3.324413037765483e-06, + "lm_loss": 5.4703, + "loss": 1.2743, + "step": 1368, + "text_contrastive_loss": 0.7667, + "train_positive_log_prob": -81.5404, + "train_positive_token_accuracy": 0.071, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3542, + "epoch": 3.090293453724605, + "grad_norm": 12.695940971374512, + "learning_rate": 3.317578928048096e-06, + "lm_loss": 5.4867, + "loss": 1.2766, + "step": 1369, + "text_contrastive_loss": 0.7474, + "train_positive_log_prob": -79.7529, + "train_positive_token_accuracy": 0.0747, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3202, + "epoch": 3.0925507900677203, + "grad_norm": 11.827890396118164, + "learning_rate": 3.310748360901741e-06, + "lm_loss": 5.4365, + "loss": 1.2479, + "step": 1370, + "text_contrastive_loss": 0.7681, + "train_positive_log_prob": -81.9709, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.0331 + }, + { + "contrastive_loss": 0.27, + "epoch": 3.094808126410835, + "grad_norm": 11.169276237487793, + "learning_rate": 3.303921350709124e-06, + "lm_loss": 5.4424, + "loss": 1.1259, + "step": 1371, + "text_contrastive_loss": 0.6232, + "train_positive_log_prob": -80.7036, + "train_positive_token_accuracy": 0.0788, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.342, + "epoch": 3.0970654627539504, + "grad_norm": 13.409018516540527, + "learning_rate": 3.2970979118454616e-06, + "lm_loss": 5.3577, + "loss": 1.2281, + "step": 1372, + "text_contrastive_loss": 0.7007, + "train_positive_log_prob": -77.7269, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3207, + "epoch": 3.0993227990970653, + "grad_norm": 12.647209167480469, + "learning_rate": 3.2902780586784542e-06, + "lm_loss": 5.5545, + "loss": 1.2569, + "step": 1373, + "text_contrastive_loss": 0.7615, + "train_positive_log_prob": -83.9895, + "train_positive_token_accuracy": 0.0775, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.4516, + "epoch": 3.1015801354401806, + "grad_norm": 13.65585708618164, + "learning_rate": 3.283461805568246e-06, + "lm_loss": 5.5318, + "loss": 1.4869, + "step": 1374, + "text_contrastive_loss": 0.9644, + "train_positive_log_prob": -80.1932, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3348, + "epoch": 3.1038374717832955, + "grad_norm": 14.487042427062988, + "learning_rate": 3.276649166867406e-06, + "lm_loss": 5.4442, + "loss": 1.2493, + "step": 1375, + "text_contrastive_loss": 0.7401, + "train_positive_log_prob": -81.7249, + "train_positive_token_accuracy": 0.0755, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.2752, + "epoch": 3.106094808126411, + "grad_norm": 13.373377799987793, + "learning_rate": 3.2698401569208883e-06, + "lm_loss": 5.4165, + "loss": 1.1617, + "step": 1376, + "text_contrastive_loss": 0.6897, + "train_positive_log_prob": -78.5191, + "train_positive_token_accuracy": 0.0806, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3275, + "epoch": 3.108352144469526, + "grad_norm": 11.889525413513184, + "learning_rate": 3.2630347900660094e-06, + "lm_loss": 5.5039, + "loss": 1.2552, + "step": 1377, + "text_contrastive_loss": 0.7547, + "train_positive_log_prob": -85.0061, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3885, + "epoch": 3.110609480812641, + "grad_norm": 14.333590507507324, + "learning_rate": 3.256233080632414e-06, + "lm_loss": 5.4963, + "loss": 1.2546, + "step": 1378, + "text_contrastive_loss": 0.6331, + "train_positive_log_prob": -80.8352, + "train_positive_token_accuracy": 0.077, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3059, + "epoch": 3.1128668171557563, + "grad_norm": 11.98303508758545, + "learning_rate": 3.249435042942043e-06, + "lm_loss": 5.5387, + "loss": 1.2457, + "step": 1379, + "text_contrastive_loss": 0.7718, + "train_positive_log_prob": -82.1489, + "train_positive_token_accuracy": 0.0801, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3917, + "epoch": 3.115124153498871, + "grad_norm": 14.553565979003906, + "learning_rate": 3.242640691309111e-06, + "lm_loss": 5.5182, + "loss": 1.2765, + "step": 1380, + "text_contrastive_loss": 0.666, + "train_positive_log_prob": -81.2014, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.4007, + "epoch": 3.1173814898419865, + "grad_norm": 12.516433715820312, + "learning_rate": 3.235850040040066e-06, + "lm_loss": 5.5126, + "loss": 1.297, + "step": 1381, + "text_contrastive_loss": 0.6902, + "train_positive_log_prob": -80.8783, + "train_positive_token_accuracy": 0.0777, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3882, + "epoch": 3.119638826185102, + "grad_norm": 12.951908111572266, + "learning_rate": 3.2290631034335684e-06, + "lm_loss": 5.5146, + "loss": 1.3543, + "step": 1382, + "text_contrastive_loss": 0.8293, + "train_positive_log_prob": -81.4927, + "train_positive_token_accuracy": 0.0812, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3828, + "epoch": 3.1218961625282167, + "grad_norm": 14.279322624206543, + "learning_rate": 3.2222798957804524e-06, + "lm_loss": 5.4706, + "loss": 1.3156, + "step": 1383, + "text_contrastive_loss": 0.7714, + "train_positive_log_prob": -80.4315, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.4119, + "epoch": 3.124153498871332, + "grad_norm": 13.596724510192871, + "learning_rate": 3.215500431363706e-06, + "lm_loss": 5.3636, + "loss": 1.3057, + "step": 1384, + "text_contrastive_loss": 0.7147, + "train_positive_log_prob": -81.2293, + "train_positive_token_accuracy": 0.082, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3943, + "epoch": 3.126410835214447, + "grad_norm": 13.19772720336914, + "learning_rate": 3.20872472445843e-06, + "lm_loss": 5.4552, + "loss": 1.3353, + "step": 1385, + "text_contrastive_loss": 0.7911, + "train_positive_log_prob": -82.3384, + "train_positive_token_accuracy": 0.074, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.419, + "epoch": 3.128668171557562, + "grad_norm": 12.970370292663574, + "learning_rate": 3.2019527893318177e-06, + "lm_loss": 5.4481, + "loss": 1.3284, + "step": 1386, + "text_contrastive_loss": 0.7293, + "train_positive_log_prob": -80.8288, + "train_positive_token_accuracy": 0.0787, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.3553, + "epoch": 3.130925507900677, + "grad_norm": 13.519413948059082, + "learning_rate": 3.195184640243115e-06, + "lm_loss": 5.3943, + "loss": 1.2773, + "step": 1387, + "text_contrastive_loss": 0.765, + "train_positive_log_prob": -80.0537, + "train_positive_token_accuracy": 0.0824, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.4169, + "epoch": 3.1331828442437923, + "grad_norm": 14.123638153076172, + "learning_rate": 3.1884202914436024e-06, + "lm_loss": 5.3832, + "loss": 1.3689, + "step": 1388, + "text_contrastive_loss": 0.8274, + "train_positive_log_prob": -78.2055, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.333, + "epoch": 3.1354401805869077, + "grad_norm": 13.019171714782715, + "learning_rate": 3.1816597571765517e-06, + "lm_loss": 5.4471, + "loss": 1.2653, + "step": 1389, + "text_contrastive_loss": 0.7752, + "train_positive_log_prob": -78.4297, + "train_positive_token_accuracy": 0.0811, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3868, + "epoch": 3.1376975169300225, + "grad_norm": 12.628265380859375, + "learning_rate": 3.1749030516772084e-06, + "lm_loss": 5.4165, + "loss": 1.3045, + "step": 1390, + "text_contrastive_loss": 0.752, + "train_positive_log_prob": -77.8206, + "train_positive_token_accuracy": 0.084, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3177, + "epoch": 3.139954853273138, + "grad_norm": 12.264911651611328, + "learning_rate": 3.168150189172754e-06, + "lm_loss": 5.593, + "loss": 1.3105, + "step": 1391, + "text_contrastive_loss": 0.8669, + "train_positive_log_prob": -82.4745, + "train_positive_token_accuracy": 0.0723, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.4166, + "epoch": 3.1422121896162527, + "grad_norm": 15.056285858154297, + "learning_rate": 3.1614011838822755e-06, + "lm_loss": 5.5651, + "loss": 1.3547, + "step": 1392, + "text_contrastive_loss": 0.7632, + "train_positive_log_prob": -85.6976, + "train_positive_token_accuracy": 0.0832, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3487, + "epoch": 3.144469525959368, + "grad_norm": 11.954909324645996, + "learning_rate": 3.154656050016742e-06, + "lm_loss": 5.4233, + "loss": 1.3409, + "step": 1393, + "text_contrastive_loss": 0.8997, + "train_positive_log_prob": -77.3652, + "train_positive_token_accuracy": 0.0712, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4097, + "epoch": 3.146726862302483, + "grad_norm": 12.364349365234375, + "learning_rate": 3.1479148017789673e-06, + "lm_loss": 5.4358, + "loss": 1.366, + "step": 1394, + "text_contrastive_loss": 0.8254, + "train_positive_log_prob": -80.3919, + "train_positive_token_accuracy": 0.0811, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.307, + "epoch": 3.148984198645598, + "grad_norm": 11.107914924621582, + "learning_rate": 3.1411774533635854e-06, + "lm_loss": 5.5102, + "loss": 1.2769, + "step": 1395, + "text_contrastive_loss": 0.8377, + "train_positive_log_prob": -80.8008, + "train_positive_token_accuracy": 0.0796, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.3329, + "epoch": 3.1512415349887135, + "grad_norm": 13.243362426757812, + "learning_rate": 3.134444018957019e-06, + "lm_loss": 5.4747, + "loss": 1.2538, + "step": 1396, + "text_contrastive_loss": 0.7468, + "train_positive_log_prob": -79.9206, + "train_positive_token_accuracy": 0.0838, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3358, + "epoch": 3.1534988713318284, + "grad_norm": 12.45541763305664, + "learning_rate": 3.1277145127374475e-06, + "lm_loss": 5.5226, + "loss": 1.3376, + "step": 1397, + "text_contrastive_loss": 0.8992, + "train_positive_log_prob": -81.2489, + "train_positive_token_accuracy": 0.0783, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3468, + "epoch": 3.1557562076749437, + "grad_norm": 13.882940292358398, + "learning_rate": 3.1209889488747813e-06, + "lm_loss": 5.5498, + "loss": 1.2965, + "step": 1398, + "text_contrastive_loss": 0.7894, + "train_positive_log_prob": -80.1709, + "train_positive_token_accuracy": 0.0714, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4173, + "epoch": 3.1580135440180586, + "grad_norm": 12.50239372253418, + "learning_rate": 3.114267341530627e-06, + "lm_loss": 5.3608, + "loss": 1.3413, + "step": 1399, + "text_contrastive_loss": 0.776, + "train_positive_log_prob": -78.5687, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.3041, + "epoch": 3.160270880361174, + "grad_norm": 9.832123756408691, + "learning_rate": 3.1075497048582635e-06, + "lm_loss": 5.4149, + "loss": 1.1706, + "step": 1400, + "text_contrastive_loss": 0.65, + "train_positive_log_prob": -79.6804, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.4155, + "epoch": 3.1625282167042887, + "grad_norm": 14.397982597351074, + "learning_rate": 3.1008360530026053e-06, + "lm_loss": 5.4789, + "loss": 1.4562, + "step": 1401, + "text_contrastive_loss": 0.9857, + "train_positive_log_prob": -80.8564, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3093, + "epoch": 3.164785553047404, + "grad_norm": 10.96186351776123, + "learning_rate": 3.0941264001001796e-06, + "lm_loss": 5.3612, + "loss": 1.2329, + "step": 1402, + "text_contrastive_loss": 0.7751, + "train_positive_log_prob": -77.4075, + "train_positive_token_accuracy": 0.0836, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3169, + "epoch": 3.1670428893905194, + "grad_norm": 12.037833213806152, + "learning_rate": 3.0874207602790895e-06, + "lm_loss": 5.414, + "loss": 1.2078, + "step": 1403, + "text_contrastive_loss": 0.6991, + "train_positive_log_prob": -80.4912, + "train_positive_token_accuracy": 0.0828, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3565, + "epoch": 3.1693002257336342, + "grad_norm": 12.856452941894531, + "learning_rate": 3.0807191476589926e-06, + "lm_loss": 5.5385, + "loss": 1.253, + "step": 1404, + "text_contrastive_loss": 0.6853, + "train_positive_log_prob": -79.8116, + "train_positive_token_accuracy": 0.067, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.3294, + "epoch": 3.1715575620767495, + "grad_norm": 12.026188850402832, + "learning_rate": 3.0740215763510617e-06, + "lm_loss": 5.4815, + "loss": 1.1745, + "step": 1405, + "text_contrastive_loss": 0.5937, + "train_positive_log_prob": -82.3723, + "train_positive_token_accuracy": 0.0743, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3659, + "epoch": 3.1738148984198644, + "grad_norm": 12.694238662719727, + "learning_rate": 3.0673280604579623e-06, + "lm_loss": 5.3454, + "loss": 1.2355, + "step": 1406, + "text_contrastive_loss": 0.6701, + "train_positive_log_prob": -80.8065, + "train_positive_token_accuracy": 0.0801, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3766, + "epoch": 3.1760722347629797, + "grad_norm": 13.556299209594727, + "learning_rate": 3.0606386140738253e-06, + "lm_loss": 5.4415, + "loss": 1.3474, + "step": 1407, + "text_contrastive_loss": 0.8532, + "train_positive_log_prob": -79.0509, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3443, + "epoch": 3.1783295711060946, + "grad_norm": 13.602184295654297, + "learning_rate": 3.053953251284205e-06, + "lm_loss": 5.5407, + "loss": 1.2453, + "step": 1408, + "text_contrastive_loss": 0.6939, + "train_positive_log_prob": -81.9392, + "train_positive_token_accuracy": 0.0721, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3498, + "epoch": 3.18058690744921, + "grad_norm": 14.03065013885498, + "learning_rate": 3.047271986166061e-06, + "lm_loss": 5.4773, + "loss": 1.2551, + "step": 1409, + "text_contrastive_loss": 0.7152, + "train_positive_log_prob": -81.9867, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3331, + "epoch": 3.1828442437923252, + "grad_norm": 11.213570594787598, + "learning_rate": 3.0405948327877233e-06, + "lm_loss": 5.3752, + "loss": 1.265, + "step": 1410, + "text_contrastive_loss": 0.7888, + "train_positive_log_prob": -81.071, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.3934, + "epoch": 3.18510158013544, + "grad_norm": 13.14257526397705, + "learning_rate": 3.033921805208867e-06, + "lm_loss": 5.3741, + "loss": 1.371, + "step": 1411, + "text_contrastive_loss": 0.8803, + "train_positive_log_prob": -78.9312, + "train_positive_token_accuracy": 0.0835, + "train_positive_token_prob": 0.033 + }, + { + "contrastive_loss": 0.3391, + "epoch": 3.1873589164785554, + "grad_norm": 13.183470726013184, + "learning_rate": 3.027252917480476e-06, + "lm_loss": 5.4071, + "loss": 1.3322, + "step": 1412, + "text_contrastive_loss": 0.9048, + "train_positive_log_prob": -81.1038, + "train_positive_token_accuracy": 0.0823, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3836, + "epoch": 3.1896162528216703, + "grad_norm": 12.707260131835938, + "learning_rate": 3.0205881836448186e-06, + "lm_loss": 5.5395, + "loss": 1.3259, + "step": 1413, + "text_contrastive_loss": 0.7769, + "train_positive_log_prob": -81.0576, + "train_positive_token_accuracy": 0.0796, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3549, + "epoch": 3.1918735891647856, + "grad_norm": 12.8395357131958, + "learning_rate": 3.0139276177354188e-06, + "lm_loss": 5.5229, + "loss": 1.2947, + "step": 1414, + "text_contrastive_loss": 0.7749, + "train_positive_log_prob": -83.4032, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.437, + "epoch": 3.194130925507901, + "grad_norm": 13.19582462310791, + "learning_rate": 3.00727123377702e-06, + "lm_loss": 5.5679, + "loss": 1.4157, + "step": 1415, + "text_contrastive_loss": 0.8439, + "train_positive_log_prob": -82.1062, + "train_positive_token_accuracy": 0.075, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.3573, + "epoch": 3.1963882618510158, + "grad_norm": 12.016645431518555, + "learning_rate": 3.0006190457855643e-06, + "lm_loss": 5.5663, + "loss": 1.2424, + "step": 1416, + "text_contrastive_loss": 0.6569, + "train_positive_log_prob": -82.9945, + "train_positive_token_accuracy": 0.0904, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3663, + "epoch": 3.198645598194131, + "grad_norm": 14.027999877929688, + "learning_rate": 2.9939710677681545e-06, + "lm_loss": 5.3702, + "loss": 1.354, + "step": 1417, + "text_contrastive_loss": 0.9013, + "train_positive_log_prob": -77.8885, + "train_positive_token_accuracy": 0.0851, + "train_positive_token_prob": 0.0328 + }, + { + "contrastive_loss": 0.3899, + "epoch": 3.200902934537246, + "grad_norm": 13.556990623474121, + "learning_rate": 2.987327313723033e-06, + "lm_loss": 5.4245, + "loss": 1.3048, + "step": 1418, + "text_contrastive_loss": 0.745, + "train_positive_log_prob": -80.0955, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0326 + }, + { + "contrastive_loss": 0.2939, + "epoch": 3.2031602708803613, + "grad_norm": 11.398811340332031, + "learning_rate": 2.980687797639543e-06, + "lm_loss": 5.5718, + "loss": 1.2189, + "step": 1419, + "text_contrastive_loss": 0.7357, + "train_positive_log_prob": -82.2901, + "train_positive_token_accuracy": 0.0725, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.2532, + "epoch": 3.205417607223476, + "grad_norm": 12.629672050476074, + "learning_rate": 2.9740525334981105e-06, + "lm_loss": 5.5239, + "loss": 1.0959, + "step": 1420, + "text_contrastive_loss": 0.5806, + "train_positive_log_prob": -83.3314, + "train_positive_token_accuracy": 0.0901, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3664, + "epoch": 3.2076749435665914, + "grad_norm": 13.47043228149414, + "learning_rate": 2.967421535270203e-06, + "lm_loss": 5.4552, + "loss": 1.2858, + "step": 1421, + "text_contrastive_loss": 0.7477, + "train_positive_log_prob": -80.7847, + "train_positive_token_accuracy": 0.0707, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4736, + "epoch": 3.2099322799097068, + "grad_norm": 14.50823974609375, + "learning_rate": 2.9607948169183077e-06, + "lm_loss": 5.4574, + "loss": 1.4906, + "step": 1422, + "text_contrastive_loss": 0.9425, + "train_positive_log_prob": -81.2438, + "train_positive_token_accuracy": 0.0753, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3314, + "epoch": 3.2121896162528216, + "grad_norm": 12.085290908813477, + "learning_rate": 2.9541723923958975e-06, + "lm_loss": 5.4655, + "loss": 1.219, + "step": 1423, + "text_contrastive_loss": 0.682, + "train_positive_log_prob": -80.2299, + "train_positive_token_accuracy": 0.0841, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3843, + "epoch": 3.214446952595937, + "grad_norm": 14.117097854614258, + "learning_rate": 2.94755427564741e-06, + "lm_loss": 5.4757, + "loss": 1.295, + "step": 1424, + "text_contrastive_loss": 0.7261, + "train_positive_log_prob": -79.1174, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.2945, + "epoch": 3.216704288939052, + "grad_norm": 11.705788612365723, + "learning_rate": 2.9409404806082077e-06, + "lm_loss": 5.4392, + "loss": 1.2172, + "step": 1425, + "text_contrastive_loss": 0.7577, + "train_positive_log_prob": -80.6773, + "train_positive_token_accuracy": 0.0856, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3697, + "epoch": 3.218961625282167, + "grad_norm": 12.81151008605957, + "learning_rate": 2.934331021204551e-06, + "lm_loss": 5.3161, + "loss": 1.2716, + "step": 1426, + "text_contrastive_loss": 0.7407, + "train_positive_log_prob": -76.8153, + "train_positive_token_accuracy": 0.0822, + "train_positive_token_prob": 0.0329 + }, + { + "contrastive_loss": 0.2823, + "epoch": 3.221218961625282, + "grad_norm": 12.046260833740234, + "learning_rate": 2.9277259113535774e-06, + "lm_loss": 5.3484, + "loss": 1.1632, + "step": 1427, + "text_contrastive_loss": 0.6922, + "train_positive_log_prob": -78.7619, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3182, + "epoch": 3.2234762979683973, + "grad_norm": 11.892464637756348, + "learning_rate": 2.9211251649632587e-06, + "lm_loss": 5.3481, + "loss": 1.2479, + "step": 1428, + "text_contrastive_loss": 0.7899, + "train_positive_log_prob": -78.0063, + "train_positive_token_accuracy": 0.0758, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.361, + "epoch": 3.2257336343115126, + "grad_norm": 11.71865463256836, + "learning_rate": 2.9145287959323852e-06, + "lm_loss": 5.3531, + "loss": 1.2791, + "step": 1429, + "text_contrastive_loss": 0.7655, + "train_positive_log_prob": -78.9977, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3815, + "epoch": 3.2279909706546275, + "grad_norm": 13.43700122833252, + "learning_rate": 2.9079368181505263e-06, + "lm_loss": 5.4217, + "loss": 1.2926, + "step": 1430, + "text_contrastive_loss": 0.7377, + "train_positive_log_prob": -81.6237, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3478, + "epoch": 3.230248306997743, + "grad_norm": 12.701522827148438, + "learning_rate": 2.9013492454980074e-06, + "lm_loss": 5.455, + "loss": 1.2129, + "step": 1431, + "text_contrastive_loss": 0.6392, + "train_positive_log_prob": -81.2072, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3249, + "epoch": 3.2325056433408577, + "grad_norm": 13.301331520080566, + "learning_rate": 2.894766091845873e-06, + "lm_loss": 5.5228, + "loss": 1.3034, + "step": 1432, + "text_contrastive_loss": 0.8524, + "train_positive_log_prob": -81.4862, + "train_positive_token_accuracy": 0.0747, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.4305, + "epoch": 3.234762979683973, + "grad_norm": 13.721903800964355, + "learning_rate": 2.88818737105587e-06, + "lm_loss": 5.4536, + "loss": 1.4223, + "step": 1433, + "text_contrastive_loss": 0.8927, + "train_positive_log_prob": -79.9291, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3718, + "epoch": 3.237020316027088, + "grad_norm": 12.602433204650879, + "learning_rate": 2.881613096980407e-06, + "lm_loss": 5.3665, + "loss": 1.2694, + "step": 1434, + "text_contrastive_loss": 0.7219, + "train_positive_log_prob": -78.926, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3308, + "epoch": 3.239277652370203, + "grad_norm": 10.810551643371582, + "learning_rate": 2.8750432834625312e-06, + "lm_loss": 5.439, + "loss": 1.287, + "step": 1435, + "text_contrastive_loss": 0.8244, + "train_positive_log_prob": -79.8423, + "train_positive_token_accuracy": 0.0758, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3983, + "epoch": 3.2415349887133185, + "grad_norm": 12.627776145935059, + "learning_rate": 2.8684779443358945e-06, + "lm_loss": 5.5005, + "loss": 1.4065, + "step": 1436, + "text_contrastive_loss": 0.9163, + "train_positive_log_prob": -82.0786, + "train_positive_token_accuracy": 0.0843, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.4057, + "epoch": 3.2437923250564333, + "grad_norm": 13.637645721435547, + "learning_rate": 2.861917093424731e-06, + "lm_loss": 5.4705, + "loss": 1.3629, + "step": 1437, + "text_contrastive_loss": 0.8204, + "train_positive_log_prob": -81.2443, + "train_positive_token_accuracy": 0.0807, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.2901, + "epoch": 3.2460496613995486, + "grad_norm": 12.947152137756348, + "learning_rate": 2.855360744543822e-06, + "lm_loss": 5.3872, + "loss": 1.2553, + "step": 1438, + "text_contrastive_loss": 0.8528, + "train_positive_log_prob": -79.6265, + "train_positive_token_accuracy": 0.0727, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3271, + "epoch": 3.2483069977426635, + "grad_norm": 11.923847198486328, + "learning_rate": 2.8488089114984725e-06, + "lm_loss": 5.3944, + "loss": 1.2383, + "step": 1439, + "text_contrastive_loss": 0.7437, + "train_positive_log_prob": -79.3759, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3317, + "epoch": 3.250564334085779, + "grad_norm": 14.022499084472656, + "learning_rate": 2.84226160808447e-06, + "lm_loss": 5.5343, + "loss": 1.2698, + "step": 1440, + "text_contrastive_loss": 0.7695, + "train_positive_log_prob": -81.2849, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.4461, + "epoch": 3.2528216704288937, + "grad_norm": 13.833677291870117, + "learning_rate": 2.835718848088076e-06, + "lm_loss": 5.4159, + "loss": 1.4095, + "step": 1441, + "text_contrastive_loss": 0.8435, + "train_positive_log_prob": -79.4826, + "train_positive_token_accuracy": 0.0764, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3708, + "epoch": 3.255079006772009, + "grad_norm": 13.702215194702148, + "learning_rate": 2.8291806452859803e-06, + "lm_loss": 5.4113, + "loss": 1.2738, + "step": 1442, + "text_contrastive_loss": 0.7238, + "train_positive_log_prob": -80.1961, + "train_positive_token_accuracy": 0.0838, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.4255, + "epoch": 3.2573363431151243, + "grad_norm": 13.261329650878906, + "learning_rate": 2.822647013445272e-06, + "lm_loss": 5.4667, + "loss": 1.4273, + "step": 1443, + "text_contrastive_loss": 0.9103, + "train_positive_log_prob": -81.1246, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4196, + "epoch": 3.259593679458239, + "grad_norm": 12.944443702697754, + "learning_rate": 2.8161179663234215e-06, + "lm_loss": 5.4491, + "loss": 1.4448, + "step": 1444, + "text_contrastive_loss": 0.9604, + "train_positive_log_prob": -78.9492, + "train_positive_token_accuracy": 0.0798, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3432, + "epoch": 3.2618510158013545, + "grad_norm": 13.5558443069458, + "learning_rate": 2.809593517668243e-06, + "lm_loss": 5.4923, + "loss": 1.2369, + "step": 1445, + "text_contrastive_loss": 0.6891, + "train_positive_log_prob": -81.678, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.4424, + "epoch": 3.2641083521444694, + "grad_norm": 13.479644775390625, + "learning_rate": 2.8030736812178717e-06, + "lm_loss": 5.3268, + "loss": 1.427, + "step": 1446, + "text_contrastive_loss": 0.9039, + "train_positive_log_prob": -77.3635, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3835, + "epoch": 3.2663656884875847, + "grad_norm": 12.056699752807617, + "learning_rate": 2.796558470700723e-06, + "lm_loss": 5.4116, + "loss": 1.3212, + "step": 1447, + "text_contrastive_loss": 0.793, + "train_positive_log_prob": -80.2221, + "train_positive_token_accuracy": 0.0764, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4173, + "epoch": 3.2686230248307, + "grad_norm": 13.352971076965332, + "learning_rate": 2.790047899835479e-06, + "lm_loss": 5.3827, + "loss": 1.35, + "step": 1448, + "text_contrastive_loss": 0.789, + "train_positive_log_prob": -79.5629, + "train_positive_token_accuracy": 0.0843, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.2722, + "epoch": 3.270880361173815, + "grad_norm": 12.81513786315918, + "learning_rate": 2.7835419823310507e-06, + "lm_loss": 5.4121, + "loss": 1.2066, + "step": 1449, + "text_contrastive_loss": 0.7863, + "train_positive_log_prob": -80.3548, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.335, + "epoch": 3.27313769751693, + "grad_norm": 12.399392127990723, + "learning_rate": 2.777040731886549e-06, + "lm_loss": 5.425, + "loss": 1.2649, + "step": 1450, + "text_contrastive_loss": 0.7747, + "train_positive_log_prob": -78.7252, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.2857, + "epoch": 3.275395033860045, + "grad_norm": 11.388596534729004, + "learning_rate": 2.770544162191261e-06, + "lm_loss": 5.5009, + "loss": 1.1544, + "step": 1451, + "text_contrastive_loss": 0.6372, + "train_positive_log_prob": -82.7695, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.4289, + "epoch": 3.2776523702031604, + "grad_norm": 14.709929466247559, + "learning_rate": 2.7640522869246134e-06, + "lm_loss": 5.3781, + "loss": 1.2694, + "step": 1452, + "text_contrastive_loss": 0.6053, + "train_positive_log_prob": -79.7052, + "train_positive_token_accuracy": 0.0772, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3779, + "epoch": 3.2799097065462752, + "grad_norm": 12.865499496459961, + "learning_rate": 2.7575651197561504e-06, + "lm_loss": 5.3772, + "loss": 1.3361, + "step": 1453, + "text_contrastive_loss": 0.841, + "train_positive_log_prob": -79.7705, + "train_positive_token_accuracy": 0.0824, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3744, + "epoch": 3.2821670428893905, + "grad_norm": 14.63218879699707, + "learning_rate": 2.7510826743455037e-06, + "lm_loss": 5.5678, + "loss": 1.2975, + "step": 1454, + "text_contrastive_loss": 0.7327, + "train_positive_log_prob": -84.4762, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.4056, + "epoch": 3.2844243792325054, + "grad_norm": 12.933475494384766, + "learning_rate": 2.744604964342364e-06, + "lm_loss": 5.4856, + "loss": 1.3871, + "step": 1455, + "text_contrastive_loss": 0.8658, + "train_positive_log_prob": -80.5967, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.3463, + "epoch": 3.2866817155756207, + "grad_norm": 11.56923770904541, + "learning_rate": 2.7381320033864434e-06, + "lm_loss": 5.4014, + "loss": 1.2774, + "step": 1456, + "text_contrastive_loss": 0.782, + "train_positive_log_prob": -80.3429, + "train_positive_token_accuracy": 0.0809, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3393, + "epoch": 3.288939051918736, + "grad_norm": 15.132230758666992, + "learning_rate": 2.7316638051074605e-06, + "lm_loss": 5.4682, + "loss": 1.2762, + "step": 1457, + "text_contrastive_loss": 0.7802, + "train_positive_log_prob": -80.2082, + "train_positive_token_accuracy": 0.0836, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3408, + "epoch": 3.291196388261851, + "grad_norm": 12.281061172485352, + "learning_rate": 2.72520038312511e-06, + "lm_loss": 5.4734, + "loss": 1.2608, + "step": 1458, + "text_contrastive_loss": 0.7453, + "train_positive_log_prob": -81.8262, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.4103, + "epoch": 3.293453724604966, + "grad_norm": 13.09191608428955, + "learning_rate": 2.7187417510490176e-06, + "lm_loss": 5.4509, + "loss": 1.2888, + "step": 1459, + "text_contrastive_loss": 0.6669, + "train_positive_log_prob": -79.7432, + "train_positive_token_accuracy": 0.082, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3439, + "epoch": 3.295711060948081, + "grad_norm": 13.085578918457031, + "learning_rate": 2.7122879224787315e-06, + "lm_loss": 5.5375, + "loss": 1.3131, + "step": 1460, + "text_contrastive_loss": 0.8309, + "train_positive_log_prob": -81.419, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.4153, + "epoch": 3.2979683972911964, + "grad_norm": 14.652360916137695, + "learning_rate": 2.7058389110036835e-06, + "lm_loss": 5.4822, + "loss": 1.3402, + "step": 1461, + "text_contrastive_loss": 0.7532, + "train_positive_log_prob": -81.0731, + "train_positive_token_accuracy": 0.074, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.3691, + "epoch": 3.3002257336343117, + "grad_norm": 13.3486967086792, + "learning_rate": 2.6993947302031643e-06, + "lm_loss": 5.3724, + "loss": 1.3395, + "step": 1462, + "text_contrastive_loss": 0.8664, + "train_positive_log_prob": -78.4713, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.2819, + "epoch": 3.3024830699774266, + "grad_norm": 12.146235466003418, + "learning_rate": 2.692955393646286e-06, + "lm_loss": 5.4441, + "loss": 1.1536, + "step": 1463, + "text_contrastive_loss": 0.6546, + "train_positive_log_prob": -80.8992, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.2486, + "epoch": 3.304740406320542, + "grad_norm": 10.897273063659668, + "learning_rate": 2.686520914891968e-06, + "lm_loss": 5.4241, + "loss": 1.0404, + "step": 1464, + "text_contrastive_loss": 0.4988, + "train_positive_log_prob": -79.5393, + "train_positive_token_accuracy": 0.0826, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.308, + "epoch": 3.3069977426636568, + "grad_norm": 11.77372932434082, + "learning_rate": 2.6800913074888984e-06, + "lm_loss": 5.513, + "loss": 1.2164, + "step": 1465, + "text_contrastive_loss": 0.7142, + "train_positive_log_prob": -83.4052, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.2628, + "epoch": 3.309255079006772, + "grad_norm": 12.088827133178711, + "learning_rate": 2.6736665849755073e-06, + "lm_loss": 5.4021, + "loss": 1.1425, + "step": 1466, + "text_contrastive_loss": 0.6789, + "train_positive_log_prob": -78.7537, + "train_positive_token_accuracy": 0.0753, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3914, + "epoch": 3.311512415349887, + "grad_norm": 13.151571273803711, + "learning_rate": 2.6672467608799413e-06, + "lm_loss": 5.4911, + "loss": 1.394, + "step": 1467, + "text_contrastive_loss": 0.907, + "train_positive_log_prob": -80.8498, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.3793, + "epoch": 3.3137697516930023, + "grad_norm": 11.619816780090332, + "learning_rate": 2.660831848720028e-06, + "lm_loss": 5.452, + "loss": 1.3058, + "step": 1468, + "text_contrastive_loss": 0.7626, + "train_positive_log_prob": -80.5445, + "train_positive_token_accuracy": 0.0775, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3849, + "epoch": 3.3160270880361176, + "grad_norm": 14.064322471618652, + "learning_rate": 2.654421862003256e-06, + "lm_loss": 5.4731, + "loss": 1.2844, + "step": 1469, + "text_contrastive_loss": 0.7044, + "train_positive_log_prob": -80.1711, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3282, + "epoch": 3.3182844243792324, + "grad_norm": 14.075796127319336, + "learning_rate": 2.648016814226742e-06, + "lm_loss": 5.6378, + "loss": 1.28, + "step": 1470, + "text_contrastive_loss": 0.776, + "train_positive_log_prob": -84.744, + "train_positive_token_accuracy": 0.073, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.4015, + "epoch": 3.3205417607223477, + "grad_norm": 13.909421920776367, + "learning_rate": 2.6416167188772052e-06, + "lm_loss": 5.4708, + "loss": 1.315, + "step": 1471, + "text_contrastive_loss": 0.7328, + "train_positive_log_prob": -82.4255, + "train_positive_token_accuracy": 0.077, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.4158, + "epoch": 3.3227990970654626, + "grad_norm": 15.058327674865723, + "learning_rate": 2.6352215894309306e-06, + "lm_loss": 5.4079, + "loss": 1.3381, + "step": 1472, + "text_contrastive_loss": 0.763, + "train_positive_log_prob": -81.0759, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3598, + "epoch": 3.325056433408578, + "grad_norm": 11.790754318237305, + "learning_rate": 2.6288314393537522e-06, + "lm_loss": 5.5039, + "loss": 1.2939, + "step": 1473, + "text_contrastive_loss": 0.7674, + "train_positive_log_prob": -81.0973, + "train_positive_token_accuracy": 0.0766, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3667, + "epoch": 3.327313769751693, + "grad_norm": 11.023698806762695, + "learning_rate": 2.6224462821010185e-06, + "lm_loss": 5.4978, + "loss": 1.3219, + "step": 1474, + "text_contrastive_loss": 0.811, + "train_positive_log_prob": -80.986, + "train_positive_token_accuracy": 0.0811, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3879, + "epoch": 3.329571106094808, + "grad_norm": 14.934098243713379, + "learning_rate": 2.616066131117563e-06, + "lm_loss": 5.4535, + "loss": 1.3026, + "step": 1475, + "text_contrastive_loss": 0.7387, + "train_positive_log_prob": -79.2843, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3735, + "epoch": 3.3318284424379234, + "grad_norm": 13.214871406555176, + "learning_rate": 2.6096909998376794e-06, + "lm_loss": 5.3928, + "loss": 1.3585, + "step": 1476, + "text_contrastive_loss": 0.8914, + "train_positive_log_prob": -81.1264, + "train_positive_token_accuracy": 0.075, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3439, + "epoch": 3.3340857787810383, + "grad_norm": 12.831974983215332, + "learning_rate": 2.6033209016850926e-06, + "lm_loss": 5.5281, + "loss": 1.2859, + "step": 1477, + "text_contrastive_loss": 0.7784, + "train_positive_log_prob": -82.962, + "train_positive_token_accuracy": 0.077, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3613, + "epoch": 3.3363431151241536, + "grad_norm": 13.602977752685547, + "learning_rate": 2.596955850072928e-06, + "lm_loss": 5.4166, + "loss": 1.2874, + "step": 1478, + "text_contrastive_loss": 0.7688, + "train_positive_log_prob": -80.0842, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.4305, + "epoch": 3.3386004514672685, + "grad_norm": 13.101784706115723, + "learning_rate": 2.5905958584036826e-06, + "lm_loss": 5.6299, + "loss": 1.4242, + "step": 1479, + "text_contrastive_loss": 0.8613, + "train_positive_log_prob": -82.6688, + "train_positive_token_accuracy": 0.0732, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.2826, + "epoch": 3.340857787810384, + "grad_norm": 10.809186935424805, + "learning_rate": 2.5842409400692026e-06, + "lm_loss": 5.505, + "loss": 1.1304, + "step": 1480, + "text_contrastive_loss": 0.5947, + "train_positive_log_prob": -80.1251, + "train_positive_token_accuracy": 0.08, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3325, + "epoch": 3.343115124153499, + "grad_norm": 11.67747974395752, + "learning_rate": 2.577891108450651e-06, + "lm_loss": 5.5436, + "loss": 1.2694, + "step": 1481, + "text_contrastive_loss": 0.765, + "train_positive_log_prob": -81.6404, + "train_positive_token_accuracy": 0.0817, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3285, + "epoch": 3.345372460496614, + "grad_norm": 12.611127853393555, + "learning_rate": 2.571546376918479e-06, + "lm_loss": 5.3745, + "loss": 1.2195, + "step": 1482, + "text_contrastive_loss": 0.7071, + "train_positive_log_prob": -80.1135, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3923, + "epoch": 3.3476297968397293, + "grad_norm": 12.921036720275879, + "learning_rate": 2.5652067588324015e-06, + "lm_loss": 5.4501, + "loss": 1.3112, + "step": 1483, + "text_contrastive_loss": 0.7478, + "train_positive_log_prob": -79.136, + "train_positive_token_accuracy": 0.0866, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3104, + "epoch": 3.349887133182844, + "grad_norm": 11.626730918884277, + "learning_rate": 2.55887226754136e-06, + "lm_loss": 5.3329, + "loss": 1.2155, + "step": 1484, + "text_contrastive_loss": 0.7437, + "train_positive_log_prob": -76.7685, + "train_positive_token_accuracy": 0.0841, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.4823, + "epoch": 3.3521444695259595, + "grad_norm": 14.942593574523926, + "learning_rate": 2.552542916383507e-06, + "lm_loss": 5.4315, + "loss": 1.4322, + "step": 1485, + "text_contrastive_loss": 0.8135, + "train_positive_log_prob": -77.445, + "train_positive_token_accuracy": 0.0866, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.4056, + "epoch": 3.3544018058690743, + "grad_norm": 14.18901252746582, + "learning_rate": 2.5462187186861697e-06, + "lm_loss": 5.4127, + "loss": 1.3467, + "step": 1486, + "text_contrastive_loss": 0.7997, + "train_positive_log_prob": -81.2758, + "train_positive_token_accuracy": 0.0832, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.2981, + "epoch": 3.3566591422121896, + "grad_norm": 12.61660385131836, + "learning_rate": 2.5398996877658256e-06, + "lm_loss": 5.5116, + "loss": 1.2088, + "step": 1487, + "text_contrastive_loss": 0.7191, + "train_positive_log_prob": -82.8459, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3404, + "epoch": 3.3589164785553045, + "grad_norm": 14.2517728805542, + "learning_rate": 2.5335858369280674e-06, + "lm_loss": 5.4921, + "loss": 1.2979, + "step": 1488, + "text_contrastive_loss": 0.8167, + "train_positive_log_prob": -83.4503, + "train_positive_token_accuracy": 0.081, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3578, + "epoch": 3.36117381489842, + "grad_norm": 15.055774688720703, + "learning_rate": 2.5272771794675866e-06, + "lm_loss": 5.6668, + "loss": 1.3308, + "step": 1489, + "text_contrastive_loss": 0.8126, + "train_positive_log_prob": -86.3078, + "train_positive_token_accuracy": 0.0823, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.423, + "epoch": 3.363431151241535, + "grad_norm": 13.894858360290527, + "learning_rate": 2.5209737286681367e-06, + "lm_loss": 5.4839, + "loss": 1.4224, + "step": 1490, + "text_contrastive_loss": 0.902, + "train_positive_log_prob": -81.4243, + "train_positive_token_accuracy": 0.0713, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3009, + "epoch": 3.36568848758465, + "grad_norm": 13.374127388000488, + "learning_rate": 2.514675497802508e-06, + "lm_loss": 5.5744, + "loss": 1.2516, + "step": 1491, + "text_contrastive_loss": 0.7866, + "train_positive_log_prob": -81.8286, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.283, + "epoch": 3.3679458239277653, + "grad_norm": 12.251059532165527, + "learning_rate": 2.508382500132499e-06, + "lm_loss": 5.4674, + "loss": 1.1953, + "step": 1492, + "text_contrastive_loss": 0.7312, + "train_positive_log_prob": -80.6066, + "train_positive_token_accuracy": 0.0859, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3043, + "epoch": 3.37020316027088, + "grad_norm": 12.047136306762695, + "learning_rate": 2.50209474890889e-06, + "lm_loss": 5.3919, + "loss": 1.2822, + "step": 1493, + "text_contrastive_loss": 0.8774, + "train_positive_log_prob": -79.0875, + "train_positive_token_accuracy": 0.077, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3285, + "epoch": 3.3724604966139955, + "grad_norm": 12.00024700164795, + "learning_rate": 2.495812257371416e-06, + "lm_loss": 5.3934, + "loss": 1.2672, + "step": 1494, + "text_contrastive_loss": 0.7987, + "train_positive_log_prob": -79.4826, + "train_positive_token_accuracy": 0.0734, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3212, + "epoch": 3.374717832957111, + "grad_norm": 13.271804809570312, + "learning_rate": 2.4895350387487304e-06, + "lm_loss": 5.6926, + "loss": 1.2715, + "step": 1495, + "text_contrastive_loss": 0.7622, + "train_positive_log_prob": -84.141, + "train_positive_token_accuracy": 0.0766, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.2915, + "epoch": 3.3769751693002257, + "grad_norm": 12.15084171295166, + "learning_rate": 2.4832631062583906e-06, + "lm_loss": 5.3955, + "loss": 1.2233, + "step": 1496, + "text_contrastive_loss": 0.7846, + "train_positive_log_prob": -79.1845, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.4389, + "epoch": 3.379232505643341, + "grad_norm": 14.188138961791992, + "learning_rate": 2.47699647310682e-06, + "lm_loss": 5.5363, + "loss": 1.4373, + "step": 1497, + "text_contrastive_loss": 0.8895, + "train_positive_log_prob": -85.08, + "train_positive_token_accuracy": 0.0844, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3046, + "epoch": 3.381489841986456, + "grad_norm": 12.234025955200195, + "learning_rate": 2.470735152489287e-06, + "lm_loss": 5.441, + "loss": 1.1393, + "step": 1498, + "text_contrastive_loss": 0.5812, + "train_positive_log_prob": -81.3027, + "train_positive_token_accuracy": 0.0863, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.4114, + "epoch": 3.383747178329571, + "grad_norm": 13.649312019348145, + "learning_rate": 2.4644791575898665e-06, + "lm_loss": 5.5644, + "loss": 1.4019, + "step": 1499, + "text_contrastive_loss": 0.8682, + "train_positive_log_prob": -83.4828, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3366, + "epoch": 3.386004514672686, + "grad_norm": 12.053716659545898, + "learning_rate": 2.4582285015814263e-06, + "lm_loss": 5.4297, + "loss": 1.1897, + "step": 1500, + "text_contrastive_loss": 0.6204, + "train_positive_log_prob": -81.8915, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3712, + "epoch": 3.3882618510158014, + "grad_norm": 14.610511779785156, + "learning_rate": 2.4519831976255892e-06, + "lm_loss": 5.4961, + "loss": 1.3351, + "step": 1501, + "text_contrastive_loss": 0.8287, + "train_positive_log_prob": -80.1229, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.298, + "epoch": 3.3905191873589167, + "grad_norm": 12.692839622497559, + "learning_rate": 2.445743258872711e-06, + "lm_loss": 5.4693, + "loss": 1.1698, + "step": 1502, + "text_contrastive_loss": 0.6497, + "train_positive_log_prob": -81.2726, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.4164, + "epoch": 3.3927765237020315, + "grad_norm": 14.705878257751465, + "learning_rate": 2.4395086984618486e-06, + "lm_loss": 5.5159, + "loss": 1.3115, + "step": 1503, + "text_contrastive_loss": 0.6869, + "train_positive_log_prob": -80.0016, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3459, + "epoch": 3.395033860045147, + "grad_norm": 13.019169807434082, + "learning_rate": 2.433279529520732e-06, + "lm_loss": 5.4324, + "loss": 1.3071, + "step": 1504, + "text_contrastive_loss": 0.836, + "train_positive_log_prob": -80.4066, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3806, + "epoch": 3.3972911963882617, + "grad_norm": 12.697392463684082, + "learning_rate": 2.427055765165741e-06, + "lm_loss": 5.4354, + "loss": 1.3435, + "step": 1505, + "text_contrastive_loss": 0.8388, + "train_positive_log_prob": -80.6427, + "train_positive_token_accuracy": 0.0835, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3227, + "epoch": 3.399548532731377, + "grad_norm": 12.562392234802246, + "learning_rate": 2.420837418501876e-06, + "lm_loss": 5.3875, + "loss": 1.2334, + "step": 1506, + "text_contrastive_loss": 0.7438, + "train_positive_log_prob": -77.6122, + "train_positive_token_accuracy": 0.0841, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.352, + "epoch": 3.401805869074492, + "grad_norm": 14.444911003112793, + "learning_rate": 2.414624502622731e-06, + "lm_loss": 5.4369, + "loss": 1.2929, + "step": 1507, + "text_contrastive_loss": 0.7944, + "train_positive_log_prob": -78.5596, + "train_positive_token_accuracy": 0.0746, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.328, + "epoch": 3.404063205417607, + "grad_norm": 12.534345626831055, + "learning_rate": 2.408417030610457e-06, + "lm_loss": 5.4775, + "loss": 1.2484, + "step": 1508, + "text_contrastive_loss": 0.7452, + "train_positive_log_prob": -80.2411, + "train_positive_token_accuracy": 0.0749, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3472, + "epoch": 3.4063205417607225, + "grad_norm": 14.023134231567383, + "learning_rate": 2.4022150155357526e-06, + "lm_loss": 5.5629, + "loss": 1.2362, + "step": 1509, + "text_contrastive_loss": 0.6654, + "train_positive_log_prob": -84.7518, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.3415, + "epoch": 3.4085778781038374, + "grad_norm": 11.74463176727295, + "learning_rate": 2.396018470457821e-06, + "lm_loss": 5.3451, + "loss": 1.2499, + "step": 1510, + "text_contrastive_loss": 0.7476, + "train_positive_log_prob": -78.4643, + "train_positive_token_accuracy": 0.074, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.2724, + "epoch": 3.4108352144469527, + "grad_norm": 12.518877029418945, + "learning_rate": 2.389827408424345e-06, + "lm_loss": 5.5256, + "loss": 1.1175, + "step": 1511, + "text_contrastive_loss": 0.5852, + "train_positive_log_prob": -82.9249, + "train_positive_token_accuracy": 0.0742, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.4259, + "epoch": 3.4130925507900676, + "grad_norm": 13.404441833496094, + "learning_rate": 2.3836418424714665e-06, + "lm_loss": 5.3805, + "loss": 1.4151, + "step": 1512, + "text_contrastive_loss": 0.9023, + "train_positive_log_prob": -80.1172, + "train_positive_token_accuracy": 0.0832, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3436, + "epoch": 3.415349887133183, + "grad_norm": 13.044618606567383, + "learning_rate": 2.377461785623752e-06, + "lm_loss": 5.3992, + "loss": 1.2625, + "step": 1513, + "text_contrastive_loss": 0.758, + "train_positive_log_prob": -78.9232, + "train_positive_token_accuracy": 0.0708, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.333, + "epoch": 3.417607223476298, + "grad_norm": 11.65329647064209, + "learning_rate": 2.3712872508941714e-06, + "lm_loss": 5.4098, + "loss": 1.2041, + "step": 1514, + "text_contrastive_loss": 0.6602, + "train_positive_log_prob": -82.1584, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3086, + "epoch": 3.419864559819413, + "grad_norm": 12.141254425048828, + "learning_rate": 2.3651182512840604e-06, + "lm_loss": 5.4426, + "loss": 1.241, + "step": 1515, + "text_contrastive_loss": 0.7764, + "train_positive_log_prob": -80.1312, + "train_positive_token_accuracy": 0.069, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.3512, + "epoch": 3.4221218961625284, + "grad_norm": 12.427938461303711, + "learning_rate": 2.358954799783106e-06, + "lm_loss": 5.3755, + "loss": 1.3403, + "step": 1516, + "text_contrastive_loss": 0.9031, + "train_positive_log_prob": -79.6495, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3297, + "epoch": 3.4243792325056432, + "grad_norm": 12.42595100402832, + "learning_rate": 2.3527969093693105e-06, + "lm_loss": 5.5102, + "loss": 1.2681, + "step": 1517, + "text_contrastive_loss": 0.7747, + "train_positive_log_prob": -81.852, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3057, + "epoch": 3.4266365688487586, + "grad_norm": 10.852728843688965, + "learning_rate": 2.346644593008966e-06, + "lm_loss": 5.4427, + "loss": 1.1988, + "step": 1518, + "text_contrastive_loss": 0.6976, + "train_positive_log_prob": -82.0853, + "train_positive_token_accuracy": 0.0855, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.2503, + "epoch": 3.4288939051918734, + "grad_norm": 10.33969783782959, + "learning_rate": 2.3404978636566312e-06, + "lm_loss": 5.3858, + "loss": 1.177, + "step": 1519, + "text_contrastive_loss": 0.7762, + "train_positive_log_prob": -80.8463, + "train_positive_token_accuracy": 0.0814, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3294, + "epoch": 3.4311512415349887, + "grad_norm": 14.799424171447754, + "learning_rate": 2.3343567342550933e-06, + "lm_loss": 5.4364, + "loss": 1.2267, + "step": 1520, + "text_contrastive_loss": 0.7072, + "train_positive_log_prob": -79.1218, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3184, + "epoch": 3.4334085778781036, + "grad_norm": 12.71818733215332, + "learning_rate": 2.328221217735355e-06, + "lm_loss": 5.3584, + "loss": 1.1865, + "step": 1521, + "text_contrastive_loss": 0.6645, + "train_positive_log_prob": -78.6976, + "train_positive_token_accuracy": 0.0822, + "train_positive_token_prob": 0.0324 + }, + { + "contrastive_loss": 0.397, + "epoch": 3.435665914221219, + "grad_norm": 13.054512023925781, + "learning_rate": 2.322091327016597e-06, + "lm_loss": 5.3944, + "loss": 1.3944, + "step": 1522, + "text_contrastive_loss": 0.9158, + "train_positive_log_prob": -79.7106, + "train_positive_token_accuracy": 0.074, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3239, + "epoch": 3.4379232505643342, + "grad_norm": 12.34341049194336, + "learning_rate": 2.3159670750061563e-06, + "lm_loss": 5.4866, + "loss": 1.1989, + "step": 1523, + "text_contrastive_loss": 0.6526, + "train_positive_log_prob": -81.8706, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3727, + "epoch": 3.440180586907449, + "grad_norm": 13.520145416259766, + "learning_rate": 2.3098484745994933e-06, + "lm_loss": 5.4234, + "loss": 1.3206, + "step": 1524, + "text_contrastive_loss": 0.8111, + "train_positive_log_prob": -83.407, + "train_positive_token_accuracy": 0.0726, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3323, + "epoch": 3.4424379232505644, + "grad_norm": 12.443741798400879, + "learning_rate": 2.3037355386801683e-06, + "lm_loss": 5.4063, + "loss": 1.1927, + "step": 1525, + "text_contrastive_loss": 0.6395, + "train_positive_log_prob": -79.2818, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3182, + "epoch": 3.4446952595936793, + "grad_norm": 11.998956680297852, + "learning_rate": 2.2976282801198237e-06, + "lm_loss": 5.4199, + "loss": 1.1773, + "step": 1526, + "text_contrastive_loss": 0.6341, + "train_positive_log_prob": -80.3, + "train_positive_token_accuracy": 0.0788, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.4168, + "epoch": 3.4469525959367946, + "grad_norm": 14.485918998718262, + "learning_rate": 2.2915267117781328e-06, + "lm_loss": 5.4367, + "loss": 1.4144, + "step": 1527, + "text_contrastive_loss": 0.9079, + "train_positive_log_prob": -80.6141, + "train_positive_token_accuracy": 0.0772, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3213, + "epoch": 3.44920993227991, + "grad_norm": 11.161625862121582, + "learning_rate": 2.2854308465027963e-06, + "lm_loss": 5.4661, + "loss": 1.2426, + "step": 1528, + "text_contrastive_loss": 0.7495, + "train_positive_log_prob": -82.3303, + "train_positive_token_accuracy": 0.0788, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3388, + "epoch": 3.4514672686230248, + "grad_norm": 12.288647651672363, + "learning_rate": 2.279340697129505e-06, + "lm_loss": 5.423, + "loss": 1.2354, + "step": 1529, + "text_contrastive_loss": 0.7085, + "train_positive_log_prob": -80.7308, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3776, + "epoch": 3.45372460496614, + "grad_norm": 11.723231315612793, + "learning_rate": 2.2732562764819157e-06, + "lm_loss": 5.4406, + "loss": 1.2894, + "step": 1530, + "text_contrastive_loss": 0.7356, + "train_positive_log_prob": -81.5258, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.285, + "epoch": 3.455981941309255, + "grad_norm": 12.01729965209961, + "learning_rate": 2.267177597371616e-06, + "lm_loss": 5.3329, + "loss": 1.1978, + "step": 1531, + "text_contrastive_loss": 0.7591, + "train_positive_log_prob": -81.1599, + "train_positive_token_accuracy": 0.0842, + "train_positive_token_prob": 0.0327 + }, + { + "contrastive_loss": 0.3579, + "epoch": 3.4582392776523703, + "grad_norm": 12.327338218688965, + "learning_rate": 2.26110467259811e-06, + "lm_loss": 5.3666, + "loss": 1.2861, + "step": 1532, + "text_contrastive_loss": 0.7831, + "train_positive_log_prob": -79.0154, + "train_positive_token_accuracy": 0.0849, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.3169, + "epoch": 3.460496613995485, + "grad_norm": 12.386646270751953, + "learning_rate": 2.255037514948785e-06, + "lm_loss": 5.3893, + "loss": 1.2432, + "step": 1533, + "text_contrastive_loss": 0.7747, + "train_positive_log_prob": -79.8302, + "train_positive_token_accuracy": 0.0865, + "train_positive_token_prob": 0.0325 + }, + { + "contrastive_loss": 0.413, + "epoch": 3.4627539503386005, + "grad_norm": 14.126786231994629, + "learning_rate": 2.2489761371988826e-06, + "lm_loss": 5.3938, + "loss": 1.3778, + "step": 1534, + "text_contrastive_loss": 0.8507, + "train_positive_log_prob": -81.2654, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3497, + "epoch": 3.4650112866817158, + "grad_norm": 15.412215232849121, + "learning_rate": 2.242920552111473e-06, + "lm_loss": 5.3937, + "loss": 1.2585, + "step": 1535, + "text_contrastive_loss": 0.7389, + "train_positive_log_prob": -80.4813, + "train_positive_token_accuracy": 0.0796, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3951, + "epoch": 3.4672686230248306, + "grad_norm": 13.417200088500977, + "learning_rate": 2.236870772437433e-06, + "lm_loss": 5.5269, + "loss": 1.3807, + "step": 1536, + "text_contrastive_loss": 0.8658, + "train_positive_log_prob": -81.4793, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3015, + "epoch": 3.469525959367946, + "grad_norm": 12.396004676818848, + "learning_rate": 2.2308268109154126e-06, + "lm_loss": 5.525, + "loss": 1.2115, + "step": 1537, + "text_contrastive_loss": 0.715, + "train_positive_log_prob": -82.4661, + "train_positive_token_accuracy": 0.0796, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3918, + "epoch": 3.471783295711061, + "grad_norm": 13.730256080627441, + "learning_rate": 2.224788680271811e-06, + "lm_loss": 5.4208, + "loss": 1.3069, + "step": 1538, + "text_contrastive_loss": 0.7459, + "train_positive_log_prob": -81.7353, + "train_positive_token_accuracy": 0.0764, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3635, + "epoch": 3.474040632054176, + "grad_norm": 13.051783561706543, + "learning_rate": 2.218756393220753e-06, + "lm_loss": 5.5327, + "loss": 1.3163, + "step": 1539, + "text_contrastive_loss": 0.799, + "train_positive_log_prob": -81.5636, + "train_positive_token_accuracy": 0.0713, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.2739, + "epoch": 3.476297968397291, + "grad_norm": 11.161017417907715, + "learning_rate": 2.212729962464051e-06, + "lm_loss": 5.4738, + "loss": 1.157, + "step": 1540, + "text_contrastive_loss": 0.6714, + "train_positive_log_prob": -80.4169, + "train_positive_token_accuracy": 0.0867, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.399, + "epoch": 3.4785553047404063, + "grad_norm": 13.931135177612305, + "learning_rate": 2.2067094006911943e-06, + "lm_loss": 5.3826, + "loss": 1.42, + "step": 1541, + "text_contrastive_loss": 0.9656, + "train_positive_log_prob": -79.5854, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.327, + "epoch": 3.4808126410835216, + "grad_norm": 12.971481323242188, + "learning_rate": 2.2006947205793107e-06, + "lm_loss": 5.3727, + "loss": 1.2679, + "step": 1542, + "text_contrastive_loss": 0.8071, + "train_positive_log_prob": -78.2375, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3923, + "epoch": 3.4830699774266365, + "grad_norm": 14.117362976074219, + "learning_rate": 2.1946859347931442e-06, + "lm_loss": 5.4001, + "loss": 1.3156, + "step": 1543, + "text_contrastive_loss": 0.7665, + "train_positive_log_prob": -78.6815, + "train_positive_token_accuracy": 0.0847, + "train_positive_token_prob": 0.0325 + }, + { + "contrastive_loss": 0.3723, + "epoch": 3.485327313769752, + "grad_norm": 12.617060661315918, + "learning_rate": 2.1886830559850264e-06, + "lm_loss": 5.396, + "loss": 1.285, + "step": 1544, + "text_contrastive_loss": 0.7461, + "train_positive_log_prob": -78.2191, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3351, + "epoch": 3.4875846501128667, + "grad_norm": 12.729511260986328, + "learning_rate": 2.182686096794852e-06, + "lm_loss": 5.4622, + "loss": 1.2903, + "step": 1545, + "text_contrastive_loss": 0.818, + "train_positive_log_prob": -81.4419, + "train_positive_token_accuracy": 0.0778, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3362, + "epoch": 3.489841986455982, + "grad_norm": 13.382636070251465, + "learning_rate": 2.176695069850053e-06, + "lm_loss": 5.4945, + "loss": 1.2525, + "step": 1546, + "text_contrastive_loss": 0.7338, + "train_positive_log_prob": -79.9313, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3893, + "epoch": 3.4920993227990973, + "grad_norm": 13.417473793029785, + "learning_rate": 2.1707099877655634e-06, + "lm_loss": 5.3825, + "loss": 1.32, + "step": 1547, + "text_contrastive_loss": 0.7847, + "train_positive_log_prob": -80.5472, + "train_positive_token_accuracy": 0.0801, + "train_positive_token_prob": 0.0325 + }, + { + "contrastive_loss": 0.2652, + "epoch": 3.494356659142212, + "grad_norm": 11.2703857421875, + "learning_rate": 2.1647308631438068e-06, + "lm_loss": 5.358, + "loss": 1.1719, + "step": 1548, + "text_contrastive_loss": 0.7418, + "train_positive_log_prob": -79.5092, + "train_positive_token_accuracy": 0.0882, + "train_positive_token_prob": 0.0325 + }, + { + "contrastive_loss": 0.3119, + "epoch": 3.4966139954853275, + "grad_norm": 13.06146240234375, + "learning_rate": 2.1587577085746596e-06, + "lm_loss": 5.4155, + "loss": 1.2043, + "step": 1549, + "text_contrastive_loss": 0.7018, + "train_positive_log_prob": -78.2922, + "train_positive_token_accuracy": 0.0851, + "train_positive_token_prob": 0.0326 + }, + { + "contrastive_loss": 0.414, + "epoch": 3.4988713318284423, + "grad_norm": 14.1589937210083, + "learning_rate": 2.1527905366354292e-06, + "lm_loss": 5.3184, + "loss": 1.2823, + "step": 1550, + "text_contrastive_loss": 0.673, + "train_positive_log_prob": -79.1563, + "train_positive_token_accuracy": 0.0777, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.4281, + "epoch": 3.5011286681715577, + "grad_norm": 13.723095893859863, + "learning_rate": 2.14682935989082e-06, + "lm_loss": 5.3547, + "loss": 1.351, + "step": 1551, + "text_contrastive_loss": 0.7749, + "train_positive_log_prob": -77.521, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3478, + "epoch": 3.5033860045146725, + "grad_norm": 13.135018348693848, + "learning_rate": 2.14087419089292e-06, + "lm_loss": 5.3895, + "loss": 1.29, + "step": 1552, + "text_contrastive_loss": 0.8065, + "train_positive_log_prob": -80.1069, + "train_positive_token_accuracy": 0.0857, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.2952, + "epoch": 3.505643340857788, + "grad_norm": 11.934090614318848, + "learning_rate": 2.1349250421811622e-06, + "lm_loss": 5.3701, + "loss": 1.1954, + "step": 1553, + "text_contrastive_loss": 0.7264, + "train_positive_log_prob": -80.7787, + "train_positive_token_accuracy": 0.0836, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.2546, + "epoch": 3.5079006772009027, + "grad_norm": 12.147294044494629, + "learning_rate": 2.1289819262823065e-06, + "lm_loss": 5.426, + "loss": 1.1223, + "step": 1554, + "text_contrastive_loss": 0.6503, + "train_positive_log_prob": -78.7625, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3776, + "epoch": 3.510158013544018, + "grad_norm": 14.151532173156738, + "learning_rate": 2.1230448557104087e-06, + "lm_loss": 5.3315, + "loss": 1.2501, + "step": 1555, + "text_contrastive_loss": 0.6787, + "train_positive_log_prob": -77.3484, + "train_positive_token_accuracy": 0.0867, + "train_positive_token_prob": 0.0338 + }, + { + "contrastive_loss": 0.3334, + "epoch": 3.5124153498871333, + "grad_norm": 12.25490665435791, + "learning_rate": 2.117113842966792e-06, + "lm_loss": 5.5106, + "loss": 1.2282, + "step": 1556, + "text_contrastive_loss": 0.6875, + "train_positive_log_prob": -81.1052, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.4045, + "epoch": 3.514672686230248, + "grad_norm": 13.677350044250488, + "learning_rate": 2.111188900540028e-06, + "lm_loss": 5.3986, + "loss": 1.3026, + "step": 1557, + "text_contrastive_loss": 0.7165, + "train_positive_log_prob": -81.8014, + "train_positive_token_accuracy": 0.0869, + "train_positive_token_prob": 0.0327 + }, + { + "contrastive_loss": 0.2848, + "epoch": 3.5169300225733635, + "grad_norm": 10.790732383728027, + "learning_rate": 2.1052700409059057e-06, + "lm_loss": 5.4969, + "loss": 1.1378, + "step": 1558, + "text_contrastive_loss": 0.6067, + "train_positive_log_prob": -82.6206, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.297, + "epoch": 3.5191873589164784, + "grad_norm": 13.040787696838379, + "learning_rate": 2.0993572765274044e-06, + "lm_loss": 5.5216, + "loss": 1.2618, + "step": 1559, + "text_contrastive_loss": 0.8254, + "train_positive_log_prob": -80.8334, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.2941, + "epoch": 3.5214446952595937, + "grad_norm": 12.020641326904297, + "learning_rate": 2.093450619854671e-06, + "lm_loss": 5.55, + "loss": 1.2966, + "step": 1560, + "text_contrastive_loss": 0.895, + "train_positive_log_prob": -82.4666, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3236, + "epoch": 3.523702031602709, + "grad_norm": 13.676803588867188, + "learning_rate": 2.08755008332499e-06, + "lm_loss": 5.4172, + "loss": 1.2455, + "step": 1561, + "text_contrastive_loss": 0.7605, + "train_positive_log_prob": -80.858, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3723, + "epoch": 3.525959367945824, + "grad_norm": 13.02118968963623, + "learning_rate": 2.0816556793627624e-06, + "lm_loss": 5.4805, + "loss": 1.3172, + "step": 1562, + "text_contrastive_loss": 0.7937, + "train_positive_log_prob": -82.3814, + "train_positive_token_accuracy": 0.0826, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4239, + "epoch": 3.528216704288939, + "grad_norm": 13.896027565002441, + "learning_rate": 2.0757674203794696e-06, + "lm_loss": 5.5036, + "loss": 1.3446, + "step": 1563, + "text_contrastive_loss": 0.7407, + "train_positive_log_prob": -83.6078, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3236, + "epoch": 3.530474040632054, + "grad_norm": 12.533717155456543, + "learning_rate": 2.06988531877366e-06, + "lm_loss": 5.2708, + "loss": 1.2117, + "step": 1564, + "text_contrastive_loss": 0.722, + "train_positive_log_prob": -77.4799, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3532, + "epoch": 3.5327313769751694, + "grad_norm": 12.881248474121094, + "learning_rate": 2.064009386930915e-06, + "lm_loss": 5.4923, + "loss": 1.3257, + "step": 1565, + "text_contrastive_loss": 0.8466, + "train_positive_log_prob": -80.1536, + "train_positive_token_accuracy": 0.0846, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.4385, + "epoch": 3.5349887133182847, + "grad_norm": 14.63247299194336, + "learning_rate": 2.0581396372238254e-06, + "lm_loss": 5.3747, + "loss": 1.3829, + "step": 1566, + "text_contrastive_loss": 0.8139, + "train_positive_log_prob": -78.5237, + "train_positive_token_accuracy": 0.0751, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3244, + "epoch": 3.5372460496613995, + "grad_norm": 13.624622344970703, + "learning_rate": 2.0522760820119615e-06, + "lm_loss": 5.4967, + "loss": 1.2315, + "step": 1567, + "text_contrastive_loss": 0.715, + "train_positive_log_prob": -82.5798, + "train_positive_token_accuracy": 0.0736, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.462, + "epoch": 3.5395033860045144, + "grad_norm": 16.469789505004883, + "learning_rate": 2.046418733641853e-06, + "lm_loss": 5.5086, + "loss": 1.4521, + "step": 1568, + "text_contrastive_loss": 0.8784, + "train_positive_log_prob": -81.2941, + "train_positive_token_accuracy": 0.077, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3068, + "epoch": 3.5417607223476297, + "grad_norm": 13.421969413757324, + "learning_rate": 2.04056760444696e-06, + "lm_loss": 5.5623, + "loss": 1.2227, + "step": 1569, + "text_contrastive_loss": 0.7193, + "train_positive_log_prob": -85.6535, + "train_positive_token_accuracy": 0.0873, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3354, + "epoch": 3.544018058690745, + "grad_norm": 12.142635345458984, + "learning_rate": 2.0347227067476478e-06, + "lm_loss": 5.4408, + "loss": 1.3561, + "step": 1570, + "text_contrastive_loss": 0.9531, + "train_positive_log_prob": -79.1458, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.2655, + "epoch": 3.54627539503386, + "grad_norm": 10.728946685791016, + "learning_rate": 2.02888405285116e-06, + "lm_loss": 5.4376, + "loss": 1.1793, + "step": 1571, + "text_contrastive_loss": 0.74, + "train_positive_log_prob": -78.5897, + "train_positive_token_accuracy": 0.073, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.418, + "epoch": 3.5485327313769752, + "grad_norm": 13.324907302856445, + "learning_rate": 2.02305165505159e-06, + "lm_loss": 5.4095, + "loss": 1.3383, + "step": 1572, + "text_contrastive_loss": 0.7586, + "train_positive_log_prob": -80.7038, + "train_positive_token_accuracy": 0.0767, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.4185, + "epoch": 3.55079006772009, + "grad_norm": 13.187376022338867, + "learning_rate": 2.0172255256298623e-06, + "lm_loss": 5.4101, + "loss": 1.3616, + "step": 1573, + "text_contrastive_loss": 0.8042, + "train_positive_log_prob": -80.7716, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.2939, + "epoch": 3.5530474040632054, + "grad_norm": 10.816732406616211, + "learning_rate": 2.0114056768537005e-06, + "lm_loss": 5.3044, + "loss": 1.1691, + "step": 1574, + "text_contrastive_loss": 0.6895, + "train_positive_log_prob": -79.7297, + "train_positive_token_accuracy": 0.0829, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3104, + "epoch": 3.5553047404063207, + "grad_norm": 12.659784317016602, + "learning_rate": 2.005592120977606e-06, + "lm_loss": 5.4125, + "loss": 1.2332, + "step": 1575, + "text_contrastive_loss": 0.7631, + "train_positive_log_prob": -78.757, + "train_positive_token_accuracy": 0.0811, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.4129, + "epoch": 3.5575620767494356, + "grad_norm": 14.013603210449219, + "learning_rate": 1.9997848702428226e-06, + "lm_loss": 5.4054, + "loss": 1.4142, + "step": 1576, + "text_contrastive_loss": 0.9216, + "train_positive_log_prob": -80.5539, + "train_positive_token_accuracy": 0.0773, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.4494, + "epoch": 3.559819413092551, + "grad_norm": 15.276517868041992, + "learning_rate": 1.9939839368773267e-06, + "lm_loss": 5.5981, + "loss": 1.4349, + "step": 1577, + "text_contrastive_loss": 0.8513, + "train_positive_log_prob": -82.2205, + "train_positive_token_accuracy": 0.0798, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.4807, + "epoch": 3.5620767494356658, + "grad_norm": 15.83739185333252, + "learning_rate": 1.9881893330957893e-06, + "lm_loss": 5.5022, + "loss": 1.5267, + "step": 1578, + "text_contrastive_loss": 0.9916, + "train_positive_log_prob": -81.4366, + "train_positive_token_accuracy": 0.0755, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.2512, + "epoch": 3.564334085778781, + "grad_norm": 11.277016639709473, + "learning_rate": 1.982401071099549e-06, + "lm_loss": 5.3861, + "loss": 1.2154, + "step": 1579, + "text_contrastive_loss": 0.8512, + "train_positive_log_prob": -82.5564, + "train_positive_token_accuracy": 0.0849, + "train_positive_token_prob": 0.0326 + }, + { + "contrastive_loss": 0.412, + "epoch": 3.5665914221218964, + "grad_norm": 13.39768123626709, + "learning_rate": 1.9766191630765964e-06, + "lm_loss": 5.5388, + "loss": 1.3725, + "step": 1580, + "text_contrastive_loss": 0.8132, + "train_positive_log_prob": -82.0002, + "train_positive_token_accuracy": 0.0718, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.2447, + "epoch": 3.5688487584650113, + "grad_norm": 9.737256050109863, + "learning_rate": 1.970843621201541e-06, + "lm_loss": 5.3961, + "loss": 1.1214, + "step": 1581, + "text_contrastive_loss": 0.6741, + "train_positive_log_prob": -79.0642, + "train_positive_token_accuracy": 0.081, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3651, + "epoch": 3.5711060948081266, + "grad_norm": 12.136470794677734, + "learning_rate": 1.9650744576355894e-06, + "lm_loss": 5.4595, + "loss": 1.2708, + "step": 1582, + "text_contrastive_loss": 0.7195, + "train_positive_log_prob": -80.7617, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.4361, + "epoch": 3.5733634311512414, + "grad_norm": 13.926996231079102, + "learning_rate": 1.959311684526513e-06, + "lm_loss": 5.5117, + "loss": 1.3979, + "step": 1583, + "text_contrastive_loss": 0.8212, + "train_positive_log_prob": -84.4235, + "train_positive_token_accuracy": 0.0803, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3809, + "epoch": 3.5756207674943568, + "grad_norm": 14.882469177246094, + "learning_rate": 1.9535553140086322e-06, + "lm_loss": 5.3393, + "loss": 1.3332, + "step": 1584, + "text_contrastive_loss": 0.8366, + "train_positive_log_prob": -78.4825, + "train_positive_token_accuracy": 0.0856, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.375, + "epoch": 3.5778781038374716, + "grad_norm": 14.453103065490723, + "learning_rate": 1.9478053582027826e-06, + "lm_loss": 5.3759, + "loss": 1.3111, + "step": 1585, + "text_contrastive_loss": 0.7972, + "train_positive_log_prob": -78.0752, + "train_positive_token_accuracy": 0.0758, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4642, + "epoch": 3.580135440180587, + "grad_norm": 12.975154876708984, + "learning_rate": 1.9420618292162974e-06, + "lm_loss": 5.3147, + "loss": 1.4381, + "step": 1586, + "text_contrastive_loss": 0.8848, + "train_positive_log_prob": -79.8862, + "train_positive_token_accuracy": 0.0907, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.3517, + "epoch": 3.582392776523702, + "grad_norm": 13.040855407714844, + "learning_rate": 1.9363247391429695e-06, + "lm_loss": 5.4496, + "loss": 1.2954, + "step": 1587, + "text_contrastive_loss": 0.7975, + "train_positive_log_prob": -81.7, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3409, + "epoch": 3.584650112866817, + "grad_norm": 12.049163818359375, + "learning_rate": 1.93059410006304e-06, + "lm_loss": 5.479, + "loss": 1.2794, + "step": 1588, + "text_contrastive_loss": 0.7812, + "train_positive_log_prob": -82.9113, + "train_positive_token_accuracy": 0.0782, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3167, + "epoch": 3.5869074492099324, + "grad_norm": 11.480716705322266, + "learning_rate": 1.924869924043165e-06, + "lm_loss": 5.4008, + "loss": 1.2696, + "step": 1589, + "text_contrastive_loss": 0.8256, + "train_positive_log_prob": -80.4452, + "train_positive_token_accuracy": 0.0816, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3677, + "epoch": 3.5891647855530473, + "grad_norm": 12.872091293334961, + "learning_rate": 1.919152223136391e-06, + "lm_loss": 5.4399, + "loss": 1.3073, + "step": 1590, + "text_contrastive_loss": 0.7913, + "train_positive_log_prob": -80.8766, + "train_positive_token_accuracy": 0.0767, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3326, + "epoch": 3.5914221218961626, + "grad_norm": 14.663481712341309, + "learning_rate": 1.913441009382133e-06, + "lm_loss": 5.3591, + "loss": 1.27, + "step": 1591, + "text_contrastive_loss": 0.8029, + "train_positive_log_prob": -79.2333, + "train_positive_token_accuracy": 0.0806, + "train_positive_token_prob": 0.033 + }, + { + "contrastive_loss": 0.3776, + "epoch": 3.5936794582392775, + "grad_norm": 13.156084060668945, + "learning_rate": 1.9077362948061404e-06, + "lm_loss": 5.3629, + "loss": 1.3204, + "step": 1592, + "text_contrastive_loss": 0.8129, + "train_positive_log_prob": -79.6901, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.5031, + "epoch": 3.595936794582393, + "grad_norm": 13.418434143066406, + "learning_rate": 1.902038091420481e-06, + "lm_loss": 5.4151, + "loss": 1.4281, + "step": 1593, + "text_contrastive_loss": 0.767, + "train_positive_log_prob": -79.2372, + "train_positive_token_accuracy": 0.0734, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3917, + "epoch": 3.598194130925508, + "grad_norm": 12.24238395690918, + "learning_rate": 1.8963464112235185e-06, + "lm_loss": 5.4585, + "loss": 1.3838, + "step": 1594, + "text_contrastive_loss": 0.8923, + "train_positive_log_prob": -79.5288, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.4299, + "epoch": 3.600451467268623, + "grad_norm": 17.335559844970703, + "learning_rate": 1.8906612661998698e-06, + "lm_loss": 5.439, + "loss": 1.3305, + "step": 1595, + "text_contrastive_loss": 0.7134, + "train_positive_log_prob": -80.5463, + "train_positive_token_accuracy": 0.081, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3163, + "epoch": 3.6027088036117383, + "grad_norm": 12.135326385498047, + "learning_rate": 1.884982668320398e-06, + "lm_loss": 5.4719, + "loss": 1.2287, + "step": 1596, + "text_contrastive_loss": 0.7305, + "train_positive_log_prob": -78.8079, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3358, + "epoch": 3.604966139954853, + "grad_norm": 12.836174011230469, + "learning_rate": 1.8793106295421797e-06, + "lm_loss": 5.3454, + "loss": 1.2056, + "step": 1597, + "text_contrastive_loss": 0.6706, + "train_positive_log_prob": -77.51, + "train_positive_token_accuracy": 0.0741, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3901, + "epoch": 3.6072234762979685, + "grad_norm": 13.927318572998047, + "learning_rate": 1.873645161808481e-06, + "lm_loss": 5.377, + "loss": 1.369, + "step": 1598, + "text_contrastive_loss": 0.8823, + "train_positive_log_prob": -78.495, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3517, + "epoch": 3.609480812641084, + "grad_norm": 11.994914054870605, + "learning_rate": 1.8679862770487273e-06, + "lm_loss": 5.3841, + "loss": 1.2821, + "step": 1599, + "text_contrastive_loss": 0.7839, + "train_positive_log_prob": -78.1345, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.4044, + "epoch": 3.6117381489841986, + "grad_norm": 13.263752937316895, + "learning_rate": 1.8623339871784869e-06, + "lm_loss": 5.4197, + "loss": 1.3456, + "step": 1600, + "text_contrastive_loss": 0.7984, + "train_positive_log_prob": -82.4585, + "train_positive_token_accuracy": 0.0814, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.4253, + "epoch": 3.6139954853273135, + "grad_norm": 13.720125198364258, + "learning_rate": 1.8566883040994411e-06, + "lm_loss": 5.4787, + "loss": 1.418, + "step": 1601, + "text_contrastive_loss": 0.8896, + "train_positive_log_prob": -81.8773, + "train_positive_token_accuracy": 0.0883, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3145, + "epoch": 3.616252821670429, + "grad_norm": 11.541487693786621, + "learning_rate": 1.8510492396993595e-06, + "lm_loss": 5.4636, + "loss": 1.2331, + "step": 1602, + "text_contrastive_loss": 0.7443, + "train_positive_log_prob": -83.3814, + "train_positive_token_accuracy": 0.0747, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.407, + "epoch": 3.618510158013544, + "grad_norm": 13.385784149169922, + "learning_rate": 1.8454168058520732e-06, + "lm_loss": 5.5153, + "loss": 1.3968, + "step": 1603, + "text_contrastive_loss": 0.8767, + "train_positive_log_prob": -80.3584, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3372, + "epoch": 3.620767494356659, + "grad_norm": 12.2462797164917, + "learning_rate": 1.8397910144174536e-06, + "lm_loss": 5.327, + "loss": 1.2465, + "step": 1604, + "text_contrastive_loss": 0.7533, + "train_positive_log_prob": -78.8243, + "train_positive_token_accuracy": 0.0766, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3157, + "epoch": 3.6230248306997743, + "grad_norm": 11.935406684875488, + "learning_rate": 1.8341718772413852e-06, + "lm_loss": 5.3351, + "loss": 1.1653, + "step": 1605, + "text_contrastive_loss": 0.6322, + "train_positive_log_prob": -77.3802, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.4277, + "epoch": 3.625282167042889, + "grad_norm": 12.694575309753418, + "learning_rate": 1.8285594061557421e-06, + "lm_loss": 5.4119, + "loss": 1.3953, + "step": 1606, + "text_contrastive_loss": 0.8528, + "train_positive_log_prob": -80.9231, + "train_positive_token_accuracy": 0.0731, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.349, + "epoch": 3.6275395033860045, + "grad_norm": 13.389094352722168, + "learning_rate": 1.822953612978362e-06, + "lm_loss": 5.5018, + "loss": 1.2744, + "step": 1607, + "text_contrastive_loss": 0.7506, + "train_positive_log_prob": -83.6408, + "train_positive_token_accuracy": 0.0776, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3948, + "epoch": 3.62979683972912, + "grad_norm": 12.698433876037598, + "learning_rate": 1.817354509513017e-06, + "lm_loss": 5.3521, + "loss": 1.3284, + "step": 1608, + "text_contrastive_loss": 0.7967, + "train_positive_log_prob": -79.7998, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.331, + "epoch": 3.6320541760722347, + "grad_norm": 12.244834899902344, + "learning_rate": 1.8117621075493979e-06, + "lm_loss": 5.3925, + "loss": 1.2298, + "step": 1609, + "text_contrastive_loss": 0.7191, + "train_positive_log_prob": -80.951, + "train_positive_token_accuracy": 0.0851, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.3605, + "epoch": 3.63431151241535, + "grad_norm": 11.774707794189453, + "learning_rate": 1.8061764188630831e-06, + "lm_loss": 5.3916, + "loss": 1.3264, + "step": 1610, + "text_contrastive_loss": 0.8536, + "train_positive_log_prob": -79.5087, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.2775, + "epoch": 3.636568848758465, + "grad_norm": 12.249534606933594, + "learning_rate": 1.8005974552155158e-06, + "lm_loss": 5.4907, + "loss": 1.149, + "step": 1611, + "text_contrastive_loss": 0.645, + "train_positive_log_prob": -81.911, + "train_positive_token_accuracy": 0.0806, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3944, + "epoch": 3.63882618510158, + "grad_norm": 12.408869743347168, + "learning_rate": 1.7950252283539776e-06, + "lm_loss": 5.5657, + "loss": 1.3845, + "step": 1612, + "text_contrastive_loss": 0.867, + "train_positive_log_prob": -83.203, + "train_positive_token_accuracy": 0.0763, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.3244, + "epoch": 3.6410835214446955, + "grad_norm": 12.736432075500488, + "learning_rate": 1.7894597500115657e-06, + "lm_loss": 5.4443, + "loss": 1.2986, + "step": 1613, + "text_contrastive_loss": 0.8596, + "train_positive_log_prob": -80.8564, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3955, + "epoch": 3.6433408577878104, + "grad_norm": 13.778947830200195, + "learning_rate": 1.7839010319071687e-06, + "lm_loss": 5.384, + "loss": 1.3737, + "step": 1614, + "text_contrastive_loss": 0.8796, + "train_positive_log_prob": -79.7498, + "train_positive_token_accuracy": 0.0746, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3416, + "epoch": 3.6455981941309257, + "grad_norm": 13.371996879577637, + "learning_rate": 1.7783490857454354e-06, + "lm_loss": 5.5404, + "loss": 1.3123, + "step": 1615, + "text_contrastive_loss": 0.8333, + "train_positive_log_prob": -81.5294, + "train_positive_token_accuracy": 0.0749, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3193, + "epoch": 3.6478555304740405, + "grad_norm": 11.556200981140137, + "learning_rate": 1.7728039232167603e-06, + "lm_loss": 5.4475, + "loss": 1.2263, + "step": 1616, + "text_contrastive_loss": 0.7244, + "train_positive_log_prob": -80.6255, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.3433, + "epoch": 3.650112866817156, + "grad_norm": 12.634450912475586, + "learning_rate": 1.7672655559972535e-06, + "lm_loss": 5.3203, + "loss": 1.2808, + "step": 1617, + "text_contrastive_loss": 0.811, + "train_positive_log_prob": -78.2828, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.2259, + "epoch": 3.6523702031602707, + "grad_norm": 10.944430351257324, + "learning_rate": 1.7617339957487167e-06, + "lm_loss": 5.4636, + "loss": 1.1102, + "step": 1618, + "text_contrastive_loss": 0.6759, + "train_positive_log_prob": -82.0802, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3135, + "epoch": 3.654627539503386, + "grad_norm": 12.742888450622559, + "learning_rate": 1.7562092541186144e-06, + "lm_loss": 5.4843, + "loss": 1.2122, + "step": 1619, + "text_contrastive_loss": 0.7005, + "train_positive_log_prob": -81.9652, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3481, + "epoch": 3.656884875846501, + "grad_norm": 12.079643249511719, + "learning_rate": 1.750691342740058e-06, + "lm_loss": 5.403, + "loss": 1.2652, + "step": 1620, + "text_contrastive_loss": 0.7535, + "train_positive_log_prob": -78.5967, + "train_positive_token_accuracy": 0.0798, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.4107, + "epoch": 3.659142212189616, + "grad_norm": 13.736719131469727, + "learning_rate": 1.7451802732317763e-06, + "lm_loss": 5.4379, + "loss": 1.3022, + "step": 1621, + "text_contrastive_loss": 0.6954, + "train_positive_log_prob": -82.3723, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.2613, + "epoch": 3.6613995485327315, + "grad_norm": 12.820518493652344, + "learning_rate": 1.7396760571980902e-06, + "lm_loss": 5.3914, + "loss": 1.1312, + "step": 1622, + "text_contrastive_loss": 0.6615, + "train_positive_log_prob": -80.364, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.325, + "epoch": 3.6636568848758464, + "grad_norm": 12.226383209228516, + "learning_rate": 1.7341787062288928e-06, + "lm_loss": 5.4771, + "loss": 1.222, + "step": 1623, + "text_contrastive_loss": 0.6986, + "train_positive_log_prob": -81.7253, + "train_positive_token_accuracy": 0.0736, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.3679, + "epoch": 3.6659142212189617, + "grad_norm": 12.769431114196777, + "learning_rate": 1.7286882318996162e-06, + "lm_loss": 5.4515, + "loss": 1.2986, + "step": 1624, + "text_contrastive_loss": 0.7711, + "train_positive_log_prob": -80.3, + "train_positive_token_accuracy": 0.075, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3603, + "epoch": 3.6681715575620766, + "grad_norm": 13.161138534545898, + "learning_rate": 1.7232046457712164e-06, + "lm_loss": 5.3942, + "loss": 1.251, + "step": 1625, + "text_contrastive_loss": 0.7026, + "train_positive_log_prob": -79.2056, + "train_positive_token_accuracy": 0.0873, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.5014, + "epoch": 3.670428893905192, + "grad_norm": 14.961957931518555, + "learning_rate": 1.7177279593901463e-06, + "lm_loss": 5.4697, + "loss": 1.4832, + "step": 1626, + "text_contrastive_loss": 0.8697, + "train_positive_log_prob": -82.9641, + "train_positive_token_accuracy": 0.0783, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3658, + "epoch": 3.672686230248307, + "grad_norm": 12.485333442687988, + "learning_rate": 1.712258184288328e-06, + "lm_loss": 5.4463, + "loss": 1.3425, + "step": 1627, + "text_contrastive_loss": 0.8642, + "train_positive_log_prob": -80.5674, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3963, + "epoch": 3.674943566591422, + "grad_norm": 13.958459854125977, + "learning_rate": 1.7067953319831327e-06, + "lm_loss": 5.2874, + "loss": 1.3539, + "step": 1628, + "text_contrastive_loss": 0.8578, + "train_positive_log_prob": -77.41, + "train_positive_token_accuracy": 0.0776, + "train_positive_token_prob": 0.0328 + }, + { + "contrastive_loss": 0.4493, + "epoch": 3.6772009029345374, + "grad_norm": 14.333114624023438, + "learning_rate": 1.7013394139773537e-06, + "lm_loss": 5.45, + "loss": 1.4528, + "step": 1629, + "text_contrastive_loss": 0.917, + "train_positive_log_prob": -80.5564, + "train_positive_token_accuracy": 0.0803, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3345, + "epoch": 3.6794582392776523, + "grad_norm": 13.485366821289062, + "learning_rate": 1.6958904417591853e-06, + "lm_loss": 5.5247, + "loss": 1.3064, + "step": 1630, + "text_contrastive_loss": 0.8389, + "train_positive_log_prob": -83.6235, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.403, + "epoch": 3.6817155756207676, + "grad_norm": 15.880334854125977, + "learning_rate": 1.6904484268021915e-06, + "lm_loss": 5.4342, + "loss": 1.3695, + "step": 1631, + "text_contrastive_loss": 0.8461, + "train_positive_log_prob": -82.2962, + "train_positive_token_accuracy": 0.0823, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3287, + "epoch": 3.683972911963883, + "grad_norm": 12.249526977539062, + "learning_rate": 1.6850133805652907e-06, + "lm_loss": 5.5216, + "loss": 1.2978, + "step": 1632, + "text_contrastive_loss": 0.834, + "train_positive_log_prob": -81.225, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.3059, + "epoch": 3.6862302483069977, + "grad_norm": 12.724023818969727, + "learning_rate": 1.6795853144927282e-06, + "lm_loss": 5.5501, + "loss": 1.2597, + "step": 1633, + "text_contrastive_loss": 0.7976, + "train_positive_log_prob": -82.67, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.286, + "epoch": 3.6884875846501126, + "grad_norm": 10.801351547241211, + "learning_rate": 1.6741642400140513e-06, + "lm_loss": 5.4213, + "loss": 1.1818, + "step": 1634, + "text_contrastive_loss": 0.7074, + "train_positive_log_prob": -81.7, + "train_positive_token_accuracy": 0.0749, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.2804, + "epoch": 3.690744920993228, + "grad_norm": 11.62018871307373, + "learning_rate": 1.668750168544081e-06, + "lm_loss": 5.4677, + "loss": 1.2152, + "step": 1635, + "text_contrastive_loss": 0.7761, + "train_positive_log_prob": -80.1569, + "train_positive_token_accuracy": 0.0743, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.2794, + "epoch": 3.6930022573363432, + "grad_norm": 12.390840530395508, + "learning_rate": 1.663343111482898e-06, + "lm_loss": 5.3312, + "loss": 1.1892, + "step": 1636, + "text_contrastive_loss": 0.7534, + "train_positive_log_prob": -77.3641, + "train_positive_token_accuracy": 0.0824, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.4025, + "epoch": 3.695259593679458, + "grad_norm": 15.107317924499512, + "learning_rate": 1.657943080215812e-06, + "lm_loss": 5.3958, + "loss": 1.3531, + "step": 1637, + "text_contrastive_loss": 0.8219, + "train_positive_log_prob": -80.1128, + "train_positive_token_accuracy": 0.0845, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.2976, + "epoch": 3.6975169300225734, + "grad_norm": 11.038142204284668, + "learning_rate": 1.6525500861133386e-06, + "lm_loss": 5.5744, + "loss": 1.2172, + "step": 1638, + "text_contrastive_loss": 0.7244, + "train_positive_log_prob": -81.7425, + "train_positive_token_accuracy": 0.073, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.3574, + "epoch": 3.6997742663656883, + "grad_norm": 12.428092002868652, + "learning_rate": 1.6471641405311727e-06, + "lm_loss": 5.3017, + "loss": 1.2785, + "step": 1639, + "text_contrastive_loss": 0.7819, + "train_positive_log_prob": -78.9227, + "train_positive_token_accuracy": 0.0766, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3725, + "epoch": 3.7020316027088036, + "grad_norm": 13.82121467590332, + "learning_rate": 1.641785254810172e-06, + "lm_loss": 5.4508, + "loss": 1.3721, + "step": 1640, + "text_contrastive_loss": 0.9091, + "train_positive_log_prob": -82.6126, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3557, + "epoch": 3.704288939051919, + "grad_norm": 12.76018238067627, + "learning_rate": 1.636413440276326e-06, + "lm_loss": 5.3938, + "loss": 1.3376, + "step": 1641, + "text_contrastive_loss": 0.885, + "train_positive_log_prob": -82.3776, + "train_positive_token_accuracy": 0.0867, + "train_positive_token_prob": 0.0326 + }, + { + "contrastive_loss": 0.3327, + "epoch": 3.706546275395034, + "grad_norm": 10.917482376098633, + "learning_rate": 1.631048708240736e-06, + "lm_loss": 5.6108, + "loss": 1.276, + "step": 1642, + "text_contrastive_loss": 0.7644, + "train_positive_log_prob": -83.7882, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.3644, + "epoch": 3.708803611738149, + "grad_norm": 12.80161190032959, + "learning_rate": 1.6256910699995921e-06, + "lm_loss": 5.394, + "loss": 1.2603, + "step": 1643, + "text_contrastive_loss": 0.7129, + "train_positive_log_prob": -79.7082, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.2731, + "epoch": 3.711060948081264, + "grad_norm": 11.543538093566895, + "learning_rate": 1.620340536834139e-06, + "lm_loss": 5.4571, + "loss": 1.1743, + "step": 1644, + "text_contrastive_loss": 0.7109, + "train_positive_log_prob": -80.4701, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3483, + "epoch": 3.7133182844243793, + "grad_norm": 13.757126808166504, + "learning_rate": 1.6149971200106723e-06, + "lm_loss": 5.4001, + "loss": 1.3184, + "step": 1645, + "text_contrastive_loss": 0.8602, + "train_positive_log_prob": -79.4562, + "train_positive_token_accuracy": 0.0772, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3259, + "epoch": 3.7155756207674946, + "grad_norm": 12.917257308959961, + "learning_rate": 1.6096608307804973e-06, + "lm_loss": 5.3973, + "loss": 1.2203, + "step": 1646, + "text_contrastive_loss": 0.7093, + "train_positive_log_prob": -78.5139, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3082, + "epoch": 3.7178329571106095, + "grad_norm": 10.721746444702148, + "learning_rate": 1.604331680379908e-06, + "lm_loss": 5.4802, + "loss": 1.1649, + "step": 1647, + "text_contrastive_loss": 0.6173, + "train_positive_log_prob": -82.309, + "train_positive_token_accuracy": 0.0679, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.2929, + "epoch": 3.7200902934537243, + "grad_norm": 12.32038688659668, + "learning_rate": 1.599009680030173e-06, + "lm_loss": 5.5861, + "loss": 1.2878, + "step": 1648, + "text_contrastive_loss": 0.8726, + "train_positive_log_prob": -83.984, + "train_positive_token_accuracy": 0.0738, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.3588, + "epoch": 3.7223476297968396, + "grad_norm": 13.859363555908203, + "learning_rate": 1.5936948409375007e-06, + "lm_loss": 5.4422, + "loss": 1.3496, + "step": 1649, + "text_contrastive_loss": 0.8933, + "train_positive_log_prob": -81.105, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.4109, + "epoch": 3.724604966139955, + "grad_norm": 14.201754570007324, + "learning_rate": 1.5883871742930257e-06, + "lm_loss": 5.4932, + "loss": 1.3261, + "step": 1650, + "text_contrastive_loss": 0.7318, + "train_positive_log_prob": -81.5419, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3265, + "epoch": 3.72686230248307, + "grad_norm": 11.490397453308105, + "learning_rate": 1.5830866912727722e-06, + "lm_loss": 5.4679, + "loss": 1.2512, + "step": 1651, + "text_contrastive_loss": 0.7559, + "train_positive_log_prob": -81.9596, + "train_positive_token_accuracy": 0.0811, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3278, + "epoch": 3.729119638826185, + "grad_norm": 13.056407928466797, + "learning_rate": 1.5777934030376445e-06, + "lm_loss": 5.4045, + "loss": 1.2547, + "step": 1652, + "text_contrastive_loss": 0.7728, + "train_positive_log_prob": -81.4676, + "train_positive_token_accuracy": 0.0816, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.2913, + "epoch": 3.7313769751693, + "grad_norm": 12.370354652404785, + "learning_rate": 1.5725073207333963e-06, + "lm_loss": 5.3833, + "loss": 1.2204, + "step": 1653, + "text_contrastive_loss": 0.7817, + "train_positive_log_prob": -82.7247, + "train_positive_token_accuracy": 0.0826, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.4257, + "epoch": 3.7336343115124153, + "grad_norm": 16.134822845458984, + "learning_rate": 1.5672284554906087e-06, + "lm_loss": 5.4242, + "loss": 1.3022, + "step": 1654, + "text_contrastive_loss": 0.668, + "train_positive_log_prob": -80.3469, + "train_positive_token_accuracy": 0.0712, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.4168, + "epoch": 3.7358916478555306, + "grad_norm": 13.646318435668945, + "learning_rate": 1.561956818424661e-06, + "lm_loss": 5.4048, + "loss": 1.2874, + "step": 1655, + "text_contrastive_loss": 0.6602, + "train_positive_log_prob": -79.4267, + "train_positive_token_accuracy": 0.0869, + "train_positive_token_prob": 0.0325 + }, + { + "contrastive_loss": 0.3256, + "epoch": 3.7381489841986455, + "grad_norm": 13.392562866210938, + "learning_rate": 1.5566924206357187e-06, + "lm_loss": 5.4989, + "loss": 1.1982, + "step": 1656, + "text_contrastive_loss": 0.6454, + "train_positive_log_prob": -81.1326, + "train_positive_token_accuracy": 0.0694, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.3326, + "epoch": 3.740406320541761, + "grad_norm": 12.870278358459473, + "learning_rate": 1.5514352732087024e-06, + "lm_loss": 5.4017, + "loss": 1.1909, + "step": 1657, + "text_contrastive_loss": 0.6362, + "train_positive_log_prob": -80.5146, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.4004, + "epoch": 3.7426636568848757, + "grad_norm": 13.213074684143066, + "learning_rate": 1.5461853872132648e-06, + "lm_loss": 5.3041, + "loss": 1.3485, + "step": 1658, + "text_contrastive_loss": 0.8353, + "train_positive_log_prob": -78.4033, + "train_positive_token_accuracy": 0.0809, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3405, + "epoch": 3.744920993227991, + "grad_norm": 11.495987892150879, + "learning_rate": 1.5409427737037713e-06, + "lm_loss": 5.3369, + "loss": 1.2776, + "step": 1659, + "text_contrastive_loss": 0.8068, + "train_positive_log_prob": -79.4514, + "train_positive_token_accuracy": 0.0724, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3253, + "epoch": 3.7471783295711063, + "grad_norm": 12.667852401733398, + "learning_rate": 1.5357074437192688e-06, + "lm_loss": 5.4119, + "loss": 1.2381, + "step": 1660, + "text_contrastive_loss": 0.7433, + "train_positive_log_prob": -78.9428, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3245, + "epoch": 3.749435665914221, + "grad_norm": 12.08480453491211, + "learning_rate": 1.5304794082834713e-06, + "lm_loss": 5.4983, + "loss": 1.2549, + "step": 1661, + "text_contrastive_loss": 0.7612, + "train_positive_log_prob": -82.1116, + "train_positive_token_accuracy": 0.0853, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3249, + "epoch": 3.7516930022573365, + "grad_norm": 12.697017669677734, + "learning_rate": 1.5252586784047374e-06, + "lm_loss": 5.5411, + "loss": 1.2985, + "step": 1662, + "text_contrastive_loss": 0.8389, + "train_positive_log_prob": -81.5998, + "train_positive_token_accuracy": 0.0737, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.3132, + "epoch": 3.7539503386004514, + "grad_norm": 13.437516212463379, + "learning_rate": 1.520045265076034e-06, + "lm_loss": 5.4348, + "loss": 1.1751, + "step": 1663, + "text_contrastive_loss": 0.6367, + "train_positive_log_prob": -80.0555, + "train_positive_token_accuracy": 0.0875, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.3964, + "epoch": 3.7562076749435667, + "grad_norm": 13.98206615447998, + "learning_rate": 1.5148391792749272e-06, + "lm_loss": 5.4066, + "loss": 1.3513, + "step": 1664, + "text_contrastive_loss": 0.8285, + "train_positive_log_prob": -78.8201, + "train_positive_token_accuracy": 0.0718, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.3845, + "epoch": 3.758465011286682, + "grad_norm": 14.41606616973877, + "learning_rate": 1.5096404319635533e-06, + "lm_loss": 5.3016, + "loss": 1.3042, + "step": 1665, + "text_contrastive_loss": 0.779, + "train_positive_log_prob": -77.2906, + "train_positive_token_accuracy": 0.0758, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.2496, + "epoch": 3.760722347629797, + "grad_norm": 11.555524826049805, + "learning_rate": 1.5044490340885987e-06, + "lm_loss": 5.3767, + "loss": 1.0767, + "step": 1666, + "text_contrastive_loss": 0.5788, + "train_positive_log_prob": -79.9972, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.4592, + "epoch": 3.7629796839729117, + "grad_norm": 15.030644416809082, + "learning_rate": 1.4992649965812673e-06, + "lm_loss": 5.3628, + "loss": 1.5019, + "step": 1667, + "text_contrastive_loss": 1.0127, + "train_positive_log_prob": -76.4015, + "train_positive_token_accuracy": 0.0839, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3516, + "epoch": 3.765237020316027, + "grad_norm": 10.679621696472168, + "learning_rate": 1.4940883303572724e-06, + "lm_loss": 5.3893, + "loss": 1.2231, + "step": 1668, + "text_contrastive_loss": 0.6652, + "train_positive_log_prob": -80.9434, + "train_positive_token_accuracy": 0.0741, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.4155, + "epoch": 3.7674943566591423, + "grad_norm": 13.80992603302002, + "learning_rate": 1.4889190463168019e-06, + "lm_loss": 5.358, + "loss": 1.3275, + "step": 1669, + "text_contrastive_loss": 0.7525, + "train_positive_log_prob": -80.1424, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3345, + "epoch": 3.769751693002257, + "grad_norm": 12.975834846496582, + "learning_rate": 1.483757155344503e-06, + "lm_loss": 5.4797, + "loss": 1.2998, + "step": 1670, + "text_contrastive_loss": 0.8346, + "train_positive_log_prob": -80.4404, + "train_positive_token_accuracy": 0.0782, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3487, + "epoch": 3.7720090293453725, + "grad_norm": 12.123541831970215, + "learning_rate": 1.47860266830945e-06, + "lm_loss": 5.4339, + "loss": 1.265, + "step": 1671, + "text_contrastive_loss": 0.7458, + "train_positive_log_prob": -80.9159, + "train_positive_token_accuracy": 0.0842, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.2, + "epoch": 3.7742663656884874, + "grad_norm": 9.676545143127441, + "learning_rate": 1.473455596065133e-06, + "lm_loss": 5.4369, + "loss": 1.0842, + "step": 1672, + "text_contrastive_loss": 0.6809, + "train_positive_log_prob": -81.0627, + "train_positive_token_accuracy": 0.081, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3387, + "epoch": 3.7765237020316027, + "grad_norm": 13.159178733825684, + "learning_rate": 1.4683159494494259e-06, + "lm_loss": 5.4352, + "loss": 1.2632, + "step": 1673, + "text_contrastive_loss": 0.7621, + "train_positive_log_prob": -79.5132, + "train_positive_token_accuracy": 0.0828, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.4549, + "epoch": 3.778781038374718, + "grad_norm": 13.989200592041016, + "learning_rate": 1.4631837392845694e-06, + "lm_loss": 5.4304, + "loss": 1.4196, + "step": 1674, + "text_contrastive_loss": 0.8434, + "train_positive_log_prob": -81.7784, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3147, + "epoch": 3.781038374717833, + "grad_norm": 11.835813522338867, + "learning_rate": 1.4580589763771413e-06, + "lm_loss": 5.4121, + "loss": 1.2553, + "step": 1675, + "text_contrastive_loss": 0.7987, + "train_positive_log_prob": -80.145, + "train_positive_token_accuracy": 0.0826, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3664, + "epoch": 3.783295711060948, + "grad_norm": 12.686626434326172, + "learning_rate": 1.4529416715180434e-06, + "lm_loss": 5.382, + "loss": 1.2769, + "step": 1676, + "text_contrastive_loss": 0.7445, + "train_positive_log_prob": -80.3908, + "train_positive_token_accuracy": 0.0811, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.2749, + "epoch": 3.785553047404063, + "grad_norm": 11.486528396606445, + "learning_rate": 1.44783183548247e-06, + "lm_loss": 5.4662, + "loss": 1.1844, + "step": 1677, + "text_contrastive_loss": 0.7256, + "train_positive_log_prob": -81.1188, + "train_positive_token_accuracy": 0.0763, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.2536, + "epoch": 3.7878103837471784, + "grad_norm": 12.280549049377441, + "learning_rate": 1.4427294790298902e-06, + "lm_loss": 5.4331, + "loss": 1.1063, + "step": 1678, + "text_contrastive_loss": 0.6188, + "train_positive_log_prob": -80.4998, + "train_positive_token_accuracy": 0.0825, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3336, + "epoch": 3.7900677200902937, + "grad_norm": 12.250114440917969, + "learning_rate": 1.4376346129040243e-06, + "lm_loss": 5.4719, + "loss": 1.2451, + "step": 1679, + "text_contrastive_loss": 0.7285, + "train_positive_log_prob": -79.6631, + "train_positive_token_accuracy": 0.0722, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.3097, + "epoch": 3.7923250564334086, + "grad_norm": 12.244065284729004, + "learning_rate": 1.432547247832819e-06, + "lm_loss": 5.4479, + "loss": 1.245, + "step": 1680, + "text_contrastive_loss": 0.781, + "train_positive_log_prob": -80.0105, + "train_positive_token_accuracy": 0.0665, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.348, + "epoch": 3.7945823927765234, + "grad_norm": 13.669023513793945, + "learning_rate": 1.4274673945284278e-06, + "lm_loss": 5.3841, + "loss": 1.2095, + "step": 1681, + "text_contrastive_loss": 0.6462, + "train_positive_log_prob": -80.5982, + "train_positive_token_accuracy": 0.0851, + "train_positive_token_prob": 0.0327 + }, + { + "contrastive_loss": 0.3835, + "epoch": 3.7968397291196387, + "grad_norm": 13.256448745727539, + "learning_rate": 1.422395063687188e-06, + "lm_loss": 5.3715, + "loss": 1.2844, + "step": 1682, + "text_contrastive_loss": 0.7275, + "train_positive_log_prob": -79.6479, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.4086, + "epoch": 3.799097065462754, + "grad_norm": 16.562536239624023, + "learning_rate": 1.4173302659895938e-06, + "lm_loss": 5.4766, + "loss": 1.3018, + "step": 1683, + "text_contrastive_loss": 0.6911, + "train_positive_log_prob": -80.6275, + "train_positive_token_accuracy": 0.0788, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3863, + "epoch": 3.801354401805869, + "grad_norm": 13.026150703430176, + "learning_rate": 1.4122730121002808e-06, + "lm_loss": 5.4614, + "loss": 1.308, + "step": 1684, + "text_contrastive_loss": 0.7512, + "train_positive_log_prob": -81.6249, + "train_positive_token_accuracy": 0.0902, + "train_positive_token_prob": 0.0325 + }, + { + "contrastive_loss": 0.2908, + "epoch": 3.8036117381489842, + "grad_norm": 13.314957618713379, + "learning_rate": 1.4072233126679985e-06, + "lm_loss": 5.5148, + "loss": 1.1472, + "step": 1685, + "text_contrastive_loss": 0.6098, + "train_positive_log_prob": -81.8378, + "train_positive_token_accuracy": 0.0887, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3242, + "epoch": 3.805869074492099, + "grad_norm": 12.59289836883545, + "learning_rate": 1.4021811783255912e-06, + "lm_loss": 5.5335, + "loss": 1.2697, + "step": 1686, + "text_contrastive_loss": 0.7841, + "train_positive_log_prob": -80.8176, + "train_positive_token_accuracy": 0.0735, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.4031, + "epoch": 3.8081264108352144, + "grad_norm": 14.396559715270996, + "learning_rate": 1.3971466196899697e-06, + "lm_loss": 5.5835, + "loss": 1.3417, + "step": 1687, + "text_contrastive_loss": 0.7605, + "train_positive_log_prob": -81.4886, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.3634, + "epoch": 3.8103837471783297, + "grad_norm": 12.683608055114746, + "learning_rate": 1.3921196473620975e-06, + "lm_loss": 5.4633, + "loss": 1.2996, + "step": 1688, + "text_contrastive_loss": 0.7798, + "train_positive_log_prob": -80.5414, + "train_positive_token_accuracy": 0.0835, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.2969, + "epoch": 3.8126410835214446, + "grad_norm": 12.5073881149292, + "learning_rate": 1.3871002719269616e-06, + "lm_loss": 5.4707, + "loss": 1.2562, + "step": 1689, + "text_contrastive_loss": 0.8245, + "train_positive_log_prob": -79.1098, + "train_positive_token_accuracy": 0.074, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3038, + "epoch": 3.81489841986456, + "grad_norm": 13.346036911010742, + "learning_rate": 1.3820885039535564e-06, + "lm_loss": 5.3716, + "loss": 1.1923, + "step": 1690, + "text_contrastive_loss": 0.7027, + "train_positive_log_prob": -79.7769, + "train_positive_token_accuracy": 0.0842, + "train_positive_token_prob": 0.0324 + }, + { + "contrastive_loss": 0.3562, + "epoch": 3.8171557562076748, + "grad_norm": 13.995391845703125, + "learning_rate": 1.3770843539948508e-06, + "lm_loss": 5.5332, + "loss": 1.2854, + "step": 1691, + "text_contrastive_loss": 0.7517, + "train_positive_log_prob": -81.3979, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3775, + "epoch": 3.81941309255079, + "grad_norm": 13.737689971923828, + "learning_rate": 1.3720878325877785e-06, + "lm_loss": 5.402, + "loss": 1.3134, + "step": 1692, + "text_contrastive_loss": 0.7915, + "train_positive_log_prob": -79.0878, + "train_positive_token_accuracy": 0.083, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3243, + "epoch": 3.8216704288939054, + "grad_norm": 12.652764320373535, + "learning_rate": 1.3670989502532089e-06, + "lm_loss": 5.399, + "loss": 1.3312, + "step": 1693, + "text_contrastive_loss": 0.9341, + "train_positive_log_prob": -81.0269, + "train_positive_token_accuracy": 0.0843, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.3374, + "epoch": 3.8239277652370203, + "grad_norm": 12.762588500976562, + "learning_rate": 1.362117717495926e-06, + "lm_loss": 5.3973, + "loss": 1.3003, + "step": 1694, + "text_contrastive_loss": 0.8463, + "train_positive_log_prob": -78.681, + "train_positive_token_accuracy": 0.0748, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3196, + "epoch": 3.8261851015801356, + "grad_norm": 12.742837905883789, + "learning_rate": 1.3571441448046086e-06, + "lm_loss": 5.4695, + "loss": 1.2328, + "step": 1695, + "text_contrastive_loss": 0.7326, + "train_positive_log_prob": -81.3418, + "train_positive_token_accuracy": 0.0822, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3593, + "epoch": 3.8284424379232505, + "grad_norm": 12.992274284362793, + "learning_rate": 1.3521782426517988e-06, + "lm_loss": 5.4408, + "loss": 1.3324, + "step": 1696, + "text_contrastive_loss": 0.858, + "train_positive_log_prob": -79.8891, + "train_positive_token_accuracy": 0.0697, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3565, + "epoch": 3.8306997742663658, + "grad_norm": 12.198596000671387, + "learning_rate": 1.3472200214938974e-06, + "lm_loss": 5.5585, + "loss": 1.2282, + "step": 1697, + "text_contrastive_loss": 0.6319, + "train_positive_log_prob": -84.5013, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.4224, + "epoch": 3.832957110609481, + "grad_norm": 13.373780250549316, + "learning_rate": 1.3422694917711276e-06, + "lm_loss": 5.4182, + "loss": 1.386, + "step": 1698, + "text_contrastive_loss": 0.8436, + "train_positive_log_prob": -79.1767, + "train_positive_token_accuracy": 0.0727, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3688, + "epoch": 3.835214446952596, + "grad_norm": 13.223564147949219, + "learning_rate": 1.3373266639075134e-06, + "lm_loss": 5.389, + "loss": 1.2751, + "step": 1699, + "text_contrastive_loss": 0.7349, + "train_positive_log_prob": -81.1676, + "train_positive_token_accuracy": 0.0858, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.361, + "epoch": 3.837471783295711, + "grad_norm": 13.410101890563965, + "learning_rate": 1.3323915483108662e-06, + "lm_loss": 5.3996, + "loss": 1.2681, + "step": 1700, + "text_contrastive_loss": 0.7343, + "train_positive_log_prob": -77.0073, + "train_positive_token_accuracy": 0.0707, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.3976, + "epoch": 3.839729119638826, + "grad_norm": 12.529494285583496, + "learning_rate": 1.3274641553727568e-06, + "lm_loss": 5.4367, + "loss": 1.3524, + "step": 1701, + "text_contrastive_loss": 0.8224, + "train_positive_log_prob": -80.466, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.4066, + "epoch": 3.8419864559819414, + "grad_norm": 12.37010669708252, + "learning_rate": 1.3225444954684962e-06, + "lm_loss": 5.3219, + "loss": 1.3199, + "step": 1702, + "text_contrastive_loss": 0.7624, + "train_positive_log_prob": -79.0784, + "train_positive_token_accuracy": 0.0778, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3477, + "epoch": 3.8442437923250563, + "grad_norm": 14.329082489013672, + "learning_rate": 1.3176325789571075e-06, + "lm_loss": 5.474, + "loss": 1.335, + "step": 1703, + "text_contrastive_loss": 0.8799, + "train_positive_log_prob": -80.7387, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3133, + "epoch": 3.8465011286681716, + "grad_norm": 11.631160736083984, + "learning_rate": 1.3127284161813153e-06, + "lm_loss": 5.4866, + "loss": 1.2114, + "step": 1704, + "text_contrastive_loss": 0.6987, + "train_positive_log_prob": -80.584, + "train_positive_token_accuracy": 0.0763, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.3948, + "epoch": 3.8487584650112865, + "grad_norm": 14.525671005249023, + "learning_rate": 1.3078320174675141e-06, + "lm_loss": 5.489, + "loss": 1.3269, + "step": 1705, + "text_contrastive_loss": 0.7663, + "train_positive_log_prob": -82.616, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3507, + "epoch": 3.851015801354402, + "grad_norm": 13.05989933013916, + "learning_rate": 1.3029433931257524e-06, + "lm_loss": 5.474, + "loss": 1.2238, + "step": 1706, + "text_contrastive_loss": 0.6515, + "train_positive_log_prob": -80.9346, + "train_positive_token_accuracy": 0.075, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3482, + "epoch": 3.853273137697517, + "grad_norm": 12.000130653381348, + "learning_rate": 1.2980625534497037e-06, + "lm_loss": 5.3701, + "loss": 1.3341, + "step": 1707, + "text_contrastive_loss": 0.8977, + "train_positive_log_prob": -78.8742, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3255, + "epoch": 3.855530474040632, + "grad_norm": 13.143017768859863, + "learning_rate": 1.2931895087166551e-06, + "lm_loss": 5.4801, + "loss": 1.2567, + "step": 1708, + "text_contrastive_loss": 0.7664, + "train_positive_log_prob": -79.5886, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.2985, + "epoch": 3.8577878103837473, + "grad_norm": 11.157387733459473, + "learning_rate": 1.2883242691874792e-06, + "lm_loss": 5.4797, + "loss": 1.1744, + "step": 1709, + "text_contrastive_loss": 0.6559, + "train_positive_log_prob": -82.1108, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3396, + "epoch": 3.860045146726862, + "grad_norm": 12.673001289367676, + "learning_rate": 1.2834668451066118e-06, + "lm_loss": 5.376, + "loss": 1.2906, + "step": 1710, + "text_contrastive_loss": 0.8266, + "train_positive_log_prob": -82.4257, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3443, + "epoch": 3.8623024830699775, + "grad_norm": 12.394901275634766, + "learning_rate": 1.2786172467020357e-06, + "lm_loss": 5.377, + "loss": 1.2955, + "step": 1711, + "text_contrastive_loss": 0.827, + "train_positive_log_prob": -79.5028, + "train_positive_token_accuracy": 0.0848, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.4027, + "epoch": 3.864559819413093, + "grad_norm": 12.732975959777832, + "learning_rate": 1.2737754841852501e-06, + "lm_loss": 5.4111, + "loss": 1.314, + "step": 1712, + "text_contrastive_loss": 0.7404, + "train_positive_log_prob": -80.7026, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.2839, + "epoch": 3.8668171557562077, + "grad_norm": 10.664950370788574, + "learning_rate": 1.2689415677512574e-06, + "lm_loss": 5.3887, + "loss": 1.2135, + "step": 1713, + "text_contrastive_loss": 0.7815, + "train_positive_log_prob": -79.5008, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3513, + "epoch": 3.8690744920993225, + "grad_norm": 12.026086807250977, + "learning_rate": 1.2641155075785444e-06, + "lm_loss": 5.43, + "loss": 1.3521, + "step": 1714, + "text_contrastive_loss": 0.9156, + "train_positive_log_prob": -81.4942, + "train_positive_token_accuracy": 0.0776, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3855, + "epoch": 3.871331828442438, + "grad_norm": 13.175518035888672, + "learning_rate": 1.259297313829046e-06, + "lm_loss": 5.325, + "loss": 1.2801, + "step": 1715, + "text_contrastive_loss": 0.7242, + "train_positive_log_prob": -78.0227, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3928, + "epoch": 3.873589164785553, + "grad_norm": 12.77806282043457, + "learning_rate": 1.2544869966481389e-06, + "lm_loss": 5.5093, + "loss": 1.2807, + "step": 1716, + "text_contrastive_loss": 0.6739, + "train_positive_log_prob": -82.2105, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3112, + "epoch": 3.875846501128668, + "grad_norm": 11.70065689086914, + "learning_rate": 1.249684566164614e-06, + "lm_loss": 5.3924, + "loss": 1.2484, + "step": 1717, + "text_contrastive_loss": 0.7959, + "train_positive_log_prob": -78.6531, + "train_positive_token_accuracy": 0.0825, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.4424, + "epoch": 3.8781038374717833, + "grad_norm": 13.04219913482666, + "learning_rate": 1.2448900324906559e-06, + "lm_loss": 5.3418, + "loss": 1.3493, + "step": 1718, + "text_contrastive_loss": 0.7456, + "train_positive_log_prob": -78.772, + "train_positive_token_accuracy": 0.0861, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.347, + "epoch": 3.880361173814898, + "grad_norm": 13.512873649597168, + "learning_rate": 1.2401034057218181e-06, + "lm_loss": 5.337, + "loss": 1.2563, + "step": 1719, + "text_contrastive_loss": 0.7512, + "train_positive_log_prob": -77.1608, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3697, + "epoch": 3.8826185101580135, + "grad_norm": 13.093762397766113, + "learning_rate": 1.2353246959370086e-06, + "lm_loss": 5.4073, + "loss": 1.3375, + "step": 1720, + "text_contrastive_loss": 0.8543, + "train_positive_log_prob": -80.0304, + "train_positive_token_accuracy": 0.085, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.4105, + "epoch": 3.884875846501129, + "grad_norm": 14.194730758666992, + "learning_rate": 1.2305539131984646e-06, + "lm_loss": 5.349, + "loss": 1.4127, + "step": 1721, + "text_contrastive_loss": 0.9346, + "train_positive_log_prob": -78.7293, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.4045, + "epoch": 3.8871331828442437, + "grad_norm": 12.694623947143555, + "learning_rate": 1.2257910675517315e-06, + "lm_loss": 5.3647, + "loss": 1.3253, + "step": 1722, + "text_contrastive_loss": 0.7687, + "train_positive_log_prob": -78.6069, + "train_positive_token_accuracy": 0.0816, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.2696, + "epoch": 3.889390519187359, + "grad_norm": 11.258502006530762, + "learning_rate": 1.22103616902564e-06, + "lm_loss": 5.2801, + "loss": 1.2154, + "step": 1723, + "text_contrastive_loss": 0.8355, + "train_positive_log_prob": -78.3572, + "train_positive_token_accuracy": 0.0845, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.4415, + "epoch": 3.891647855530474, + "grad_norm": 15.386812210083008, + "learning_rate": 1.21628922763229e-06, + "lm_loss": 5.3852, + "loss": 1.3795, + "step": 1724, + "text_contrastive_loss": 0.799, + "train_positive_log_prob": -78.5724, + "train_positive_token_accuracy": 0.0833, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3751, + "epoch": 3.893905191873589, + "grad_norm": 14.10409927368164, + "learning_rate": 1.2115502533670253e-06, + "lm_loss": 5.6036, + "loss": 1.3593, + "step": 1725, + "text_contrastive_loss": 0.8475, + "train_positive_log_prob": -84.7352, + "train_positive_token_accuracy": 0.0718, + "train_positive_token_prob": 0.0289 + }, + { + "contrastive_loss": 0.4129, + "epoch": 3.8961625282167045, + "grad_norm": 15.11279296875, + "learning_rate": 1.2068192562084146e-06, + "lm_loss": 5.2712, + "loss": 1.3667, + "step": 1726, + "text_contrastive_loss": 0.8533, + "train_positive_log_prob": -76.2894, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.0328 + }, + { + "contrastive_loss": 0.3504, + "epoch": 3.8984198645598194, + "grad_norm": 11.453778266906738, + "learning_rate": 1.2020962461182268e-06, + "lm_loss": 5.3852, + "loss": 1.2424, + "step": 1727, + "text_contrastive_loss": 0.7071, + "train_positive_log_prob": -79.8094, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3366, + "epoch": 3.9006772009029347, + "grad_norm": 14.966188430786133, + "learning_rate": 1.1973812330414159e-06, + "lm_loss": 5.3083, + "loss": 1.2008, + "step": 1728, + "text_contrastive_loss": 0.6668, + "train_positive_log_prob": -77.5319, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3466, + "epoch": 3.9029345372460496, + "grad_norm": 13.08997631072998, + "learning_rate": 1.1926742269060965e-06, + "lm_loss": 5.4059, + "loss": 1.2936, + "step": 1729, + "text_contrastive_loss": 0.8129, + "train_positive_log_prob": -79.2303, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3684, + "epoch": 3.905191873589165, + "grad_norm": 12.344026565551758, + "learning_rate": 1.1879752376235231e-06, + "lm_loss": 5.4985, + "loss": 1.3023, + "step": 1730, + "text_contrastive_loss": 0.7681, + "train_positive_log_prob": -78.8052, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.2984, + "epoch": 3.90744920993228, + "grad_norm": 11.953618049621582, + "learning_rate": 1.1832842750880702e-06, + "lm_loss": 5.4749, + "loss": 1.2204, + "step": 1731, + "text_contrastive_loss": 0.7489, + "train_positive_log_prob": -79.7506, + "train_positive_token_accuracy": 0.0778, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3525, + "epoch": 3.909706546275395, + "grad_norm": 12.915794372558594, + "learning_rate": 1.1786013491772103e-06, + "lm_loss": 5.3016, + "loss": 1.2778, + "step": 1732, + "text_contrastive_loss": 0.7903, + "train_positive_log_prob": -77.9924, + "train_positive_token_accuracy": 0.0874, + "train_positive_token_prob": 0.0327 + }, + { + "contrastive_loss": 0.3478, + "epoch": 3.91196388261851, + "grad_norm": 13.471266746520996, + "learning_rate": 1.173926469751493e-06, + "lm_loss": 5.4212, + "loss": 1.2925, + "step": 1733, + "text_contrastive_loss": 0.8052, + "train_positive_log_prob": -78.9267, + "train_positive_token_accuracy": 0.0765, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.3412, + "epoch": 3.9142212189616252, + "grad_norm": 10.858116149902344, + "learning_rate": 1.1692596466545275e-06, + "lm_loss": 5.4364, + "loss": 1.2897, + "step": 1734, + "text_contrastive_loss": 0.8096, + "train_positive_log_prob": -82.1847, + "train_positive_token_accuracy": 0.077, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.2974, + "epoch": 3.9164785553047405, + "grad_norm": 11.386345863342285, + "learning_rate": 1.1646008897129546e-06, + "lm_loss": 5.4474, + "loss": 1.2086, + "step": 1735, + "text_contrastive_loss": 0.7329, + "train_positive_log_prob": -80.3901, + "train_positive_token_accuracy": 0.09, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3475, + "epoch": 3.9187358916478554, + "grad_norm": 12.945756912231445, + "learning_rate": 1.1599502087364345e-06, + "lm_loss": 5.5655, + "loss": 1.2365, + "step": 1736, + "text_contrastive_loss": 0.6649, + "train_positive_log_prob": -83.7224, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.4151, + "epoch": 3.9209932279909707, + "grad_norm": 12.884122848510742, + "learning_rate": 1.1553076135176222e-06, + "lm_loss": 5.5143, + "loss": 1.3511, + "step": 1737, + "text_contrastive_loss": 0.7693, + "train_positive_log_prob": -80.4555, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3313, + "epoch": 3.9232505643340856, + "grad_norm": 12.329414367675781, + "learning_rate": 1.1506731138321474e-06, + "lm_loss": 5.4672, + "loss": 1.2211, + "step": 1738, + "text_contrastive_loss": 0.6862, + "train_positive_log_prob": -78.7644, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.4516, + "epoch": 3.925507900677201, + "grad_norm": 14.973881721496582, + "learning_rate": 1.1460467194385889e-06, + "lm_loss": 5.4097, + "loss": 1.5122, + "step": 1739, + "text_contrastive_loss": 1.0392, + "train_positive_log_prob": -79.8177, + "train_positive_token_accuracy": 0.0773, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3339, + "epoch": 3.927765237020316, + "grad_norm": 12.941485404968262, + "learning_rate": 1.1414284400784643e-06, + "lm_loss": 5.3986, + "loss": 1.2673, + "step": 1740, + "text_contrastive_loss": 0.7869, + "train_positive_log_prob": -80.6846, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3023, + "epoch": 3.930022573363431, + "grad_norm": 11.898416519165039, + "learning_rate": 1.1368182854762005e-06, + "lm_loss": 5.4583, + "loss": 1.1561, + "step": 1741, + "text_contrastive_loss": 0.616, + "train_positive_log_prob": -80.8111, + "train_positive_token_accuracy": 0.0836, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.2913, + "epoch": 3.9322799097065464, + "grad_norm": 12.622970581054688, + "learning_rate": 1.13221626533912e-06, + "lm_loss": 5.5088, + "loss": 1.1899, + "step": 1742, + "text_contrastive_loss": 0.6955, + "train_positive_log_prob": -81.2138, + "train_positive_token_accuracy": 0.0798, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3457, + "epoch": 3.9345372460496613, + "grad_norm": 13.77286148071289, + "learning_rate": 1.1276223893574123e-06, + "lm_loss": 5.4392, + "loss": 1.2988, + "step": 1743, + "text_contrastive_loss": 0.8183, + "train_positive_log_prob": -80.5886, + "train_positive_token_accuracy": 0.0788, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.328, + "epoch": 3.9367945823927766, + "grad_norm": 11.536191940307617, + "learning_rate": 1.1230366672041216e-06, + "lm_loss": 5.3854, + "loss": 1.2338, + "step": 1744, + "text_contrastive_loss": 0.7346, + "train_positive_log_prob": -80.1877, + "train_positive_token_accuracy": 0.0842, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.3349, + "epoch": 3.939051918735892, + "grad_norm": 11.769485473632812, + "learning_rate": 1.118459108535122e-06, + "lm_loss": 5.3697, + "loss": 1.2411, + "step": 1745, + "text_contrastive_loss": 0.7384, + "train_positive_log_prob": -78.2167, + "train_positive_token_accuracy": 0.0762, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3783, + "epoch": 3.9413092550790068, + "grad_norm": 13.79159927368164, + "learning_rate": 1.1138897229890995e-06, + "lm_loss": 5.4387, + "loss": 1.3637, + "step": 1746, + "text_contrastive_loss": 0.8832, + "train_positive_log_prob": -82.0485, + "train_positive_token_accuracy": 0.0839, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.4193, + "epoch": 3.9435665914221216, + "grad_norm": 13.227442741394043, + "learning_rate": 1.109328520187528e-06, + "lm_loss": 5.5909, + "loss": 1.3745, + "step": 1747, + "text_contrastive_loss": 0.7921, + "train_positive_log_prob": -85.6355, + "train_positive_token_accuracy": 0.0814, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.2691, + "epoch": 3.945823927765237, + "grad_norm": 12.027087211608887, + "learning_rate": 1.1047755097346541e-06, + "lm_loss": 5.4944, + "loss": 1.1971, + "step": 1748, + "text_contrastive_loss": 0.757, + "train_positive_log_prob": -78.9637, + "train_positive_token_accuracy": 0.0799, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4028, + "epoch": 3.9480812641083523, + "grad_norm": 13.5175199508667, + "learning_rate": 1.100230701217473e-06, + "lm_loss": 5.2957, + "loss": 1.351, + "step": 1749, + "text_contrastive_loss": 0.8373, + "train_positive_log_prob": -78.108, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.393, + "epoch": 3.950338600451467, + "grad_norm": 12.79059886932373, + "learning_rate": 1.0956941042057106e-06, + "lm_loss": 5.4454, + "loss": 1.3408, + "step": 1750, + "text_contrastive_loss": 0.8067, + "train_positive_log_prob": -80.6592, + "train_positive_token_accuracy": 0.0778, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3548, + "epoch": 3.9525959367945824, + "grad_norm": 12.10927677154541, + "learning_rate": 1.091165728251799e-06, + "lm_loss": 5.4584, + "loss": 1.3203, + "step": 1751, + "text_contrastive_loss": 0.8393, + "train_positive_log_prob": -81.4501, + "train_positive_token_accuracy": 0.0819, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3569, + "epoch": 3.9548532731376973, + "grad_norm": 12.969940185546875, + "learning_rate": 1.0866455828908634e-06, + "lm_loss": 5.4602, + "loss": 1.2626, + "step": 1752, + "text_contrastive_loss": 0.7195, + "train_positive_log_prob": -80.3216, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3291, + "epoch": 3.9571106094808126, + "grad_norm": 12.869458198547363, + "learning_rate": 1.082133677640697e-06, + "lm_loss": 5.4765, + "loss": 1.2956, + "step": 1753, + "text_contrastive_loss": 0.8377, + "train_positive_log_prob": -80.0906, + "train_positive_token_accuracy": 0.0812, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3978, + "epoch": 3.959367945823928, + "grad_norm": 13.036502838134766, + "learning_rate": 1.0776300220017437e-06, + "lm_loss": 5.3854, + "loss": 1.3136, + "step": 1754, + "text_contrastive_loss": 0.7546, + "train_positive_log_prob": -79.1601, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3067, + "epoch": 3.961625282167043, + "grad_norm": 11.820329666137695, + "learning_rate": 1.0731346254570735e-06, + "lm_loss": 5.2704, + "loss": 1.1793, + "step": 1755, + "text_contrastive_loss": 0.6911, + "train_positive_log_prob": -79.0337, + "train_positive_token_accuracy": 0.0766, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3445, + "epoch": 3.963882618510158, + "grad_norm": 12.24409294128418, + "learning_rate": 1.068647497472368e-06, + "lm_loss": 5.3973, + "loss": 1.2742, + "step": 1756, + "text_contrastive_loss": 0.7799, + "train_positive_log_prob": -80.8978, + "train_positive_token_accuracy": 0.087, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3891, + "epoch": 3.966139954853273, + "grad_norm": 14.378227233886719, + "learning_rate": 1.064168647495899e-06, + "lm_loss": 5.3828, + "loss": 1.4058, + "step": 1757, + "text_contrastive_loss": 0.9569, + "train_positive_log_prob": -78.7874, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.4327, + "epoch": 3.9683972911963883, + "grad_norm": 13.396885871887207, + "learning_rate": 1.0596980849585065e-06, + "lm_loss": 5.4468, + "loss": 1.3699, + "step": 1758, + "text_contrastive_loss": 0.7851, + "train_positive_log_prob": -81.5337, + "train_positive_token_accuracy": 0.0827, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3756, + "epoch": 3.9706546275395036, + "grad_norm": 11.884635925292969, + "learning_rate": 1.0552358192735784e-06, + "lm_loss": 5.3959, + "loss": 1.2723, + "step": 1759, + "text_contrastive_loss": 0.7142, + "train_positive_log_prob": -79.8662, + "train_positive_token_accuracy": 0.0776, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3636, + "epoch": 3.9729119638826185, + "grad_norm": 13.764908790588379, + "learning_rate": 1.0507818598370355e-06, + "lm_loss": 5.4769, + "loss": 1.2941, + "step": 1760, + "text_contrastive_loss": 0.7655, + "train_positive_log_prob": -83.6964, + "train_positive_token_accuracy": 0.0838, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.2761, + "epoch": 3.975169300225734, + "grad_norm": 11.162527084350586, + "learning_rate": 1.0463362160273076e-06, + "lm_loss": 5.3873, + "loss": 1.1361, + "step": 1761, + "text_contrastive_loss": 0.6424, + "train_positive_log_prob": -79.2954, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3749, + "epoch": 3.9774266365688487, + "grad_norm": 14.49768352508545, + "learning_rate": 1.0418988972053162e-06, + "lm_loss": 5.5277, + "loss": 1.3248, + "step": 1762, + "text_contrastive_loss": 0.7944, + "train_positive_log_prob": -81.6186, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.3017, + "epoch": 3.979683972911964, + "grad_norm": 11.335138320922852, + "learning_rate": 1.037469912714449e-06, + "lm_loss": 5.4907, + "loss": 1.1456, + "step": 1763, + "text_contrastive_loss": 0.5897, + "train_positive_log_prob": -82.1666, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3361, + "epoch": 3.9819413092550793, + "grad_norm": 11.933809280395508, + "learning_rate": 1.0330492718805469e-06, + "lm_loss": 5.3967, + "loss": 1.2693, + "step": 1764, + "text_contrastive_loss": 0.7871, + "train_positive_log_prob": -79.6426, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3085, + "epoch": 3.984198645598194, + "grad_norm": 12.292765617370605, + "learning_rate": 1.0286369840118859e-06, + "lm_loss": 5.4524, + "loss": 1.2448, + "step": 1765, + "text_contrastive_loss": 0.782, + "train_positive_log_prob": -80.1465, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3801, + "epoch": 3.986455981941309, + "grad_norm": 12.009864807128906, + "learning_rate": 1.0242330583991507e-06, + "lm_loss": 5.3342, + "loss": 1.3172, + "step": 1766, + "text_contrastive_loss": 0.8073, + "train_positive_log_prob": -78.5573, + "train_positive_token_accuracy": 0.0878, + "train_positive_token_prob": 0.0333 + }, + { + "contrastive_loss": 0.3845, + "epoch": 3.9887133182844243, + "grad_norm": 13.129654884338379, + "learning_rate": 1.0198375043154142e-06, + "lm_loss": 5.3882, + "loss": 1.3614, + "step": 1767, + "text_contrastive_loss": 0.8763, + "train_positive_log_prob": -80.4404, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3597, + "epoch": 3.9909706546275396, + "grad_norm": 12.79511833190918, + "learning_rate": 1.0154503310161269e-06, + "lm_loss": 5.4232, + "loss": 1.3009, + "step": 1768, + "text_contrastive_loss": 0.7978, + "train_positive_log_prob": -80.2236, + "train_positive_token_accuracy": 0.0782, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.304, + "epoch": 3.9932279909706545, + "grad_norm": 12.126627922058105, + "learning_rate": 1.0110715477390915e-06, + "lm_loss": 5.391, + "loss": 1.2102, + "step": 1769, + "text_contrastive_loss": 0.7342, + "train_positive_log_prob": -77.7322, + "train_positive_token_accuracy": 0.0788, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.4188, + "epoch": 3.99548532731377, + "grad_norm": 14.251039505004883, + "learning_rate": 1.006701163704445e-06, + "lm_loss": 5.3844, + "loss": 1.3225, + "step": 1770, + "text_contrastive_loss": 0.7306, + "train_positive_log_prob": -79.4619, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.339, + "epoch": 3.9977426636568847, + "grad_norm": 11.922215461730957, + "learning_rate": 1.0023391881146349e-06, + "lm_loss": 5.3986, + "loss": 1.2125, + "step": 1771, + "text_contrastive_loss": 0.6673, + "train_positive_log_prob": -77.5417, + "train_positive_token_accuracy": 0.0816, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.2784, + "epoch": 4.0, + "grad_norm": 16.960506439208984, + "learning_rate": 9.97985630154407e-07, + "lm_loss": 5.2852, + "loss": 1.0724, + "step": 1772, + "text_contrastive_loss": 0.5311, + "train_positive_log_prob": -80.4094, + "train_positive_token_accuracy": 0.0886, + "train_positive_token_prob": 0.0334 + }, + { + "contrastive_loss": 0.2981, + "epoch": 4.002257336343115, + "grad_norm": 11.822565078735352, + "learning_rate": 9.936404989907828e-07, + "lm_loss": 5.3024, + "loss": 1.1372, + "step": 1773, + "text_contrastive_loss": 0.6177, + "train_positive_log_prob": -78.1102, + "train_positive_token_accuracy": 0.0835, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.3811, + "epoch": 4.004514672686231, + "grad_norm": 12.052889823913574, + "learning_rate": 9.89303803773039e-07, + "lm_loss": 5.3536, + "loss": 1.3002, + "step": 1774, + "text_contrastive_loss": 0.7675, + "train_positive_log_prob": -78.4654, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3353, + "epoch": 4.006772009029345, + "grad_norm": 13.668062210083008, + "learning_rate": 9.849755536326866e-07, + "lm_loss": 5.4925, + "loss": 1.2777, + "step": 1775, + "text_contrastive_loss": 0.7862, + "train_positive_log_prob": -81.55, + "train_positive_token_accuracy": 0.0837, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.404, + "epoch": 4.00902934537246, + "grad_norm": 13.64235782623291, + "learning_rate": 9.806557576834591e-07, + "lm_loss": 5.4554, + "loss": 1.4009, + "step": 1776, + "text_contrastive_loss": 0.9027, + "train_positive_log_prob": -83.325, + "train_positive_token_accuracy": 0.0719, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.2895, + "epoch": 4.011286681715576, + "grad_norm": 12.464550971984863, + "learning_rate": 9.763444250212855e-07, + "lm_loss": 5.4405, + "loss": 1.1938, + "step": 1777, + "text_contrastive_loss": 0.7205, + "train_positive_log_prob": -82.2004, + "train_positive_token_accuracy": 0.0832, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3024, + "epoch": 4.013544018058691, + "grad_norm": 12.07010555267334, + "learning_rate": 9.72041564724277e-07, + "lm_loss": 5.3832, + "loss": 1.233, + "step": 1778, + "text_contrastive_loss": 0.7847, + "train_positive_log_prob": -79.3184, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3166, + "epoch": 4.015801354401806, + "grad_norm": 11.087263107299805, + "learning_rate": 9.677471858526998e-07, + "lm_loss": 5.3556, + "loss": 1.1836, + "step": 1779, + "text_contrastive_loss": 0.6628, + "train_positive_log_prob": -78.3251, + "train_positive_token_accuracy": 0.0844, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3074, + "epoch": 4.018058690744921, + "grad_norm": 11.637508392333984, + "learning_rate": 9.63461297448966e-07, + "lm_loss": 5.4175, + "loss": 1.1949, + "step": 1780, + "text_contrastive_loss": 0.6914, + "train_positive_log_prob": -79.7211, + "train_positive_token_accuracy": 0.0711, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3395, + "epoch": 4.020316027088036, + "grad_norm": 11.589192390441895, + "learning_rate": 9.59183908537607e-07, + "lm_loss": 5.4286, + "loss": 1.3017, + "step": 1781, + "text_contrastive_loss": 0.8388, + "train_positive_log_prob": -81.5108, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.2507, + "epoch": 4.022573363431151, + "grad_norm": 11.744787216186523, + "learning_rate": 9.549150281252633e-07, + "lm_loss": 5.5591, + "loss": 1.218, + "step": 1782, + "text_contrastive_loss": 0.8228, + "train_positive_log_prob": -82.1091, + "train_positive_token_accuracy": 0.0809, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.2901, + "epoch": 4.024830699774267, + "grad_norm": 12.500025749206543, + "learning_rate": 9.506546652006504e-07, + "lm_loss": 5.4636, + "loss": 1.1892, + "step": 1783, + "text_contrastive_loss": 0.7053, + "train_positive_log_prob": -80.2879, + "train_positive_token_accuracy": 0.0803, + "train_positive_token_prob": 0.0324 + }, + { + "contrastive_loss": 0.2786, + "epoch": 4.027088036117381, + "grad_norm": 12.859606742858887, + "learning_rate": 9.464028287345551e-07, + "lm_loss": 5.3956, + "loss": 1.1693, + "step": 1784, + "text_contrastive_loss": 0.7022, + "train_positive_log_prob": -80.7013, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.0325 + }, + { + "contrastive_loss": 0.3297, + "epoch": 4.029345372460496, + "grad_norm": 12.583005905151367, + "learning_rate": 9.421595276798084e-07, + "lm_loss": 5.4319, + "loss": 1.3058, + "step": 1785, + "text_contrastive_loss": 0.866, + "train_positive_log_prob": -79.2895, + "train_positive_token_accuracy": 0.0832, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.345, + "epoch": 4.031602708803612, + "grad_norm": 11.402308464050293, + "learning_rate": 9.379247709712725e-07, + "lm_loss": 5.4178, + "loss": 1.2672, + "step": 1786, + "text_contrastive_loss": 0.7609, + "train_positive_log_prob": -79.1937, + "train_positive_token_accuracy": 0.0823, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3446, + "epoch": 4.033860045146727, + "grad_norm": 11.948596000671387, + "learning_rate": 9.336985675258109e-07, + "lm_loss": 5.3957, + "loss": 1.2656, + "step": 1787, + "text_contrastive_loss": 0.7628, + "train_positive_log_prob": -79.251, + "train_positive_token_accuracy": 0.0715, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3092, + "epoch": 4.036117381489842, + "grad_norm": 10.598356246948242, + "learning_rate": 9.294809262422838e-07, + "lm_loss": 5.5578, + "loss": 1.2135, + "step": 1788, + "text_contrastive_loss": 0.6971, + "train_positive_log_prob": -83.38, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.384, + "epoch": 4.038374717832957, + "grad_norm": 13.19804573059082, + "learning_rate": 9.2527185600152e-07, + "lm_loss": 5.4054, + "loss": 1.2948, + "step": 1789, + "text_contrastive_loss": 0.7404, + "train_positive_log_prob": -80.6997, + "train_positive_token_accuracy": 0.084, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.2921, + "epoch": 4.040632054176072, + "grad_norm": 11.58687686920166, + "learning_rate": 9.210713656663023e-07, + "lm_loss": 5.4999, + "loss": 1.1737, + "step": 1790, + "text_contrastive_loss": 0.6631, + "train_positive_log_prob": -81.8331, + "train_positive_token_accuracy": 0.0706, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.2615, + "epoch": 4.042889390519187, + "grad_norm": 11.666924476623535, + "learning_rate": 9.168794640813428e-07, + "lm_loss": 5.3863, + "loss": 1.1034, + "step": 1791, + "text_contrastive_loss": 0.6066, + "train_positive_log_prob": -80.4403, + "train_positive_token_accuracy": 0.0829, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.2988, + "epoch": 4.045146726862303, + "grad_norm": 10.956388473510742, + "learning_rate": 9.126961600732742e-07, + "lm_loss": 5.3695, + "loss": 1.1836, + "step": 1792, + "text_contrastive_loss": 0.6957, + "train_positive_log_prob": -79.996, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3143, + "epoch": 4.047404063205418, + "grad_norm": 12.528949737548828, + "learning_rate": 9.085214624506228e-07, + "lm_loss": 5.4053, + "loss": 1.2811, + "step": 1793, + "text_contrastive_loss": 0.8526, + "train_positive_log_prob": -79.9386, + "train_positive_token_accuracy": 0.0777, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3712, + "epoch": 4.049661399548532, + "grad_norm": 12.259926795959473, + "learning_rate": 9.043553800037952e-07, + "lm_loss": 5.3923, + "loss": 1.2661, + "step": 1794, + "text_contrastive_loss": 0.7114, + "train_positive_log_prob": -78.3359, + "train_positive_token_accuracy": 0.0726, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.2825, + "epoch": 4.051918735891648, + "grad_norm": 12.099518775939941, + "learning_rate": 9.001979215050544e-07, + "lm_loss": 5.4537, + "loss": 1.254, + "step": 1795, + "text_contrastive_loss": 0.8522, + "train_positive_log_prob": -78.6777, + "train_positive_token_accuracy": 0.0745, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3283, + "epoch": 4.054176072234763, + "grad_norm": 11.813817977905273, + "learning_rate": 8.960490957085061e-07, + "lm_loss": 5.4295, + "loss": 1.192, + "step": 1796, + "text_contrastive_loss": 0.6415, + "train_positive_log_prob": -79.7898, + "train_positive_token_accuracy": 0.0787, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.4226, + "epoch": 4.056433408577878, + "grad_norm": 13.415582656860352, + "learning_rate": 8.919089113500795e-07, + "lm_loss": 5.3407, + "loss": 1.385, + "step": 1797, + "text_contrastive_loss": 0.8567, + "train_positive_log_prob": -78.4424, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.3213, + "epoch": 4.058690744920993, + "grad_norm": 11.460143089294434, + "learning_rate": 8.877773771475074e-07, + "lm_loss": 5.4705, + "loss": 1.2087, + "step": 1798, + "text_contrastive_loss": 0.6808, + "train_positive_log_prob": -82.0132, + "train_positive_token_accuracy": 0.0732, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.3529, + "epoch": 4.060948081264108, + "grad_norm": 12.055941581726074, + "learning_rate": 8.836545018003084e-07, + "lm_loss": 5.512, + "loss": 1.3028, + "step": 1799, + "text_contrastive_loss": 0.7974, + "train_positive_log_prob": -81.7767, + "train_positive_token_accuracy": 0.0745, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3897, + "epoch": 4.063205417607223, + "grad_norm": 14.699162483215332, + "learning_rate": 8.795402939897679e-07, + "lm_loss": 5.4581, + "loss": 1.3618, + "step": 1800, + "text_contrastive_loss": 0.8525, + "train_positive_log_prob": -82.5777, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3792, + "epoch": 4.065462753950339, + "grad_norm": 12.59511661529541, + "learning_rate": 8.754347623789222e-07, + "lm_loss": 5.3659, + "loss": 1.3662, + "step": 1801, + "text_contrastive_loss": 0.9008, + "train_positive_log_prob": -79.7321, + "train_positive_token_accuracy": 0.0828, + "train_positive_token_prob": 0.0336 + }, + { + "contrastive_loss": 0.3033, + "epoch": 4.067720090293454, + "grad_norm": 12.665350914001465, + "learning_rate": 8.713379156125385e-07, + "lm_loss": 5.3962, + "loss": 1.1595, + "step": 1802, + "text_contrastive_loss": 0.633, + "train_positive_log_prob": -78.3694, + "train_positive_token_accuracy": 0.0882, + "train_positive_token_prob": 0.0327 + }, + { + "contrastive_loss": 0.3308, + "epoch": 4.0699774266365685, + "grad_norm": 12.011173248291016, + "learning_rate": 8.672497623170944e-07, + "lm_loss": 5.4768, + "loss": 1.2796, + "step": 1803, + "text_contrastive_loss": 0.8023, + "train_positive_log_prob": -81.0157, + "train_positive_token_accuracy": 0.0747, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3001, + "epoch": 4.072234762979684, + "grad_norm": 11.953372955322266, + "learning_rate": 8.631703111007645e-07, + "lm_loss": 5.577, + "loss": 1.2073, + "step": 1804, + "text_contrastive_loss": 0.6991, + "train_positive_log_prob": -81.583, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.3439, + "epoch": 4.074492099322799, + "grad_norm": 12.106359481811523, + "learning_rate": 8.590995705533994e-07, + "lm_loss": 5.3762, + "loss": 1.2588, + "step": 1805, + "text_contrastive_loss": 0.7546, + "train_positive_log_prob": -80.8138, + "train_positive_token_accuracy": 0.0871, + "train_positive_token_prob": 0.0328 + }, + { + "contrastive_loss": 0.3064, + "epoch": 4.076749435665914, + "grad_norm": 12.077173233032227, + "learning_rate": 8.550375492465102e-07, + "lm_loss": 5.3883, + "loss": 1.1626, + "step": 1806, + "text_contrastive_loss": 0.6348, + "train_positive_log_prob": -81.8352, + "train_positive_token_accuracy": 0.0844, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.4243, + "epoch": 4.07900677200903, + "grad_norm": 13.205684661865234, + "learning_rate": 8.509842557332437e-07, + "lm_loss": 5.4931, + "loss": 1.3737, + "step": 1807, + "text_contrastive_loss": 0.8002, + "train_positive_log_prob": -83.3795, + "train_positive_token_accuracy": 0.0816, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3543, + "epoch": 4.081264108352144, + "grad_norm": 13.819039344787598, + "learning_rate": 8.469396985483724e-07, + "lm_loss": 5.4809, + "loss": 1.2811, + "step": 1808, + "text_contrastive_loss": 0.7575, + "train_positive_log_prob": -80.6961, + "train_positive_token_accuracy": 0.0764, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.2827, + "epoch": 4.0835214446952595, + "grad_norm": 10.551326751708984, + "learning_rate": 8.429038862082734e-07, + "lm_loss": 5.4652, + "loss": 1.1714, + "step": 1809, + "text_contrastive_loss": 0.6843, + "train_positive_log_prob": -79.0134, + "train_positive_token_accuracy": 0.0773, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.2798, + "epoch": 4.085778781038375, + "grad_norm": 12.77468204498291, + "learning_rate": 8.388768272109105e-07, + "lm_loss": 5.4723, + "loss": 1.2427, + "step": 1810, + "text_contrastive_loss": 0.8315, + "train_positive_log_prob": -80.6517, + "train_positive_token_accuracy": 0.0816, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.2904, + "epoch": 4.08803611738149, + "grad_norm": 13.338013648986816, + "learning_rate": 8.34858530035813e-07, + "lm_loss": 5.4503, + "loss": 1.1812, + "step": 1811, + "text_contrastive_loss": 0.6916, + "train_positive_log_prob": -82.3727, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3947, + "epoch": 4.090293453724605, + "grad_norm": 12.224340438842773, + "learning_rate": 8.308490031440641e-07, + "lm_loss": 5.4716, + "loss": 1.3509, + "step": 1812, + "text_contrastive_loss": 0.818, + "train_positive_log_prob": -79.6168, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3095, + "epoch": 4.09255079006772, + "grad_norm": 12.623326301574707, + "learning_rate": 8.268482549782797e-07, + "lm_loss": 5.3688, + "loss": 1.2482, + "step": 1813, + "text_contrastive_loss": 0.8037, + "train_positive_log_prob": -79.5106, + "train_positive_token_accuracy": 0.082, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.2541, + "epoch": 4.094808126410835, + "grad_norm": 11.240360260009766, + "learning_rate": 8.228562939625906e-07, + "lm_loss": 5.3495, + "loss": 1.1104, + "step": 1814, + "text_contrastive_loss": 0.6429, + "train_positive_log_prob": -79.8229, + "train_positive_token_accuracy": 0.0914, + "train_positive_token_prob": 0.0333 + }, + { + "contrastive_loss": 0.2331, + "epoch": 4.0970654627539504, + "grad_norm": 10.2955961227417, + "learning_rate": 8.188731285026219e-07, + "lm_loss": 5.5102, + "loss": 1.1071, + "step": 1815, + "text_contrastive_loss": 0.6461, + "train_positive_log_prob": -83.2784, + "train_positive_token_accuracy": 0.0825, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.2776, + "epoch": 4.099322799097066, + "grad_norm": 11.683948516845703, + "learning_rate": 8.148987669854846e-07, + "lm_loss": 5.3789, + "loss": 1.1916, + "step": 1816, + "text_contrastive_loss": 0.7522, + "train_positive_log_prob": -79.7243, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3333, + "epoch": 4.10158013544018, + "grad_norm": 11.619467735290527, + "learning_rate": 8.109332177797469e-07, + "lm_loss": 5.4242, + "loss": 1.2928, + "step": 1817, + "text_contrastive_loss": 0.8341, + "train_positive_log_prob": -81.6831, + "train_positive_token_accuracy": 0.0872, + "train_positive_token_prob": 0.0324 + }, + { + "contrastive_loss": 0.3334, + "epoch": 4.1038374717832955, + "grad_norm": 14.022225379943848, + "learning_rate": 8.069764892354237e-07, + "lm_loss": 5.4694, + "loss": 1.3321, + "step": 1818, + "text_contrastive_loss": 0.9034, + "train_positive_log_prob": -82.3829, + "train_positive_token_accuracy": 0.0762, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3667, + "epoch": 4.106094808126411, + "grad_norm": 12.644960403442383, + "learning_rate": 8.030285896839546e-07, + "lm_loss": 5.4, + "loss": 1.2974, + "step": 1819, + "text_contrastive_loss": 0.7814, + "train_positive_log_prob": -79.9241, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.4218, + "epoch": 4.108352144469526, + "grad_norm": 14.45919418334961, + "learning_rate": 7.99089527438191e-07, + "lm_loss": 5.3356, + "loss": 1.4038, + "step": 1820, + "text_contrastive_loss": 0.8969, + "train_positive_log_prob": -78.6559, + "train_positive_token_accuracy": 0.0835, + "train_positive_token_prob": 0.0327 + }, + { + "contrastive_loss": 0.3181, + "epoch": 4.110609480812641, + "grad_norm": 11.615678787231445, + "learning_rate": 7.951593107923744e-07, + "lm_loss": 5.4829, + "loss": 1.2902, + "step": 1821, + "text_contrastive_loss": 0.8476, + "train_positive_log_prob": -82.52, + "train_positive_token_accuracy": 0.069, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.2682, + "epoch": 4.112866817155756, + "grad_norm": 12.299796104431152, + "learning_rate": 7.912379480221228e-07, + "lm_loss": 5.2247, + "loss": 1.1559, + "step": 1822, + "text_contrastive_loss": 0.7304, + "train_positive_log_prob": -76.4534, + "train_positive_token_accuracy": 0.0902, + "train_positive_token_prob": 0.0334 + }, + { + "contrastive_loss": 0.3667, + "epoch": 4.115124153498871, + "grad_norm": 13.268454551696777, + "learning_rate": 7.873254473844077e-07, + "lm_loss": 5.4359, + "loss": 1.2967, + "step": 1823, + "text_contrastive_loss": 0.7727, + "train_positive_log_prob": -80.1572, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.2825, + "epoch": 4.1173814898419865, + "grad_norm": 11.050955772399902, + "learning_rate": 7.834218171175428e-07, + "lm_loss": 5.4139, + "loss": 1.2126, + "step": 1824, + "text_contrastive_loss": 0.7774, + "train_positive_log_prob": -80.4885, + "train_positive_token_accuracy": 0.078, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.2778, + "epoch": 4.119638826185102, + "grad_norm": 12.545204162597656, + "learning_rate": 7.795270654411635e-07, + "lm_loss": 5.3841, + "loss": 1.1476, + "step": 1825, + "text_contrastive_loss": 0.6628, + "train_positive_log_prob": -81.4758, + "train_positive_token_accuracy": 0.0743, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.338, + "epoch": 4.121896162528217, + "grad_norm": 12.868317604064941, + "learning_rate": 7.756412005562114e-07, + "lm_loss": 5.4345, + "loss": 1.2682, + "step": 1826, + "text_contrastive_loss": 0.7736, + "train_positive_log_prob": -79.6713, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3829, + "epoch": 4.1241534988713315, + "grad_norm": 11.64218807220459, + "learning_rate": 7.717642306449113e-07, + "lm_loss": 5.5046, + "loss": 1.3311, + "step": 1827, + "text_contrastive_loss": 0.7953, + "train_positive_log_prob": -80.225, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3452, + "epoch": 4.126410835214447, + "grad_norm": 12.985198020935059, + "learning_rate": 7.678961638707633e-07, + "lm_loss": 5.4255, + "loss": 1.3104, + "step": 1828, + "text_contrastive_loss": 0.8454, + "train_positive_log_prob": -80.6833, + "train_positive_token_accuracy": 0.082, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.264, + "epoch": 4.128668171557562, + "grad_norm": 11.229561805725098, + "learning_rate": 7.640370083785175e-07, + "lm_loss": 5.3597, + "loss": 1.1756, + "step": 1829, + "text_contrastive_loss": 0.7514, + "train_positive_log_prob": -79.0939, + "train_positive_token_accuracy": 0.0879, + "train_positive_token_prob": 0.0332 + }, + { + "contrastive_loss": 0.3308, + "epoch": 4.1309255079006775, + "grad_norm": 12.839822769165039, + "learning_rate": 7.601867722941642e-07, + "lm_loss": 5.472, + "loss": 1.2783, + "step": 1830, + "text_contrastive_loss": 0.8005, + "train_positive_log_prob": -81.905, + "train_positive_token_accuracy": 0.0772, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3423, + "epoch": 4.133182844243792, + "grad_norm": 12.389822006225586, + "learning_rate": 7.563454637249056e-07, + "lm_loss": 5.5068, + "loss": 1.3078, + "step": 1831, + "text_contrastive_loss": 0.8296, + "train_positive_log_prob": -81.4395, + "train_positive_token_accuracy": 0.0824, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3454, + "epoch": 4.135440180586907, + "grad_norm": 13.781306266784668, + "learning_rate": 7.52513090759151e-07, + "lm_loss": 5.478, + "loss": 1.2568, + "step": 1832, + "text_contrastive_loss": 0.7273, + "train_positive_log_prob": -79.9061, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.2889, + "epoch": 4.1376975169300225, + "grad_norm": 11.953801155090332, + "learning_rate": 7.486896614664962e-07, + "lm_loss": 5.3392, + "loss": 1.1653, + "step": 1833, + "text_contrastive_loss": 0.6849, + "train_positive_log_prob": -79.9176, + "train_positive_token_accuracy": 0.0796, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3061, + "epoch": 4.139954853273138, + "grad_norm": 12.944199562072754, + "learning_rate": 7.448751838977014e-07, + "lm_loss": 5.3706, + "loss": 1.1867, + "step": 1834, + "text_contrastive_loss": 0.6872, + "train_positive_log_prob": -79.452, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3411, + "epoch": 4.142212189616253, + "grad_norm": 12.639607429504395, + "learning_rate": 7.410696660846761e-07, + "lm_loss": 5.4773, + "loss": 1.2776, + "step": 1835, + "text_contrastive_loss": 0.7776, + "train_positive_log_prob": -82.0948, + "train_positive_token_accuracy": 0.0743, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4114, + "epoch": 4.144469525959368, + "grad_norm": 13.546647071838379, + "learning_rate": 7.372731160404672e-07, + "lm_loss": 5.3359, + "loss": 1.3983, + "step": 1836, + "text_contrastive_loss": 0.9066, + "train_positive_log_prob": -78.331, + "train_positive_token_accuracy": 0.0812, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3932, + "epoch": 4.146726862302483, + "grad_norm": 12.406811714172363, + "learning_rate": 7.334855417592385e-07, + "lm_loss": 5.4489, + "loss": 1.313, + "step": 1837, + "text_contrastive_loss": 0.7497, + "train_positive_log_prob": -79.967, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.2987, + "epoch": 4.148984198645598, + "grad_norm": 13.156391143798828, + "learning_rate": 7.297069512162535e-07, + "lm_loss": 5.4151, + "loss": 1.1672, + "step": 1838, + "text_contrastive_loss": 0.6539, + "train_positive_log_prob": -80.7595, + "train_positive_token_accuracy": 0.0731, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.2722, + "epoch": 4.1512415349887135, + "grad_norm": 11.492459297180176, + "learning_rate": 7.25937352367857e-07, + "lm_loss": 5.4001, + "loss": 1.2129, + "step": 1839, + "text_contrastive_loss": 0.8013, + "train_positive_log_prob": -79.5275, + "train_positive_token_accuracy": 0.0812, + "train_positive_token_prob": 0.0328 + }, + { + "contrastive_loss": 0.2776, + "epoch": 4.153498871331829, + "grad_norm": 10.589356422424316, + "learning_rate": 7.22176753151464e-07, + "lm_loss": 5.5647, + "loss": 1.1245, + "step": 1840, + "text_contrastive_loss": 0.5809, + "train_positive_log_prob": -83.297, + "train_positive_token_accuracy": 0.0765, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.2989, + "epoch": 4.155756207674943, + "grad_norm": 12.230340957641602, + "learning_rate": 7.184251614855369e-07, + "lm_loss": 5.4518, + "loss": 1.1219, + "step": 1841, + "text_contrastive_loss": 0.5557, + "train_positive_log_prob": -80.7982, + "train_positive_token_accuracy": 0.0749, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3101, + "epoch": 4.158013544018059, + "grad_norm": 11.801525115966797, + "learning_rate": 7.146825852695749e-07, + "lm_loss": 5.457, + "loss": 1.2132, + "step": 1842, + "text_contrastive_loss": 0.7148, + "train_positive_log_prob": -84.3311, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.3449, + "epoch": 4.160270880361174, + "grad_norm": 12.68387508392334, + "learning_rate": 7.109490323840884e-07, + "lm_loss": 5.4665, + "loss": 1.3371, + "step": 1843, + "text_contrastive_loss": 0.8912, + "train_positive_log_prob": -78.9494, + "train_positive_token_accuracy": 0.0798, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3663, + "epoch": 4.162528216704289, + "grad_norm": 11.69442081451416, + "learning_rate": 7.072245106905928e-07, + "lm_loss": 5.5113, + "loss": 1.2236, + "step": 1844, + "text_contrastive_loss": 0.6124, + "train_positive_log_prob": -81.7223, + "train_positive_token_accuracy": 0.0719, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.3553, + "epoch": 4.164785553047404, + "grad_norm": 12.80620002746582, + "learning_rate": 7.035090280315854e-07, + "lm_loss": 5.55, + "loss": 1.3143, + "step": 1845, + "text_contrastive_loss": 0.808, + "train_positive_log_prob": -81.6681, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.3167, + "epoch": 4.167042889390519, + "grad_norm": 11.069784164428711, + "learning_rate": 6.998025922305313e-07, + "lm_loss": 5.409, + "loss": 1.2387, + "step": 1846, + "text_contrastive_loss": 0.7623, + "train_positive_log_prob": -79.5486, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3587, + "epoch": 4.169300225733634, + "grad_norm": 14.951382637023926, + "learning_rate": 6.961052110918432e-07, + "lm_loss": 5.3124, + "loss": 1.2791, + "step": 1847, + "text_contrastive_loss": 0.7783, + "train_positive_log_prob": -77.8568, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0333 + }, + { + "contrastive_loss": 0.3703, + "epoch": 4.1715575620767495, + "grad_norm": 13.43285846710205, + "learning_rate": 6.924168924008712e-07, + "lm_loss": 5.3905, + "loss": 1.1849, + "step": 1848, + "text_contrastive_loss": 0.5512, + "train_positive_log_prob": -79.5078, + "train_positive_token_accuracy": 0.0816, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3301, + "epoch": 4.173814898419865, + "grad_norm": 13.087642669677734, + "learning_rate": 6.887376439238813e-07, + "lm_loss": 5.4573, + "loss": 1.2557, + "step": 1849, + "text_contrastive_loss": 0.7599, + "train_positive_log_prob": -80.0269, + "train_positive_token_accuracy": 0.0749, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.3175, + "epoch": 4.176072234762979, + "grad_norm": 12.306053161621094, + "learning_rate": 6.850674734080454e-07, + "lm_loss": 5.4582, + "loss": 1.209, + "step": 1850, + "text_contrastive_loss": 0.6914, + "train_positive_log_prob": -80.4212, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.274, + "epoch": 4.178329571106095, + "grad_norm": 12.079581260681152, + "learning_rate": 6.814063885814127e-07, + "lm_loss": 5.4158, + "loss": 1.1677, + "step": 1851, + "text_contrastive_loss": 0.7043, + "train_positive_log_prob": -77.2407, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.2831, + "epoch": 4.18058690744921, + "grad_norm": 11.69601058959961, + "learning_rate": 6.77754397152906e-07, + "lm_loss": 5.3537, + "loss": 1.1951, + "step": 1852, + "text_contrastive_loss": 0.7534, + "train_positive_log_prob": -78.2763, + "train_positive_token_accuracy": 0.0906, + "train_positive_token_prob": 0.0338 + }, + { + "contrastive_loss": 0.3758, + "epoch": 4.182844243792325, + "grad_norm": 13.489706039428711, + "learning_rate": 6.741115068123017e-07, + "lm_loss": 5.366, + "loss": 1.3216, + "step": 1853, + "text_contrastive_loss": 0.8184, + "train_positive_log_prob": -79.5697, + "train_positive_token_accuracy": 0.0867, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.3599, + "epoch": 4.1851015801354405, + "grad_norm": 14.01357650756836, + "learning_rate": 6.704777252302108e-07, + "lm_loss": 5.4534, + "loss": 1.3333, + "step": 1854, + "text_contrastive_loss": 0.8561, + "train_positive_log_prob": -80.72, + "train_positive_token_accuracy": 0.0694, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.3568, + "epoch": 4.187358916478555, + "grad_norm": 14.267851829528809, + "learning_rate": 6.66853060058063e-07, + "lm_loss": 5.4264, + "loss": 1.326, + "step": 1855, + "text_contrastive_loss": 0.8531, + "train_positive_log_prob": -81.6548, + "train_positive_token_accuracy": 0.0823, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.2818, + "epoch": 4.18961625282167, + "grad_norm": 11.369948387145996, + "learning_rate": 6.632375189280948e-07, + "lm_loss": 5.4843, + "loss": 1.2275, + "step": 1856, + "text_contrastive_loss": 0.7947, + "train_positive_log_prob": -80.9986, + "train_positive_token_accuracy": 0.0756, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.3855, + "epoch": 4.191873589164786, + "grad_norm": 13.534159660339355, + "learning_rate": 6.596311094533292e-07, + "lm_loss": 5.4528, + "loss": 1.3522, + "step": 1857, + "text_contrastive_loss": 0.8429, + "train_positive_log_prob": -81.0453, + "train_positive_token_accuracy": 0.0778, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.4202, + "epoch": 4.194130925507901, + "grad_norm": 14.720059394836426, + "learning_rate": 6.56033839227564e-07, + "lm_loss": 5.3853, + "loss": 1.3398, + "step": 1858, + "text_contrastive_loss": 0.7621, + "train_positive_log_prob": -80.1683, + "train_positive_token_accuracy": 0.0829, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.3275, + "epoch": 4.196388261851016, + "grad_norm": 12.959580421447754, + "learning_rate": 6.524457158253472e-07, + "lm_loss": 5.314, + "loss": 1.2654, + "step": 1859, + "text_contrastive_loss": 0.8129, + "train_positive_log_prob": -77.4499, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3433, + "epoch": 4.198645598194131, + "grad_norm": 11.659183502197266, + "learning_rate": 6.488667468019727e-07, + "lm_loss": 5.4754, + "loss": 1.2803, + "step": 1860, + "text_contrastive_loss": 0.7788, + "train_positive_log_prob": -79.5246, + "train_positive_token_accuracy": 0.0737, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.2934, + "epoch": 4.200902934537246, + "grad_norm": 12.40251636505127, + "learning_rate": 6.452969396934567e-07, + "lm_loss": 5.5571, + "loss": 1.1828, + "step": 1861, + "text_contrastive_loss": 0.6674, + "train_positive_log_prob": -81.7094, + "train_positive_token_accuracy": 0.0824, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.2561, + "epoch": 4.203160270880361, + "grad_norm": 12.418269157409668, + "learning_rate": 6.417363020165235e-07, + "lm_loss": 5.3398, + "loss": 1.1196, + "step": 1862, + "text_contrastive_loss": 0.659, + "train_positive_log_prob": -78.7432, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.2779, + "epoch": 4.205417607223477, + "grad_norm": 10.463964462280273, + "learning_rate": 6.381848412685882e-07, + "lm_loss": 5.4204, + "loss": 1.1488, + "step": 1863, + "text_contrastive_loss": 0.6577, + "train_positive_log_prob": -79.5072, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3761, + "epoch": 4.207674943566591, + "grad_norm": 12.59749984741211, + "learning_rate": 6.346425649277454e-07, + "lm_loss": 5.4517, + "loss": 1.3123, + "step": 1864, + "text_contrastive_loss": 0.7819, + "train_positive_log_prob": -81.1997, + "train_positive_token_accuracy": 0.0766, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.383, + "epoch": 4.209932279909706, + "grad_norm": 15.2352933883667, + "learning_rate": 6.31109480452749e-07, + "lm_loss": 5.4643, + "loss": 1.3901, + "step": 1865, + "text_contrastive_loss": 0.9212, + "train_positive_log_prob": -81.1535, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3783, + "epoch": 4.212189616252822, + "grad_norm": 15.096981048583984, + "learning_rate": 6.275855952829995e-07, + "lm_loss": 5.3899, + "loss": 1.3795, + "step": 1866, + "text_contrastive_loss": 0.9245, + "train_positive_log_prob": -79.6714, + "train_positive_token_accuracy": 0.0883, + "train_positive_token_prob": 0.0324 + }, + { + "contrastive_loss": 0.329, + "epoch": 4.214446952595937, + "grad_norm": 11.837920188903809, + "learning_rate": 6.240709168385251e-07, + "lm_loss": 5.414, + "loss": 1.2068, + "step": 1867, + "text_contrastive_loss": 0.6727, + "train_positive_log_prob": -82.2237, + "train_positive_token_accuracy": 0.0782, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3006, + "epoch": 4.216704288939052, + "grad_norm": 11.470030784606934, + "learning_rate": 6.2056545251997e-07, + "lm_loss": 5.4016, + "loss": 1.2199, + "step": 1868, + "text_contrastive_loss": 0.7585, + "train_positive_log_prob": -80.0099, + "train_positive_token_accuracy": 0.0728, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.2731, + "epoch": 4.218961625282167, + "grad_norm": 11.526351928710938, + "learning_rate": 6.170692097085751e-07, + "lm_loss": 5.4008, + "loss": 1.1865, + "step": 1869, + "text_contrastive_loss": 0.7465, + "train_positive_log_prob": -80.552, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3099, + "epoch": 4.221218961625282, + "grad_norm": 11.92476749420166, + "learning_rate": 6.135821957661658e-07, + "lm_loss": 5.3979, + "loss": 1.1786, + "step": 1870, + "text_contrastive_loss": 0.6578, + "train_positive_log_prob": -80.4431, + "train_positive_token_accuracy": 0.0896, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.302, + "epoch": 4.223476297968397, + "grad_norm": 11.30788803100586, + "learning_rate": 6.101044180351318e-07, + "lm_loss": 5.5367, + "loss": 1.1983, + "step": 1871, + "text_contrastive_loss": 0.6853, + "train_positive_log_prob": -84.3898, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.3195, + "epoch": 4.225733634311513, + "grad_norm": 12.253396034240723, + "learning_rate": 6.066358838384184e-07, + "lm_loss": 5.4404, + "loss": 1.2195, + "step": 1872, + "text_contrastive_loss": 0.7121, + "train_positive_log_prob": -81.2507, + "train_positive_token_accuracy": 0.0824, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.3138, + "epoch": 4.227990970654628, + "grad_norm": 12.765376091003418, + "learning_rate": 6.031766004795047e-07, + "lm_loss": 5.4653, + "loss": 1.2335, + "step": 1873, + "text_contrastive_loss": 0.7463, + "train_positive_log_prob": -81.1702, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.2873, + "epoch": 4.230248306997742, + "grad_norm": 11.71290111541748, + "learning_rate": 5.997265752423936e-07, + "lm_loss": 5.4351, + "loss": 1.1675, + "step": 1874, + "text_contrastive_loss": 0.6733, + "train_positive_log_prob": -80.8498, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.2556, + "epoch": 4.232505643340858, + "grad_norm": 12.444053649902344, + "learning_rate": 5.962858153915896e-07, + "lm_loss": 5.4355, + "loss": 1.1841, + "step": 1875, + "text_contrastive_loss": 0.7697, + "train_positive_log_prob": -80.2772, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3621, + "epoch": 4.234762979683973, + "grad_norm": 12.990580558776855, + "learning_rate": 5.928543281720917e-07, + "lm_loss": 5.4451, + "loss": 1.3331, + "step": 1876, + "text_contrastive_loss": 0.853, + "train_positive_log_prob": -80.9955, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.4092, + "epoch": 4.237020316027088, + "grad_norm": 14.139510154724121, + "learning_rate": 5.894321208093712e-07, + "lm_loss": 5.4285, + "loss": 1.3919, + "step": 1877, + "text_contrastive_loss": 0.8797, + "train_positive_log_prob": -81.3397, + "train_positive_token_accuracy": 0.0772, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.304, + "epoch": 4.239277652370204, + "grad_norm": 11.48646068572998, + "learning_rate": 5.860192005093624e-07, + "lm_loss": 5.4642, + "loss": 1.2589, + "step": 1878, + "text_contrastive_loss": 0.817, + "train_positive_log_prob": -82.0083, + "train_positive_token_accuracy": 0.0836, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3297, + "epoch": 4.241534988713318, + "grad_norm": 13.291266441345215, + "learning_rate": 5.826155744584405e-07, + "lm_loss": 5.3317, + "loss": 1.1858, + "step": 1879, + "text_contrastive_loss": 0.6459, + "train_positive_log_prob": -78.1424, + "train_positive_token_accuracy": 0.0746, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.2772, + "epoch": 4.243792325056433, + "grad_norm": 11.592615127563477, + "learning_rate": 5.792212498234134e-07, + "lm_loss": 5.3731, + "loss": 1.2347, + "step": 1880, + "text_contrastive_loss": 0.8403, + "train_positive_log_prob": -79.7798, + "train_positive_token_accuracy": 0.0905, + "train_positive_token_prob": 0.0341 + }, + { + "contrastive_loss": 0.3423, + "epoch": 4.246049661399549, + "grad_norm": 13.210052490234375, + "learning_rate": 5.758362337515028e-07, + "lm_loss": 5.4228, + "loss": 1.3055, + "step": 1881, + "text_contrastive_loss": 0.8418, + "train_positive_log_prob": -80.8149, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.2854, + "epoch": 4.248306997742664, + "grad_norm": 12.998099327087402, + "learning_rate": 5.724605333703303e-07, + "lm_loss": 5.4218, + "loss": 1.209, + "step": 1882, + "text_contrastive_loss": 0.7628, + "train_positive_log_prob": -77.7446, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.315, + "epoch": 4.250564334085778, + "grad_norm": 12.372293472290039, + "learning_rate": 5.690941557878988e-07, + "lm_loss": 5.4566, + "loss": 1.2781, + "step": 1883, + "text_contrastive_loss": 0.8348, + "train_positive_log_prob": -80.8219, + "train_positive_token_accuracy": 0.0717, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.2828, + "epoch": 4.252821670428894, + "grad_norm": 12.008219718933105, + "learning_rate": 5.657371080925866e-07, + "lm_loss": 5.4191, + "loss": 1.142, + "step": 1884, + "text_contrastive_loss": 0.6346, + "train_positive_log_prob": -80.5659, + "train_positive_token_accuracy": 0.0788, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.2387, + "epoch": 4.255079006772009, + "grad_norm": 10.793996810913086, + "learning_rate": 5.623893973531225e-07, + "lm_loss": 5.4935, + "loss": 1.0843, + "step": 1885, + "text_contrastive_loss": 0.5925, + "train_positive_log_prob": -81.6112, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3258, + "epoch": 4.257336343115124, + "grad_norm": 11.538509368896484, + "learning_rate": 5.590510306185765e-07, + "lm_loss": 5.4411, + "loss": 1.2877, + "step": 1886, + "text_contrastive_loss": 0.8355, + "train_positive_log_prob": -81.0478, + "train_positive_token_accuracy": 0.0782, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3302, + "epoch": 4.25959367945824, + "grad_norm": 12.504379272460938, + "learning_rate": 5.557220149183412e-07, + "lm_loss": 5.4643, + "loss": 1.3162, + "step": 1887, + "text_contrastive_loss": 0.8791, + "train_positive_log_prob": -81.9218, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.2919, + "epoch": 4.261851015801354, + "grad_norm": 12.128462791442871, + "learning_rate": 5.524023572621229e-07, + "lm_loss": 5.4277, + "loss": 1.2669, + "step": 1888, + "text_contrastive_loss": 0.8645, + "train_positive_log_prob": -80.6491, + "train_positive_token_accuracy": 0.0718, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.2919, + "epoch": 4.264108352144469, + "grad_norm": 12.53120231628418, + "learning_rate": 5.4909206463992e-07, + "lm_loss": 5.4282, + "loss": 1.2248, + "step": 1889, + "text_contrastive_loss": 0.7803, + "train_positive_log_prob": -80.2809, + "train_positive_token_accuracy": 0.0814, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.35, + "epoch": 4.266365688487585, + "grad_norm": 13.36356258392334, + "learning_rate": 5.457911440220154e-07, + "lm_loss": 5.4442, + "loss": 1.3205, + "step": 1890, + "text_contrastive_loss": 0.8522, + "train_positive_log_prob": -79.2366, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.297, + "epoch": 4.2686230248307, + "grad_norm": 11.962207794189453, + "learning_rate": 5.424996023589524e-07, + "lm_loss": 5.4935, + "loss": 1.2066, + "step": 1891, + "text_contrastive_loss": 0.7204, + "train_positive_log_prob": -80.2594, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3514, + "epoch": 4.270880361173815, + "grad_norm": 13.353402137756348, + "learning_rate": 5.392174465815308e-07, + "lm_loss": 5.4393, + "loss": 1.3342, + "step": 1892, + "text_contrastive_loss": 0.8776, + "train_positive_log_prob": -80.5282, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.2009, + "epoch": 4.27313769751693, + "grad_norm": 9.642064094543457, + "learning_rate": 5.359446836007842e-07, + "lm_loss": 5.3742, + "loss": 1.047, + "step": 1893, + "text_contrastive_loss": 0.6173, + "train_positive_log_prob": -78.6003, + "train_positive_token_accuracy": 0.0737, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3002, + "epoch": 4.275395033860045, + "grad_norm": 12.75507640838623, + "learning_rate": 5.326813203079706e-07, + "lm_loss": 5.4046, + "loss": 1.2259, + "step": 1894, + "text_contrastive_loss": 0.7705, + "train_positive_log_prob": -78.8536, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3266, + "epoch": 4.27765237020316, + "grad_norm": 13.167447090148926, + "learning_rate": 5.294273635745517e-07, + "lm_loss": 5.408, + "loss": 1.3222, + "step": 1895, + "text_contrastive_loss": 0.9095, + "train_positive_log_prob": -80.635, + "train_positive_token_accuracy": 0.0841, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.5008, + "epoch": 4.279909706546276, + "grad_norm": 12.977544784545898, + "learning_rate": 5.261828202521868e-07, + "lm_loss": 5.4682, + "loss": 1.5206, + "step": 1896, + "text_contrastive_loss": 0.946, + "train_positive_log_prob": -83.0882, + "train_positive_token_accuracy": 0.0756, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.232, + "epoch": 4.282167042889391, + "grad_norm": 11.340666770935059, + "learning_rate": 5.229476971727115e-07, + "lm_loss": 5.3678, + "loss": 1.1104, + "step": 1897, + "text_contrastive_loss": 0.6833, + "train_positive_log_prob": -78.8153, + "train_positive_token_accuracy": 0.0889, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3095, + "epoch": 4.284424379232505, + "grad_norm": 11.869278907775879, + "learning_rate": 5.197220011481274e-07, + "lm_loss": 5.3944, + "loss": 1.2193, + "step": 1898, + "text_contrastive_loss": 0.7407, + "train_positive_log_prob": -79.4202, + "train_positive_token_accuracy": 0.0833, + "train_positive_token_prob": 0.0324 + }, + { + "contrastive_loss": 0.3121, + "epoch": 4.286681715575621, + "grad_norm": 11.564894676208496, + "learning_rate": 5.165057389705835e-07, + "lm_loss": 5.4148, + "loss": 1.2322, + "step": 1899, + "text_contrastive_loss": 0.7573, + "train_positive_log_prob": -79.1711, + "train_positive_token_accuracy": 0.0796, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3902, + "epoch": 4.288939051918736, + "grad_norm": 13.57264518737793, + "learning_rate": 5.132989174123659e-07, + "lm_loss": 5.486, + "loss": 1.314, + "step": 1900, + "text_contrastive_loss": 0.7503, + "train_positive_log_prob": -81.2414, + "train_positive_token_accuracy": 0.0699, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.3479, + "epoch": 4.291196388261851, + "grad_norm": 12.010721206665039, + "learning_rate": 5.101015432258843e-07, + "lm_loss": 5.448, + "loss": 1.2726, + "step": 1901, + "text_contrastive_loss": 0.7597, + "train_positive_log_prob": -80.3977, + "train_positive_token_accuracy": 0.081, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.3134, + "epoch": 4.293453724604966, + "grad_norm": 12.800092697143555, + "learning_rate": 5.069136231436539e-07, + "lm_loss": 5.4215, + "loss": 1.2087, + "step": 1902, + "text_contrastive_loss": 0.7063, + "train_positive_log_prob": -79.2919, + "train_positive_token_accuracy": 0.082, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3708, + "epoch": 4.295711060948081, + "grad_norm": 10.993557929992676, + "learning_rate": 5.037351638782812e-07, + "lm_loss": 5.4392, + "loss": 1.363, + "step": 1903, + "text_contrastive_loss": 0.8966, + "train_positive_log_prob": -79.678, + "train_positive_token_accuracy": 0.0763, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3717, + "epoch": 4.297968397291196, + "grad_norm": 12.910844802856445, + "learning_rate": 5.00566172122453e-07, + "lm_loss": 5.3877, + "loss": 1.3213, + "step": 1904, + "text_contrastive_loss": 0.8216, + "train_positive_log_prob": -79.9558, + "train_positive_token_accuracy": 0.0777, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.4046, + "epoch": 4.300225733634312, + "grad_norm": 12.662449836730957, + "learning_rate": 4.97406654548922e-07, + "lm_loss": 5.3821, + "loss": 1.4675, + "step": 1905, + "text_contrastive_loss": 1.0493, + "train_positive_log_prob": -77.7959, + "train_positive_token_accuracy": 0.0835, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.4331, + "epoch": 4.302483069977427, + "grad_norm": 13.598183631896973, + "learning_rate": 4.942566178104924e-07, + "lm_loss": 5.3445, + "loss": 1.3805, + "step": 1906, + "text_contrastive_loss": 0.826, + "train_positive_log_prob": -78.2677, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3339, + "epoch": 4.3047404063205414, + "grad_norm": 11.163721084594727, + "learning_rate": 4.911160685400008e-07, + "lm_loss": 5.4164, + "loss": 1.266, + "step": 1907, + "text_contrastive_loss": 0.7808, + "train_positive_log_prob": -82.0511, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.2948, + "epoch": 4.306997742663657, + "grad_norm": 11.243133544921875, + "learning_rate": 4.879850133503106e-07, + "lm_loss": 5.4099, + "loss": 1.1973, + "step": 1908, + "text_contrastive_loss": 0.7231, + "train_positive_log_prob": -81.1485, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.2864, + "epoch": 4.309255079006772, + "grad_norm": 12.847887992858887, + "learning_rate": 4.848634588342932e-07, + "lm_loss": 5.4605, + "loss": 1.2076, + "step": 1909, + "text_contrastive_loss": 0.7503, + "train_positive_log_prob": -81.5449, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.2958, + "epoch": 4.311512415349887, + "grad_norm": 12.57180404663086, + "learning_rate": 4.817514115648164e-07, + "lm_loss": 5.4021, + "loss": 1.1114, + "step": 1910, + "text_contrastive_loss": 0.551, + "train_positive_log_prob": -78.7542, + "train_positive_token_accuracy": 0.0723, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.2957, + "epoch": 4.313769751693002, + "grad_norm": 10.835553169250488, + "learning_rate": 4.786488780947246e-07, + "lm_loss": 5.413, + "loss": 1.177, + "step": 1911, + "text_contrastive_loss": 0.68, + "train_positive_log_prob": -80.5582, + "train_positive_token_accuracy": 0.0751, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.332, + "epoch": 4.316027088036117, + "grad_norm": 12.120919227600098, + "learning_rate": 4.755558649568337e-07, + "lm_loss": 5.4679, + "loss": 1.2179, + "step": 1912, + "text_contrastive_loss": 0.6782, + "train_positive_log_prob": -81.1225, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3434, + "epoch": 4.318284424379232, + "grad_norm": 12.455890655517578, + "learning_rate": 4.7247237866391236e-07, + "lm_loss": 5.3573, + "loss": 1.2703, + "step": 1913, + "text_contrastive_loss": 0.7823, + "train_positive_log_prob": -78.1845, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.2288, + "epoch": 4.320541760722348, + "grad_norm": 10.431032180786133, + "learning_rate": 4.6939842570867034e-07, + "lm_loss": 5.2493, + "loss": 1.0763, + "step": 1914, + "text_contrastive_loss": 0.6451, + "train_positive_log_prob": -76.5625, + "train_positive_token_accuracy": 0.0841, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.2848, + "epoch": 4.322799097065463, + "grad_norm": 11.289437294006348, + "learning_rate": 4.663340125637389e-07, + "lm_loss": 5.4806, + "loss": 1.1374, + "step": 1915, + "text_contrastive_loss": 0.6091, + "train_positive_log_prob": -84.048, + "train_positive_token_accuracy": 0.0828, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3687, + "epoch": 4.3250564334085775, + "grad_norm": 13.44126033782959, + "learning_rate": 4.6327914568166763e-07, + "lm_loss": 5.5336, + "loss": 1.3129, + "step": 1916, + "text_contrastive_loss": 0.7817, + "train_positive_log_prob": -83.3066, + "train_positive_token_accuracy": 0.0778, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3784, + "epoch": 4.327313769751693, + "grad_norm": 11.802950859069824, + "learning_rate": 4.6023383149490066e-07, + "lm_loss": 5.3629, + "loss": 1.3362, + "step": 1917, + "text_contrastive_loss": 0.8429, + "train_positive_log_prob": -79.6871, + "train_positive_token_accuracy": 0.0883, + "train_positive_token_prob": 0.0326 + }, + { + "contrastive_loss": 0.3071, + "epoch": 4.329571106094808, + "grad_norm": 12.232736587524414, + "learning_rate": 4.571980764157724e-07, + "lm_loss": 5.421, + "loss": 1.2417, + "step": 1918, + "text_contrastive_loss": 0.785, + "train_positive_log_prob": -79.7273, + "train_positive_token_accuracy": 0.0839, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3813, + "epoch": 4.331828442437923, + "grad_norm": 12.735857009887695, + "learning_rate": 4.5417188683648417e-07, + "lm_loss": 5.4474, + "loss": 1.3502, + "step": 1919, + "text_contrastive_loss": 0.8484, + "train_positive_log_prob": -81.578, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3293, + "epoch": 4.334085778781039, + "grad_norm": 12.686532020568848, + "learning_rate": 4.511552691290988e-07, + "lm_loss": 5.3641, + "loss": 1.2786, + "step": 1920, + "text_contrastive_loss": 0.8258, + "train_positive_log_prob": -80.0228, + "train_positive_token_accuracy": 0.0812, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3391, + "epoch": 4.336343115124153, + "grad_norm": 12.862334251403809, + "learning_rate": 4.4814822964552363e-07, + "lm_loss": 5.4243, + "loss": 1.2839, + "step": 1921, + "text_contrastive_loss": 0.8046, + "train_positive_log_prob": -82.2109, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.4771, + "epoch": 4.3386004514672685, + "grad_norm": 16.829465866088867, + "learning_rate": 4.4515077471749767e-07, + "lm_loss": 5.3488, + "loss": 1.4713, + "step": 1922, + "text_contrastive_loss": 0.9187, + "train_positive_log_prob": -77.4326, + "train_positive_token_accuracy": 0.0863, + "train_positive_token_prob": 0.0336 + }, + { + "contrastive_loss": 0.4245, + "epoch": 4.340857787810384, + "grad_norm": 14.479100227355957, + "learning_rate": 4.421629106565778e-07, + "lm_loss": 5.4165, + "loss": 1.3536, + "step": 1923, + "text_contrastive_loss": 0.7748, + "train_positive_log_prob": -80.1873, + "train_positive_token_accuracy": 0.0821, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3624, + "epoch": 4.343115124153499, + "grad_norm": 14.585872650146484, + "learning_rate": 4.391846437541258e-07, + "lm_loss": 5.5271, + "loss": 1.3041, + "step": 1924, + "text_contrastive_loss": 0.7781, + "train_positive_log_prob": -82.4796, + "train_positive_token_accuracy": 0.0743, + "train_positive_token_prob": 0.0299 + }, + { + "contrastive_loss": 0.3458, + "epoch": 4.345372460496614, + "grad_norm": 12.340688705444336, + "learning_rate": 4.362159802812971e-07, + "lm_loss": 5.3881, + "loss": 1.2718, + "step": 1925, + "text_contrastive_loss": 0.7743, + "train_positive_log_prob": -78.5049, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3803, + "epoch": 4.347629796839729, + "grad_norm": 11.838594436645508, + "learning_rate": 4.332569264890252e-07, + "lm_loss": 5.4003, + "loss": 1.2923, + "step": 1926, + "text_contrastive_loss": 0.7439, + "train_positive_log_prob": -79.0865, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.348, + "epoch": 4.349887133182844, + "grad_norm": 13.021111488342285, + "learning_rate": 4.3030748860800606e-07, + "lm_loss": 5.4495, + "loss": 1.2221, + "step": 1927, + "text_contrastive_loss": 0.6583, + "train_positive_log_prob": -81.0201, + "train_positive_token_accuracy": 0.0772, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3268, + "epoch": 4.3521444695259595, + "grad_norm": 14.07114315032959, + "learning_rate": 4.273676728486925e-07, + "lm_loss": 5.4473, + "loss": 1.22, + "step": 1928, + "text_contrastive_loss": 0.6969, + "train_positive_log_prob": -80.8161, + "train_positive_token_accuracy": 0.07, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.3049, + "epoch": 4.354401805869075, + "grad_norm": 11.086020469665527, + "learning_rate": 4.244374854012734e-07, + "lm_loss": 5.3687, + "loss": 1.1745, + "step": 1929, + "text_contrastive_loss": 0.6655, + "train_positive_log_prob": -79.3783, + "train_positive_token_accuracy": 0.0851, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.3782, + "epoch": 4.356659142212189, + "grad_norm": 13.118365287780762, + "learning_rate": 4.215169324356666e-07, + "lm_loss": 5.4613, + "loss": 1.3319, + "step": 1930, + "text_contrastive_loss": 0.815, + "train_positive_log_prob": -80.5109, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.264, + "epoch": 4.3589164785553045, + "grad_norm": 13.01807975769043, + "learning_rate": 4.186060201014991e-07, + "lm_loss": 5.4787, + "loss": 1.1231, + "step": 1931, + "text_contrastive_loss": 0.6223, + "train_positive_log_prob": -78.7138, + "train_positive_token_accuracy": 0.0832, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.314, + "epoch": 4.36117381489842, + "grad_norm": 12.192882537841797, + "learning_rate": 4.157047545281029e-07, + "lm_loss": 5.4508, + "loss": 1.2361, + "step": 1932, + "text_contrastive_loss": 0.7541, + "train_positive_log_prob": -80.3515, + "train_positive_token_accuracy": 0.0782, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.347, + "epoch": 4.363431151241535, + "grad_norm": 13.625689506530762, + "learning_rate": 4.1281314182449405e-07, + "lm_loss": 5.4212, + "loss": 1.3182, + "step": 1933, + "text_contrastive_loss": 0.8582, + "train_positive_log_prob": -79.8801, + "train_positive_token_accuracy": 0.0697, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.3737, + "epoch": 4.3656884875846504, + "grad_norm": 13.674962997436523, + "learning_rate": 4.099311880793655e-07, + "lm_loss": 5.3695, + "loss": 1.3613, + "step": 1934, + "text_contrastive_loss": 0.9014, + "train_positive_log_prob": -80.795, + "train_positive_token_accuracy": 0.0854, + "train_positive_token_prob": 0.0332 + }, + { + "contrastive_loss": 0.2896, + "epoch": 4.367945823927765, + "grad_norm": 10.94249439239502, + "learning_rate": 4.070588993610697e-07, + "lm_loss": 5.4835, + "loss": 1.132, + "step": 1935, + "text_contrastive_loss": 0.5881, + "train_positive_log_prob": -81.2525, + "train_positive_token_accuracy": 0.0801, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3545, + "epoch": 4.37020316027088, + "grad_norm": 13.138219833374023, + "learning_rate": 4.0419628171760927e-07, + "lm_loss": 5.4221, + "loss": 1.2584, + "step": 1936, + "text_contrastive_loss": 0.7235, + "train_positive_log_prob": -78.8724, + "train_positive_token_accuracy": 0.0798, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3201, + "epoch": 4.3724604966139955, + "grad_norm": 11.432778358459473, + "learning_rate": 4.0134334117662375e-07, + "lm_loss": 5.428, + "loss": 1.3001, + "step": 1937, + "text_contrastive_loss": 0.8745, + "train_positive_log_prob": -81.5894, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3565, + "epoch": 4.374717832957111, + "grad_norm": 12.832627296447754, + "learning_rate": 3.985000837453756e-07, + "lm_loss": 5.4949, + "loss": 1.2886, + "step": 1938, + "text_contrastive_loss": 0.7651, + "train_positive_log_prob": -80.6834, + "train_positive_token_accuracy": 0.0742, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3247, + "epoch": 4.376975169300226, + "grad_norm": 12.241453170776367, + "learning_rate": 3.9566651541073586e-07, + "lm_loss": 5.3916, + "loss": 1.1856, + "step": 1939, + "text_contrastive_loss": 0.6435, + "train_positive_log_prob": -81.8242, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3462, + "epoch": 4.3792325056433405, + "grad_norm": 11.593427658081055, + "learning_rate": 3.928426421391773e-07, + "lm_loss": 5.4695, + "loss": 1.2415, + "step": 1940, + "text_contrastive_loss": 0.6967, + "train_positive_log_prob": -80.1636, + "train_positive_token_accuracy": 0.0744, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3291, + "epoch": 4.381489841986456, + "grad_norm": 10.992756843566895, + "learning_rate": 3.9002846987675704e-07, + "lm_loss": 5.5485, + "loss": 1.2492, + "step": 1941, + "text_contrastive_loss": 0.7304, + "train_positive_log_prob": -82.7931, + "train_positive_token_accuracy": 0.067, + "train_positive_token_prob": 0.0293 + }, + { + "contrastive_loss": 0.3839, + "epoch": 4.383747178329571, + "grad_norm": 13.100747108459473, + "learning_rate": 3.872240045491055e-07, + "lm_loss": 5.4565, + "loss": 1.2941, + "step": 1942, + "text_contrastive_loss": 0.7292, + "train_positive_log_prob": -80.121, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.2624, + "epoch": 4.3860045146726865, + "grad_norm": 10.662866592407227, + "learning_rate": 3.8442925206141237e-07, + "lm_loss": 5.4237, + "loss": 1.1786, + "step": 1943, + "text_contrastive_loss": 0.7477, + "train_positive_log_prob": -80.6812, + "train_positive_token_accuracy": 0.0832, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.337, + "epoch": 4.388261851015802, + "grad_norm": 13.129222869873047, + "learning_rate": 3.8164421829841756e-07, + "lm_loss": 5.435, + "loss": 1.2956, + "step": 1944, + "text_contrastive_loss": 0.8303, + "train_positive_log_prob": -81.333, + "train_positive_token_accuracy": 0.0775, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3272, + "epoch": 4.390519187358916, + "grad_norm": 12.331649780273438, + "learning_rate": 3.7886890912439633e-07, + "lm_loss": 5.4032, + "loss": 1.2842, + "step": 1945, + "text_contrastive_loss": 0.8334, + "train_positive_log_prob": -79.7199, + "train_positive_token_accuracy": 0.0741, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.2781, + "epoch": 4.3927765237020315, + "grad_norm": 10.994354248046875, + "learning_rate": 3.761033303831474e-07, + "lm_loss": 5.404, + "loss": 1.1684, + "step": 1946, + "text_contrastive_loss": 0.6999, + "train_positive_log_prob": -80.743, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3335, + "epoch": 4.395033860045147, + "grad_norm": 12.405427932739258, + "learning_rate": 3.733474878979798e-07, + "lm_loss": 5.5144, + "loss": 1.2436, + "step": 1947, + "text_contrastive_loss": 0.7172, + "train_positive_log_prob": -81.9637, + "train_positive_token_accuracy": 0.0783, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3193, + "epoch": 4.397291196388262, + "grad_norm": 11.412611961364746, + "learning_rate": 3.706013874717024e-07, + "lm_loss": 5.3838, + "loss": 1.2033, + "step": 1948, + "text_contrastive_loss": 0.6912, + "train_positive_log_prob": -78.0589, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3399, + "epoch": 4.399548532731377, + "grad_norm": 12.102020263671875, + "learning_rate": 3.678650348866114e-07, + "lm_loss": 5.4764, + "loss": 1.2653, + "step": 1949, + "text_contrastive_loss": 0.7555, + "train_positive_log_prob": -82.0168, + "train_positive_token_accuracy": 0.0806, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3276, + "epoch": 4.401805869074492, + "grad_norm": 11.14810562133789, + "learning_rate": 3.651384359044774e-07, + "lm_loss": 5.4012, + "loss": 1.209, + "step": 1950, + "text_contrastive_loss": 0.6825, + "train_positive_log_prob": -78.6138, + "train_positive_token_accuracy": 0.0859, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3103, + "epoch": 4.404063205417607, + "grad_norm": 11.960348129272461, + "learning_rate": 3.6242159626653004e-07, + "lm_loss": 5.2843, + "loss": 1.2788, + "step": 1951, + "text_contrastive_loss": 0.8803, + "train_positive_log_prob": -79.5112, + "train_positive_token_accuracy": 0.082, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.3259, + "epoch": 4.4063205417607225, + "grad_norm": 12.907064437866211, + "learning_rate": 3.597145216934556e-07, + "lm_loss": 5.5265, + "loss": 1.3072, + "step": 1952, + "text_contrastive_loss": 0.8573, + "train_positive_log_prob": -83.2044, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.2869, + "epoch": 4.408577878103838, + "grad_norm": 10.999371528625488, + "learning_rate": 3.570172178853731e-07, + "lm_loss": 5.4523, + "loss": 1.221, + "step": 1953, + "text_contrastive_loss": 0.7776, + "train_positive_log_prob": -81.2939, + "train_positive_token_accuracy": 0.0739, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3081, + "epoch": 4.410835214446952, + "grad_norm": 13.100547790527344, + "learning_rate": 3.5432969052183186e-07, + "lm_loss": 5.4292, + "loss": 1.2051, + "step": 1954, + "text_contrastive_loss": 0.7082, + "train_positive_log_prob": -78.7841, + "train_positive_token_accuracy": 0.0727, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.296, + "epoch": 4.413092550790068, + "grad_norm": 10.83736801147461, + "learning_rate": 3.516519452617922e-07, + "lm_loss": 5.4032, + "loss": 1.192, + "step": 1955, + "text_contrastive_loss": 0.7113, + "train_positive_log_prob": -79.2575, + "train_positive_token_accuracy": 0.0743, + "train_positive_token_prob": 0.0294 + }, + { + "contrastive_loss": 0.3578, + "epoch": 4.415349887133183, + "grad_norm": 12.546442031860352, + "learning_rate": 3.4898398774361854e-07, + "lm_loss": 5.2682, + "loss": 1.24, + "step": 1956, + "text_contrastive_loss": 0.7108, + "train_positive_log_prob": -77.5012, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0326 + }, + { + "contrastive_loss": 0.2611, + "epoch": 4.417607223476298, + "grad_norm": 12.052750587463379, + "learning_rate": 3.463258235850653e-07, + "lm_loss": 5.4688, + "loss": 1.2028, + "step": 1957, + "text_contrastive_loss": 0.7897, + "train_positive_log_prob": -82.3332, + "train_positive_token_accuracy": 0.082, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3749, + "epoch": 4.4198645598194135, + "grad_norm": 13.044675827026367, + "learning_rate": 3.4367745838326807e-07, + "lm_loss": 5.4399, + "loss": 1.3229, + "step": 1958, + "text_contrastive_loss": 0.8081, + "train_positive_log_prob": -80.0877, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3659, + "epoch": 4.422121896162528, + "grad_norm": 12.287303924560547, + "learning_rate": 3.410388977147244e-07, + "lm_loss": 5.3685, + "loss": 1.2963, + "step": 1959, + "text_contrastive_loss": 0.7873, + "train_positive_log_prob": -80.1307, + "train_positive_token_accuracy": 0.079, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.2772, + "epoch": 4.424379232505643, + "grad_norm": 11.425829887390137, + "learning_rate": 3.3841014713529184e-07, + "lm_loss": 5.4051, + "loss": 1.1232, + "step": 1960, + "text_contrastive_loss": 0.6109, + "train_positive_log_prob": -80.7355, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.4266, + "epoch": 4.426636568848759, + "grad_norm": 13.178923606872559, + "learning_rate": 3.357912121801682e-07, + "lm_loss": 5.2982, + "loss": 1.4121, + "step": 1961, + "text_contrastive_loss": 0.9113, + "train_positive_log_prob": -77.0727, + "train_positive_token_accuracy": 0.0851, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3219, + "epoch": 4.428893905191874, + "grad_norm": 11.594698905944824, + "learning_rate": 3.331820983638867e-07, + "lm_loss": 5.3766, + "loss": 1.2769, + "step": 1962, + "text_contrastive_loss": 0.8348, + "train_positive_log_prob": -78.5102, + "train_positive_token_accuracy": 0.0861, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3345, + "epoch": 4.431151241534989, + "grad_norm": 13.706225395202637, + "learning_rate": 3.3058281118029553e-07, + "lm_loss": 5.4998, + "loss": 1.2857, + "step": 1963, + "text_contrastive_loss": 0.8023, + "train_positive_log_prob": -83.3971, + "train_positive_token_accuracy": 0.0801, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.3758, + "epoch": 4.433408577878104, + "grad_norm": 11.285801887512207, + "learning_rate": 3.279933561025567e-07, + "lm_loss": 5.4765, + "loss": 1.3546, + "step": 1964, + "text_contrastive_loss": 0.8623, + "train_positive_log_prob": -81.0579, + "train_positive_token_accuracy": 0.08, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3827, + "epoch": 4.435665914221219, + "grad_norm": 12.76725959777832, + "learning_rate": 3.254137385831263e-07, + "lm_loss": 5.3853, + "loss": 1.2507, + "step": 1965, + "text_contrastive_loss": 0.6589, + "train_positive_log_prob": -80.002, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3678, + "epoch": 4.437923250564334, + "grad_norm": 13.18164348602295, + "learning_rate": 3.2284396405374787e-07, + "lm_loss": 5.4816, + "loss": 1.3896, + "step": 1966, + "text_contrastive_loss": 0.9472, + "train_positive_log_prob": -81.1094, + "train_positive_token_accuracy": 0.0678, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.3055, + "epoch": 4.4401805869074495, + "grad_norm": 12.035500526428223, + "learning_rate": 3.202840379254374e-07, + "lm_loss": 5.449, + "loss": 1.2139, + "step": 1967, + "text_contrastive_loss": 0.7269, + "train_positive_log_prob": -81.3945, + "train_positive_token_accuracy": 0.0892, + "train_positive_token_prob": 0.0328 + }, + { + "contrastive_loss": 0.3154, + "epoch": 4.442437923250564, + "grad_norm": 12.394524574279785, + "learning_rate": 3.177339655884737e-07, + "lm_loss": 5.3774, + "loss": 1.2116, + "step": 1968, + "text_contrastive_loss": 0.7171, + "train_positive_log_prob": -78.2864, + "train_positive_token_accuracy": 0.0775, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.4005, + "epoch": 4.444695259593679, + "grad_norm": 14.561430931091309, + "learning_rate": 3.151937524123905e-07, + "lm_loss": 5.395, + "loss": 1.3717, + "step": 1969, + "text_contrastive_loss": 0.8635, + "train_positive_log_prob": -80.0833, + "train_positive_token_accuracy": 0.0763, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3152, + "epoch": 4.446952595936795, + "grad_norm": 12.581046104431152, + "learning_rate": 3.1266340374595693e-07, + "lm_loss": 5.3934, + "loss": 1.2718, + "step": 1970, + "text_contrastive_loss": 0.8344, + "train_positive_log_prob": -80.0597, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.281, + "epoch": 4.44920993227991, + "grad_norm": 11.659996032714844, + "learning_rate": 3.1014292491717444e-07, + "lm_loss": 5.4499, + "loss": 1.1697, + "step": 1971, + "text_contrastive_loss": 0.6875, + "train_positive_log_prob": -81.4293, + "train_positive_token_accuracy": 0.0847, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.362, + "epoch": 4.451467268623025, + "grad_norm": 12.612107276916504, + "learning_rate": 3.076323212332605e-07, + "lm_loss": 5.5479, + "loss": 1.3187, + "step": 1972, + "text_contrastive_loss": 0.8039, + "train_positive_log_prob": -82.9572, + "train_positive_token_accuracy": 0.0765, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.2483, + "epoch": 4.45372460496614, + "grad_norm": 10.216300010681152, + "learning_rate": 3.0513159798063906e-07, + "lm_loss": 5.4623, + "loss": 1.1779, + "step": 1973, + "text_contrastive_loss": 0.7669, + "train_positive_log_prob": -81.6564, + "train_positive_token_accuracy": 0.0766, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.296, + "epoch": 4.455981941309255, + "grad_norm": 11.38882827758789, + "learning_rate": 3.026407604249315e-07, + "lm_loss": 5.4382, + "loss": 1.1883, + "step": 1974, + "text_contrastive_loss": 0.6971, + "train_positive_log_prob": -81.1859, + "train_positive_token_accuracy": 0.0853, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.3527, + "epoch": 4.45823927765237, + "grad_norm": 13.218438148498535, + "learning_rate": 3.0015981381094073e-07, + "lm_loss": 5.3996, + "loss": 1.4143, + "step": 1975, + "text_contrastive_loss": 1.0432, + "train_positive_log_prob": -80.1686, + "train_positive_token_accuracy": 0.0852, + "train_positive_token_prob": 0.033 + }, + { + "contrastive_loss": 0.3633, + "epoch": 4.460496613995486, + "grad_norm": 12.95971965789795, + "learning_rate": 2.976887633626435e-07, + "lm_loss": 5.4176, + "loss": 1.2737, + "step": 1976, + "text_contrastive_loss": 0.7373, + "train_positive_log_prob": -80.5257, + "train_positive_token_accuracy": 0.0767, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.2797, + "epoch": 4.4627539503386, + "grad_norm": 11.310114860534668, + "learning_rate": 2.952276142831806e-07, + "lm_loss": 5.2999, + "loss": 1.1423, + "step": 1977, + "text_contrastive_loss": 0.6652, + "train_positive_log_prob": -79.2654, + "train_positive_token_accuracy": 0.0823, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.2921, + "epoch": 4.465011286681715, + "grad_norm": 12.945686340332031, + "learning_rate": 2.9277637175484376e-07, + "lm_loss": 5.4226, + "loss": 1.1923, + "step": 1978, + "text_contrastive_loss": 0.7158, + "train_positive_log_prob": -78.8812, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3662, + "epoch": 4.467268623024831, + "grad_norm": 12.703791618347168, + "learning_rate": 2.9033504093906207e-07, + "lm_loss": 5.3854, + "loss": 1.2561, + "step": 1979, + "text_contrastive_loss": 0.7026, + "train_positive_log_prob": -78.215, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3903, + "epoch": 4.469525959367946, + "grad_norm": 13.860514640808105, + "learning_rate": 2.8790362697639685e-07, + "lm_loss": 5.4648, + "loss": 1.2517, + "step": 1980, + "text_contrastive_loss": 0.6298, + "train_positive_log_prob": -79.6698, + "train_positive_token_accuracy": 0.0823, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3316, + "epoch": 4.471783295711061, + "grad_norm": 12.458382606506348, + "learning_rate": 2.854821349865289e-07, + "lm_loss": 5.3472, + "loss": 1.3289, + "step": 1981, + "text_contrastive_loss": 0.9251, + "train_positive_log_prob": -79.3319, + "train_positive_token_accuracy": 0.0843, + "train_positive_token_prob": 0.0328 + }, + { + "contrastive_loss": 0.3475, + "epoch": 4.474040632054176, + "grad_norm": 12.30320930480957, + "learning_rate": 2.8307057006824514e-07, + "lm_loss": 5.4546, + "loss": 1.3372, + "step": 1982, + "text_contrastive_loss": 0.8884, + "train_positive_log_prob": -81.6035, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.2922, + "epoch": 4.476297968397291, + "grad_norm": 10.997669219970703, + "learning_rate": 2.806689372994292e-07, + "lm_loss": 5.4971, + "loss": 1.22, + "step": 1983, + "text_contrastive_loss": 0.7563, + "train_positive_log_prob": -82.9945, + "train_positive_token_accuracy": 0.0755, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.4254, + "epoch": 4.478555304740406, + "grad_norm": 14.565074920654297, + "learning_rate": 2.7827724173705273e-07, + "lm_loss": 5.4092, + "loss": 1.4047, + "step": 1984, + "text_contrastive_loss": 0.8769, + "train_positive_log_prob": -78.7708, + "train_positive_token_accuracy": 0.0818, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.404, + "epoch": 4.480812641083522, + "grad_norm": 13.067367553710938, + "learning_rate": 2.7589548841716274e-07, + "lm_loss": 5.3797, + "loss": 1.3821, + "step": 1985, + "text_contrastive_loss": 0.8803, + "train_positive_log_prob": -79.2031, + "train_positive_token_accuracy": 0.0799, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3045, + "epoch": 4.483069977426637, + "grad_norm": 12.950387954711914, + "learning_rate": 2.735236823548715e-07, + "lm_loss": 5.4557, + "loss": 1.2266, + "step": 1986, + "text_contrastive_loss": 0.753, + "train_positive_log_prob": -81.2182, + "train_positive_token_accuracy": 0.0821, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.4003, + "epoch": 4.485327313769751, + "grad_norm": 13.112210273742676, + "learning_rate": 2.711618285443457e-07, + "lm_loss": 5.4284, + "loss": 1.3359, + "step": 1987, + "text_contrastive_loss": 0.7855, + "train_positive_log_prob": -80.0897, + "train_positive_token_accuracy": 0.0725, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.394, + "epoch": 4.487584650112867, + "grad_norm": 12.9498929977417, + "learning_rate": 2.6880993195879614e-07, + "lm_loss": 5.3656, + "loss": 1.2849, + "step": 1988, + "text_contrastive_loss": 0.7087, + "train_positive_log_prob": -78.0969, + "train_positive_token_accuracy": 0.0829, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.2814, + "epoch": 4.489841986455982, + "grad_norm": 13.281852722167969, + "learning_rate": 2.6646799755046746e-07, + "lm_loss": 5.4174, + "loss": 1.1515, + "step": 1989, + "text_contrastive_loss": 0.6567, + "train_positive_log_prob": -80.6909, + "train_positive_token_accuracy": 0.0766, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.2296, + "epoch": 4.492099322799097, + "grad_norm": 11.3135347366333, + "learning_rate": 2.64136030250628e-07, + "lm_loss": 5.339, + "loss": 1.0447, + "step": 1990, + "text_contrastive_loss": 0.5625, + "train_positive_log_prob": -77.1858, + "train_positive_token_accuracy": 0.0775, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.326, + "epoch": 4.494356659142213, + "grad_norm": 12.787209510803223, + "learning_rate": 2.618140349695575e-07, + "lm_loss": 5.4107, + "loss": 1.262, + "step": 1991, + "text_contrastive_loss": 0.79, + "train_positive_log_prob": -79.7463, + "train_positive_token_accuracy": 0.0774, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3112, + "epoch": 4.496613995485327, + "grad_norm": 12.628015518188477, + "learning_rate": 2.595020165965401e-07, + "lm_loss": 5.4102, + "loss": 1.1312, + "step": 1992, + "text_contrastive_loss": 0.5579, + "train_positive_log_prob": -79.7188, + "train_positive_token_accuracy": 0.0806, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.4099, + "epoch": 4.498871331828442, + "grad_norm": 14.539176940917969, + "learning_rate": 2.571999799998509e-07, + "lm_loss": 5.4067, + "loss": 1.3512, + "step": 1993, + "text_contrastive_loss": 0.8014, + "train_positive_log_prob": -79.9221, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.4895, + "epoch": 4.501128668171558, + "grad_norm": 14.984796524047852, + "learning_rate": 2.549079300267482e-07, + "lm_loss": 5.4007, + "loss": 1.4427, + "step": 1994, + "text_contrastive_loss": 0.8263, + "train_positive_log_prob": -80.0175, + "train_positive_token_accuracy": 0.0775, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3949, + "epoch": 4.503386004514673, + "grad_norm": 13.221800804138184, + "learning_rate": 2.526258715034602e-07, + "lm_loss": 5.45, + "loss": 1.3708, + "step": 1995, + "text_contrastive_loss": 0.8618, + "train_positive_log_prob": -80.8574, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3484, + "epoch": 4.505643340857787, + "grad_norm": 11.855440139770508, + "learning_rate": 2.503538092351782e-07, + "lm_loss": 5.4445, + "loss": 1.2867, + "step": 1996, + "text_contrastive_loss": 0.7875, + "train_positive_log_prob": -80.6066, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.2704, + "epoch": 4.507900677200903, + "grad_norm": 11.419743537902832, + "learning_rate": 2.480917480060441e-07, + "lm_loss": 5.4885, + "loss": 1.1839, + "step": 1997, + "text_contrastive_loss": 0.7293, + "train_positive_log_prob": -83.8674, + "train_positive_token_accuracy": 0.0821, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3959, + "epoch": 4.510158013544018, + "grad_norm": 12.124903678894043, + "learning_rate": 2.458396925791434e-07, + "lm_loss": 5.5117, + "loss": 1.3461, + "step": 1998, + "text_contrastive_loss": 0.7982, + "train_positive_log_prob": -82.935, + "train_positive_token_accuracy": 0.0807, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.2489, + "epoch": 4.512415349887133, + "grad_norm": 10.868368148803711, + "learning_rate": 2.4359764769648907e-07, + "lm_loss": 5.4809, + "loss": 1.1632, + "step": 1999, + "text_contrastive_loss": 0.7325, + "train_positive_log_prob": -82.149, + "train_positive_token_accuracy": 0.0855, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3645, + "epoch": 4.514672686230249, + "grad_norm": 13.442747116088867, + "learning_rate": 2.4136561807901916e-07, + "lm_loss": 5.4188, + "loss": 1.3239, + "step": 2000, + "text_contrastive_loss": 0.8349, + "train_positive_log_prob": -80.2881, + "train_positive_token_accuracy": 0.0817, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.2945, + "epoch": 4.516930022573363, + "grad_norm": 11.210578918457031, + "learning_rate": 2.391436084265814e-07, + "lm_loss": 5.3863, + "loss": 1.1724, + "step": 2001, + "text_contrastive_loss": 0.6785, + "train_positive_log_prob": -79.6645, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.2929, + "epoch": 4.519187358916478, + "grad_norm": 11.028879165649414, + "learning_rate": 2.3693162341792532e-07, + "lm_loss": 5.383, + "loss": 1.1758, + "step": 2002, + "text_contrastive_loss": 0.6892, + "train_positive_log_prob": -77.5916, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.339, + "epoch": 4.521444695259594, + "grad_norm": 11.981451034545898, + "learning_rate": 2.347296677106925e-07, + "lm_loss": 5.5208, + "loss": 1.2931, + "step": 2003, + "text_contrastive_loss": 0.8041, + "train_positive_log_prob": -81.8028, + "train_positive_token_accuracy": 0.0749, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3572, + "epoch": 4.523702031602709, + "grad_norm": 11.984064102172852, + "learning_rate": 2.3253774594140633e-07, + "lm_loss": 5.4036, + "loss": 1.2452, + "step": 2004, + "text_contrastive_loss": 0.6953, + "train_positive_log_prob": -79.7033, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3139, + "epoch": 4.525959367945823, + "grad_norm": 13.003368377685547, + "learning_rate": 2.3035586272546207e-07, + "lm_loss": 5.3358, + "loss": 1.3271, + "step": 2005, + "text_contrastive_loss": 0.9591, + "train_positive_log_prob": -78.0766, + "train_positive_token_accuracy": 0.0734, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.2754, + "epoch": 4.528216704288939, + "grad_norm": 11.14112377166748, + "learning_rate": 2.2818402265711858e-07, + "lm_loss": 5.4949, + "loss": 1.1695, + "step": 2006, + "text_contrastive_loss": 0.6892, + "train_positive_log_prob": -81.1161, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3784, + "epoch": 4.530474040632054, + "grad_norm": 14.746195793151855, + "learning_rate": 2.2602223030948445e-07, + "lm_loss": 5.3958, + "loss": 1.3071, + "step": 2007, + "text_contrastive_loss": 0.7782, + "train_positive_log_prob": -82.159, + "train_positive_token_accuracy": 0.0857, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.2922, + "epoch": 4.532731376975169, + "grad_norm": 11.450067520141602, + "learning_rate": 2.2387049023451458e-07, + "lm_loss": 5.3471, + "loss": 1.1551, + "step": 2008, + "text_contrastive_loss": 0.6565, + "train_positive_log_prob": -79.0172, + "train_positive_token_accuracy": 0.0854, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.3611, + "epoch": 4.534988713318285, + "grad_norm": 16.124773025512695, + "learning_rate": 2.2172880696299692e-07, + "lm_loss": 5.385, + "loss": 1.2354, + "step": 2009, + "text_contrastive_loss": 0.6718, + "train_positive_log_prob": -79.4681, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.2789, + "epoch": 4.5372460496614, + "grad_norm": 10.738547325134277, + "learning_rate": 2.1959718500454196e-07, + "lm_loss": 5.5759, + "loss": 1.1613, + "step": 2010, + "text_contrastive_loss": 0.6497, + "train_positive_log_prob": -82.9651, + "train_positive_token_accuracy": 0.0753, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.311, + "epoch": 4.539503386004514, + "grad_norm": 12.338869094848633, + "learning_rate": 2.17475628847576e-07, + "lm_loss": 5.3815, + "loss": 1.2399, + "step": 2011, + "text_contrastive_loss": 0.7816, + "train_positive_log_prob": -79.5012, + "train_positive_token_accuracy": 0.0724, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.2539, + "epoch": 4.54176072234763, + "grad_norm": 11.193196296691895, + "learning_rate": 2.1536414295932896e-07, + "lm_loss": 5.4398, + "loss": 1.1573, + "step": 2012, + "text_contrastive_loss": 0.7189, + "train_positive_log_prob": -79.6395, + "train_positive_token_accuracy": 0.0739, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.4089, + "epoch": 4.544018058690745, + "grad_norm": 12.990910530090332, + "learning_rate": 2.1326273178582822e-07, + "lm_loss": 5.2879, + "loss": 1.443, + "step": 2013, + "text_contrastive_loss": 1.0106, + "train_positive_log_prob": -77.1943, + "train_positive_token_accuracy": 0.0787, + "train_positive_token_prob": 0.0327 + }, + { + "contrastive_loss": 0.3703, + "epoch": 4.54627539503386, + "grad_norm": 12.169670104980469, + "learning_rate": 2.1117139975188716e-07, + "lm_loss": 5.3766, + "loss": 1.2766, + "step": 2014, + "text_contrastive_loss": 0.7373, + "train_positive_log_prob": -79.6603, + "train_positive_token_accuracy": 0.0786, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.2708, + "epoch": 4.548532731376975, + "grad_norm": 11.852605819702148, + "learning_rate": 2.0909015126109488e-07, + "lm_loss": 5.3258, + "loss": 1.1836, + "step": 2015, + "text_contrastive_loss": 0.7604, + "train_positive_log_prob": -80.8, + "train_positive_token_accuracy": 0.0865, + "train_positive_token_prob": 0.0329 + }, + { + "contrastive_loss": 0.2913, + "epoch": 4.55079006772009, + "grad_norm": 11.06964111328125, + "learning_rate": 2.070189906958081e-07, + "lm_loss": 5.434, + "loss": 1.1883, + "step": 2016, + "text_contrastive_loss": 0.7073, + "train_positive_log_prob": -80.6427, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.273, + "epoch": 4.553047404063205, + "grad_norm": 11.538406372070312, + "learning_rate": 2.0495792241714386e-07, + "lm_loss": 5.45, + "loss": 1.2238, + "step": 2017, + "text_contrastive_loss": 0.8116, + "train_positive_log_prob": -80.0896, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3601, + "epoch": 4.555304740406321, + "grad_norm": 14.161224365234375, + "learning_rate": 2.029069507649678e-07, + "lm_loss": 5.4594, + "loss": 1.3431, + "step": 2018, + "text_contrastive_loss": 0.8743, + "train_positive_log_prob": -80.8447, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.2742, + "epoch": 4.557562076749436, + "grad_norm": 10.67629337310791, + "learning_rate": 2.0086608005788376e-07, + "lm_loss": 5.4105, + "loss": 1.1556, + "step": 2019, + "text_contrastive_loss": 0.6806, + "train_positive_log_prob": -78.4072, + "train_positive_token_accuracy": 0.0739, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.2809, + "epoch": 4.5598194130925505, + "grad_norm": 11.931790351867676, + "learning_rate": 1.988353145932298e-07, + "lm_loss": 5.419, + "loss": 1.1591, + "step": 2020, + "text_contrastive_loss": 0.6726, + "train_positive_log_prob": -79.7825, + "train_positive_token_accuracy": 0.0884, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.2969, + "epoch": 4.562076749435666, + "grad_norm": 12.385994911193848, + "learning_rate": 1.9681465864706372e-07, + "lm_loss": 5.4374, + "loss": 1.2585, + "step": 2021, + "text_contrastive_loss": 0.8358, + "train_positive_log_prob": -81.2872, + "train_positive_token_accuracy": 0.0787, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3624, + "epoch": 4.564334085778781, + "grad_norm": 12.764066696166992, + "learning_rate": 1.9480411647415708e-07, + "lm_loss": 5.4244, + "loss": 1.2461, + "step": 2022, + "text_contrastive_loss": 0.6824, + "train_positive_log_prob": -79.7129, + "train_positive_token_accuracy": 0.0694, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.4165, + "epoch": 4.566591422121896, + "grad_norm": 12.59554386138916, + "learning_rate": 1.9280369230798568e-07, + "lm_loss": 5.3912, + "loss": 1.3719, + "step": 2023, + "text_contrastive_loss": 0.8325, + "train_positive_log_prob": -77.2836, + "train_positive_token_accuracy": 0.0689, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.2861, + "epoch": 4.568848758465011, + "grad_norm": 12.440619468688965, + "learning_rate": 1.9081339036071956e-07, + "lm_loss": 5.3846, + "loss": 1.2291, + "step": 2024, + "text_contrastive_loss": 0.8091, + "train_positive_log_prob": -79.7763, + "train_positive_token_accuracy": 0.0844, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.2713, + "epoch": 4.571106094808126, + "grad_norm": 10.92240047454834, + "learning_rate": 1.8883321482321583e-07, + "lm_loss": 5.4474, + "loss": 1.1602, + "step": 2025, + "text_contrastive_loss": 0.6884, + "train_positive_log_prob": -79.3279, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3769, + "epoch": 4.573363431151241, + "grad_norm": 13.90052318572998, + "learning_rate": 1.8686316986500974e-07, + "lm_loss": 5.4035, + "loss": 1.3892, + "step": 2026, + "text_contrastive_loss": 0.944, + "train_positive_log_prob": -80.7954, + "train_positive_token_accuracy": 0.082, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.3136, + "epoch": 4.575620767494357, + "grad_norm": 11.086418151855469, + "learning_rate": 1.8490325963430368e-07, + "lm_loss": 5.401, + "loss": 1.2536, + "step": 2027, + "text_contrastive_loss": 0.7998, + "train_positive_log_prob": -78.3361, + "train_positive_token_accuracy": 0.0843, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3353, + "epoch": 4.577878103837472, + "grad_norm": 11.808106422424316, + "learning_rate": 1.829534882579598e-07, + "lm_loss": 5.5686, + "loss": 1.255, + "step": 2028, + "text_contrastive_loss": 0.7258, + "train_positive_log_prob": -82.1113, + "train_positive_token_accuracy": 0.0739, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3185, + "epoch": 4.580135440180587, + "grad_norm": 12.350231170654297, + "learning_rate": 1.8101385984149343e-07, + "lm_loss": 5.4003, + "loss": 1.2931, + "step": 2029, + "text_contrastive_loss": 0.8692, + "train_positive_log_prob": -79.6757, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3514, + "epoch": 4.582392776523702, + "grad_norm": 12.557411193847656, + "learning_rate": 1.7908437846906158e-07, + "lm_loss": 5.3998, + "loss": 1.2713, + "step": 2030, + "text_contrastive_loss": 0.7599, + "train_positive_log_prob": -80.726, + "train_positive_token_accuracy": 0.0807, + "train_positive_token_prob": 0.0324 + }, + { + "contrastive_loss": 0.3957, + "epoch": 4.584650112866817, + "grad_norm": 12.96367359161377, + "learning_rate": 1.7716504820345427e-07, + "lm_loss": 5.3606, + "loss": 1.3437, + "step": 2031, + "text_contrastive_loss": 0.8239, + "train_positive_log_prob": -77.8721, + "train_positive_token_accuracy": 0.0777, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3207, + "epoch": 4.586907449209932, + "grad_norm": 11.46599006652832, + "learning_rate": 1.752558730860876e-07, + "lm_loss": 5.3495, + "loss": 1.2024, + "step": 2032, + "text_contrastive_loss": 0.6935, + "train_positive_log_prob": -77.4624, + "train_positive_token_accuracy": 0.0801, + "train_positive_token_prob": 0.0327 + }, + { + "contrastive_loss": 0.3468, + "epoch": 4.589164785553048, + "grad_norm": 12.887274742126465, + "learning_rate": 1.733568571369948e-07, + "lm_loss": 5.4761, + "loss": 1.2777, + "step": 2033, + "text_contrastive_loss": 0.7666, + "train_positive_log_prob": -80.7713, + "train_positive_token_accuracy": 0.0731, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.318, + "epoch": 4.591422121896162, + "grad_norm": 12.050701141357422, + "learning_rate": 1.7146800435481837e-07, + "lm_loss": 5.3764, + "loss": 1.2242, + "step": 2034, + "text_contrastive_loss": 0.737, + "train_positive_log_prob": -79.0481, + "train_positive_token_accuracy": 0.0752, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4266, + "epoch": 4.5936794582392775, + "grad_norm": 14.312199592590332, + "learning_rate": 1.6958931871679908e-07, + "lm_loss": 5.5388, + "loss": 1.3659, + "step": 2035, + "text_contrastive_loss": 0.7708, + "train_positive_log_prob": -83.2564, + "train_positive_token_accuracy": 0.0748, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3258, + "epoch": 4.595936794582393, + "grad_norm": 12.62662410736084, + "learning_rate": 1.677208041787698e-07, + "lm_loss": 5.4322, + "loss": 1.2119, + "step": 2036, + "text_contrastive_loss": 0.6858, + "train_positive_log_prob": -79.9745, + "train_positive_token_accuracy": 0.0817, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.329, + "epoch": 4.598194130925508, + "grad_norm": 13.517664909362793, + "learning_rate": 1.6586246467514833e-07, + "lm_loss": 5.4312, + "loss": 1.3098, + "step": 2037, + "text_contrastive_loss": 0.8753, + "train_positive_log_prob": -81.4479, + "train_positive_token_accuracy": 0.0748, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.4059, + "epoch": 4.600451467268623, + "grad_norm": 13.118440628051758, + "learning_rate": 1.6401430411892572e-07, + "lm_loss": 5.3272, + "loss": 1.3827, + "step": 2038, + "text_contrastive_loss": 0.8882, + "train_positive_log_prob": -80.1109, + "train_positive_token_accuracy": 0.0862, + "train_positive_token_prob": 0.0331 + }, + { + "contrastive_loss": 0.3089, + "epoch": 4.602708803611738, + "grad_norm": 11.173444747924805, + "learning_rate": 1.621763264016607e-07, + "lm_loss": 5.4736, + "loss": 1.1329, + "step": 2039, + "text_contrastive_loss": 0.5533, + "train_positive_log_prob": -81.3186, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.363, + "epoch": 4.604966139954853, + "grad_norm": 13.05613899230957, + "learning_rate": 1.603485353934703e-07, + "lm_loss": 5.4485, + "loss": 1.3584, + "step": 2040, + "text_contrastive_loss": 0.9011, + "train_positive_log_prob": -82.6832, + "train_positive_token_accuracy": 0.086, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.2156, + "epoch": 4.6072234762979685, + "grad_norm": 10.073893547058105, + "learning_rate": 1.5853093494302195e-07, + "lm_loss": 5.5062, + "loss": 1.0639, + "step": 2041, + "text_contrastive_loss": 0.5955, + "train_positive_log_prob": -81.3034, + "train_positive_token_accuracy": 0.069, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.3748, + "epoch": 4.609480812641084, + "grad_norm": 14.483501434326172, + "learning_rate": 1.567235288775265e-07, + "lm_loss": 5.4257, + "loss": 1.3204, + "step": 2042, + "text_contrastive_loss": 0.806, + "train_positive_log_prob": -79.0582, + "train_positive_token_accuracy": 0.081, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3498, + "epoch": 4.611738148984198, + "grad_norm": 12.530729293823242, + "learning_rate": 1.5492632100272686e-07, + "lm_loss": 5.4334, + "loss": 1.2768, + "step": 2043, + "text_contrastive_loss": 0.7674, + "train_positive_log_prob": -82.3918, + "train_positive_token_accuracy": 0.0809, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.4447, + "epoch": 4.6139954853273135, + "grad_norm": 13.678736686706543, + "learning_rate": 1.5313931510289482e-07, + "lm_loss": 5.3582, + "loss": 1.3889, + "step": 2044, + "text_contrastive_loss": 0.8167, + "train_positive_log_prob": -78.8048, + "train_positive_token_accuracy": 0.0842, + "train_positive_token_prob": 0.0333 + }, + { + "contrastive_loss": 0.2577, + "epoch": 4.616252821670429, + "grad_norm": 11.134743690490723, + "learning_rate": 1.5136251494081822e-07, + "lm_loss": 5.4582, + "loss": 1.1094, + "step": 2045, + "text_contrastive_loss": 0.6117, + "train_positive_log_prob": -79.7384, + "train_positive_token_accuracy": 0.0751, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3532, + "epoch": 4.618510158013544, + "grad_norm": 12.894429206848145, + "learning_rate": 1.4959592425779768e-07, + "lm_loss": 5.3891, + "loss": 1.2592, + "step": 2046, + "text_contrastive_loss": 0.7342, + "train_positive_log_prob": -79.9502, + "train_positive_token_accuracy": 0.0771, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3642, + "epoch": 4.6207674943566595, + "grad_norm": 14.491817474365234, + "learning_rate": 1.4783954677363376e-07, + "lm_loss": 5.5068, + "loss": 1.313, + "step": 2047, + "text_contrastive_loss": 0.7964, + "train_positive_log_prob": -83.1652, + "train_positive_token_accuracy": 0.0745, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3862, + "epoch": 4.623024830699774, + "grad_norm": 12.89167594909668, + "learning_rate": 1.4609338618662318e-07, + "lm_loss": 5.4596, + "loss": 1.3343, + "step": 2048, + "text_contrastive_loss": 0.8042, + "train_positive_log_prob": -83.2552, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3278, + "epoch": 4.625282167042889, + "grad_norm": 12.781344413757324, + "learning_rate": 1.4435744617354975e-07, + "lm_loss": 5.4254, + "loss": 1.3567, + "step": 2049, + "text_contrastive_loss": 0.9729, + "train_positive_log_prob": -81.6955, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3036, + "epoch": 4.6275395033860045, + "grad_norm": 12.04308032989502, + "learning_rate": 1.4263173038967627e-07, + "lm_loss": 5.5156, + "loss": 1.2039, + "step": 2050, + "text_contrastive_loss": 0.6976, + "train_positive_log_prob": -81.1201, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.2658, + "epoch": 4.62979683972912, + "grad_norm": 11.717175483703613, + "learning_rate": 1.409162424687366e-07, + "lm_loss": 5.3793, + "loss": 1.2023, + "step": 2051, + "text_contrastive_loss": 0.7973, + "train_positive_log_prob": -78.9227, + "train_positive_token_accuracy": 0.0809, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.2861, + "epoch": 4.632054176072235, + "grad_norm": 13.031899452209473, + "learning_rate": 1.3921098602292793e-07, + "lm_loss": 5.3976, + "loss": 1.235, + "step": 2052, + "text_contrastive_loss": 0.8183, + "train_positive_log_prob": -79.1087, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.2861, + "epoch": 4.6343115124153496, + "grad_norm": 11.489171981811523, + "learning_rate": 1.3751596464290529e-07, + "lm_loss": 5.4261, + "loss": 1.2229, + "step": 2053, + "text_contrastive_loss": 0.7884, + "train_positive_log_prob": -79.6983, + "train_positive_token_accuracy": 0.0809, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3604, + "epoch": 4.636568848758465, + "grad_norm": 13.91213321685791, + "learning_rate": 1.358311818977709e-07, + "lm_loss": 5.3773, + "loss": 1.2796, + "step": 2054, + "text_contrastive_loss": 0.763, + "train_positive_log_prob": -77.6338, + "train_positive_token_accuracy": 0.0814, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3569, + "epoch": 4.63882618510158, + "grad_norm": 12.02213191986084, + "learning_rate": 1.3415664133506812e-07, + "lm_loss": 5.4676, + "loss": 1.3383, + "step": 2055, + "text_contrastive_loss": 0.8693, + "train_positive_log_prob": -79.7087, + "train_positive_token_accuracy": 0.0782, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3412, + "epoch": 4.6410835214446955, + "grad_norm": 13.349136352539062, + "learning_rate": 1.324923464807759e-07, + "lm_loss": 5.4068, + "loss": 1.3327, + "step": 2056, + "text_contrastive_loss": 0.9016, + "train_positive_log_prob": -80.4726, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.3854, + "epoch": 4.643340857787811, + "grad_norm": 12.684906959533691, + "learning_rate": 1.308383008392977e-07, + "lm_loss": 5.3658, + "loss": 1.3318, + "step": 2057, + "text_contrastive_loss": 0.8197, + "train_positive_log_prob": -78.5145, + "train_positive_token_accuracy": 0.0798, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3162, + "epoch": 4.645598194130925, + "grad_norm": 11.748915672302246, + "learning_rate": 1.2919450789345477e-07, + "lm_loss": 5.4275, + "loss": 1.2393, + "step": 2058, + "text_contrastive_loss": 0.7606, + "train_positive_log_prob": -79.2094, + "train_positive_token_accuracy": 0.0792, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.276, + "epoch": 4.6478555304740405, + "grad_norm": 11.677273750305176, + "learning_rate": 1.275609711044823e-07, + "lm_loss": 5.298, + "loss": 1.1773, + "step": 2059, + "text_contrastive_loss": 0.743, + "train_positive_log_prob": -78.8064, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3977, + "epoch": 4.650112866817156, + "grad_norm": 12.519454956054688, + "learning_rate": 1.2593769391201827e-07, + "lm_loss": 5.4556, + "loss": 1.4497, + "step": 2060, + "text_contrastive_loss": 1.0128, + "train_positive_log_prob": -81.1783, + "train_positive_token_accuracy": 0.0864, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.3482, + "epoch": 4.652370203160271, + "grad_norm": 12.281281471252441, + "learning_rate": 1.2432467973409857e-07, + "lm_loss": 5.3666, + "loss": 1.2925, + "step": 2061, + "text_contrastive_loss": 0.8153, + "train_positive_log_prob": -79.3484, + "train_positive_token_accuracy": 0.0851, + "train_positive_token_prob": 0.0327 + }, + { + "contrastive_loss": 0.2714, + "epoch": 4.654627539503386, + "grad_norm": 11.137235641479492, + "learning_rate": 1.2272193196714854e-07, + "lm_loss": 5.5347, + "loss": 1.1378, + "step": 2062, + "text_contrastive_loss": 0.6259, + "train_positive_log_prob": -82.3114, + "train_positive_token_accuracy": 0.0817, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3494, + "epoch": 4.656884875846501, + "grad_norm": 12.466343879699707, + "learning_rate": 1.211294539859753e-07, + "lm_loss": 5.4225, + "loss": 1.2811, + "step": 2063, + "text_contrastive_loss": 0.7788, + "train_positive_log_prob": -79.2222, + "train_positive_token_accuracy": 0.0703, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3114, + "epoch": 4.659142212189616, + "grad_norm": 11.6156644821167, + "learning_rate": 1.1954724914376215e-07, + "lm_loss": 5.4074, + "loss": 1.1853, + "step": 2064, + "text_contrastive_loss": 0.6663, + "train_positive_log_prob": -79.6946, + "train_positive_token_accuracy": 0.0856, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.2999, + "epoch": 4.6613995485327315, + "grad_norm": 11.598017692565918, + "learning_rate": 1.1797532077206187e-07, + "lm_loss": 5.4225, + "loss": 1.2047, + "step": 2065, + "text_contrastive_loss": 0.725, + "train_positive_log_prob": -81.7644, + "train_positive_token_accuracy": 0.0749, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.4465, + "epoch": 4.663656884875847, + "grad_norm": 13.34798526763916, + "learning_rate": 1.1641367218078736e-07, + "lm_loss": 5.3576, + "loss": 1.3968, + "step": 2066, + "text_contrastive_loss": 0.8291, + "train_positive_log_prob": -77.4952, + "train_positive_token_accuracy": 0.0821, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3443, + "epoch": 4.665914221218961, + "grad_norm": 12.143669128417969, + "learning_rate": 1.1486230665820552e-07, + "lm_loss": 5.2549, + "loss": 1.2457, + "step": 2067, + "text_contrastive_loss": 0.7518, + "train_positive_log_prob": -76.422, + "train_positive_token_accuracy": 0.0822, + "train_positive_token_prob": 0.0324 + }, + { + "contrastive_loss": 0.4516, + "epoch": 4.668171557562077, + "grad_norm": 13.302193641662598, + "learning_rate": 1.1332122747093277e-07, + "lm_loss": 5.4034, + "loss": 1.422, + "step": 2068, + "text_contrastive_loss": 0.8601, + "train_positive_log_prob": -83.2248, + "train_positive_token_accuracy": 0.0837, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.3197, + "epoch": 4.670428893905192, + "grad_norm": 12.491353988647461, + "learning_rate": 1.1179043786392507e-07, + "lm_loss": 5.4487, + "loss": 1.2369, + "step": 2069, + "text_contrastive_loss": 0.7447, + "train_positive_log_prob": -83.9906, + "train_positive_token_accuracy": 0.0778, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.297, + "epoch": 4.672686230248307, + "grad_norm": 11.817214965820312, + "learning_rate": 1.1026994106047296e-07, + "lm_loss": 5.4347, + "loss": 1.2758, + "step": 2070, + "text_contrastive_loss": 0.8707, + "train_positive_log_prob": -79.9113, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.4067, + "epoch": 4.674943566591422, + "grad_norm": 13.521685600280762, + "learning_rate": 1.0875974026219149e-07, + "lm_loss": 5.2724, + "loss": 1.3144, + "step": 2071, + "text_contrastive_loss": 0.7609, + "train_positive_log_prob": -78.2032, + "train_positive_token_accuracy": 0.0832, + "train_positive_token_prob": 0.0324 + }, + { + "contrastive_loss": 0.3383, + "epoch": 4.677200902934537, + "grad_norm": 11.534775733947754, + "learning_rate": 1.0725983864901978e-07, + "lm_loss": 5.4114, + "loss": 1.261, + "step": 2072, + "text_contrastive_loss": 0.7631, + "train_positive_log_prob": -80.4106, + "train_positive_token_accuracy": 0.0736, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3611, + "epoch": 4.679458239277652, + "grad_norm": 12.537820816040039, + "learning_rate": 1.0577023937920816e-07, + "lm_loss": 5.4556, + "loss": 1.3101, + "step": 2073, + "text_contrastive_loss": 0.8069, + "train_positive_log_prob": -82.11, + "train_positive_token_accuracy": 0.0755, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3923, + "epoch": 4.681715575620768, + "grad_norm": 14.78995418548584, + "learning_rate": 1.0429094558931485e-07, + "lm_loss": 5.399, + "loss": 1.3764, + "step": 2074, + "text_contrastive_loss": 0.8885, + "train_positive_log_prob": -79.6143, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3337, + "epoch": 4.683972911963883, + "grad_norm": 12.488669395446777, + "learning_rate": 1.0282196039419823e-07, + "lm_loss": 5.4436, + "loss": 1.3253, + "step": 2075, + "text_contrastive_loss": 0.8945, + "train_positive_log_prob": -80.6128, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.3118, + "epoch": 4.686230248306998, + "grad_norm": 10.698948860168457, + "learning_rate": 1.0136328688700958e-07, + "lm_loss": 5.416, + "loss": 1.2181, + "step": 2076, + "text_contrastive_loss": 0.7293, + "train_positive_log_prob": -78.91, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.371, + "epoch": 4.688487584650113, + "grad_norm": 13.605874061584473, + "learning_rate": 9.99149281391898e-08, + "lm_loss": 5.3512, + "loss": 1.2311, + "step": 2077, + "text_contrastive_loss": 0.65, + "train_positive_log_prob": -78.3063, + "train_positive_token_accuracy": 0.0849, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3294, + "epoch": 4.690744920993228, + "grad_norm": 11.778861045837402, + "learning_rate": 9.847688720045878e-08, + "lm_loss": 5.4059, + "loss": 1.2347, + "step": 2078, + "text_contrastive_loss": 0.7296, + "train_positive_log_prob": -79.3809, + "train_positive_token_accuracy": 0.0865, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.2362, + "epoch": 4.693002257336343, + "grad_norm": 10.422747611999512, + "learning_rate": 9.704916709881052e-08, + "lm_loss": 5.4358, + "loss": 1.1355, + "step": 2079, + "text_contrastive_loss": 0.7115, + "train_positive_log_prob": -79.968, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3438, + "epoch": 4.6952595936794586, + "grad_norm": 12.27733325958252, + "learning_rate": 9.5631770840508e-08, + "lm_loss": 5.5362, + "loss": 1.2405, + "step": 2080, + "text_contrastive_loss": 0.6862, + "train_positive_log_prob": -83.4285, + "train_positive_token_accuracy": 0.0731, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3542, + "epoch": 4.697516930022573, + "grad_norm": 12.169941902160645, + "learning_rate": 9.422470141007667e-08, + "lm_loss": 5.4622, + "loss": 1.2447, + "step": 2081, + "text_contrastive_loss": 0.6885, + "train_positive_log_prob": -82.5532, + "train_positive_token_accuracy": 0.0793, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3261, + "epoch": 4.699774266365688, + "grad_norm": 11.984085083007812, + "learning_rate": 9.282796177029596e-08, + "lm_loss": 5.4015, + "loss": 1.2829, + "step": 2082, + "text_contrastive_loss": 0.8334, + "train_positive_log_prob": -79.2095, + "train_positive_token_accuracy": 0.0778, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4099, + "epoch": 4.702031602708804, + "grad_norm": 12.825424194335938, + "learning_rate": 9.144155486219442e-08, + "lm_loss": 5.3908, + "loss": 1.441, + "step": 2083, + "text_contrastive_loss": 0.9841, + "train_positive_log_prob": -79.4113, + "train_positive_token_accuracy": 0.0796, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.2742, + "epoch": 4.704288939051919, + "grad_norm": 11.136331558227539, + "learning_rate": 9.006548360504463e-08, + "lm_loss": 5.4398, + "loss": 1.123, + "step": 2084, + "text_contrastive_loss": 0.6097, + "train_positive_log_prob": -80.7889, + "train_positive_token_accuracy": 0.0863, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.2938, + "epoch": 4.706546275395034, + "grad_norm": 12.386018753051758, + "learning_rate": 8.869975089635552e-08, + "lm_loss": 5.4224, + "loss": 1.2149, + "step": 2085, + "text_contrastive_loss": 0.7577, + "train_positive_log_prob": -81.4412, + "train_positive_token_accuracy": 0.0819, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3593, + "epoch": 4.708803611738149, + "grad_norm": 11.770545959472656, + "learning_rate": 8.734435961186782e-08, + "lm_loss": 5.3793, + "loss": 1.242, + "step": 2086, + "text_contrastive_loss": 0.6896, + "train_positive_log_prob": -78.7349, + "train_positive_token_accuracy": 0.0838, + "train_positive_token_prob": 0.0334 + }, + { + "contrastive_loss": 0.3282, + "epoch": 4.711060948081264, + "grad_norm": 12.917922019958496, + "learning_rate": 8.599931260554417e-08, + "lm_loss": 5.5304, + "loss": 1.3011, + "step": 2087, + "text_contrastive_loss": 0.8396, + "train_positive_log_prob": -82.4653, + "train_positive_token_accuracy": 0.0799, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.2836, + "epoch": 4.713318284424379, + "grad_norm": 10.346996307373047, + "learning_rate": 8.466461270956794e-08, + "lm_loss": 5.447, + "loss": 1.2308, + "step": 2088, + "text_contrastive_loss": 0.8051, + "train_positive_log_prob": -80.0048, + "train_positive_token_accuracy": 0.0747, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.2666, + "epoch": 4.715575620767495, + "grad_norm": 11.235525131225586, + "learning_rate": 8.334026273433659e-08, + "lm_loss": 5.3524, + "loss": 1.2252, + "step": 2089, + "text_contrastive_loss": 0.8467, + "train_positive_log_prob": -78.4436, + "train_positive_token_accuracy": 0.0898, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.2305, + "epoch": 4.717832957110609, + "grad_norm": 10.909027099609375, + "learning_rate": 8.202626546845172e-08, + "lm_loss": 5.402, + "loss": 1.0679, + "step": 2090, + "text_contrastive_loss": 0.5944, + "train_positive_log_prob": -78.9846, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.4059, + "epoch": 4.720090293453724, + "grad_norm": 12.80093765258789, + "learning_rate": 8.072262367871675e-08, + "lm_loss": 5.3418, + "loss": 1.3479, + "step": 2091, + "text_contrastive_loss": 0.8156, + "train_positive_log_prob": -79.5457, + "train_positive_token_accuracy": 0.0868, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3109, + "epoch": 4.72234762979684, + "grad_norm": 13.146889686584473, + "learning_rate": 7.942934011013037e-08, + "lm_loss": 5.5149, + "loss": 1.2213, + "step": 2092, + "text_contrastive_loss": 0.7178, + "train_positive_log_prob": -82.9217, + "train_positive_token_accuracy": 0.0751, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.2796, + "epoch": 4.724604966139955, + "grad_norm": 10.740571975708008, + "learning_rate": 7.814641748588148e-08, + "lm_loss": 5.2834, + "loss": 1.1596, + "step": 2093, + "text_contrastive_loss": 0.7033, + "train_positive_log_prob": -78.8184, + "train_positive_token_accuracy": 0.0833, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.2847, + "epoch": 4.72686230248307, + "grad_norm": 13.129890441894531, + "learning_rate": 7.687385850734086e-08, + "lm_loss": 5.3972, + "loss": 1.2188, + "step": 2094, + "text_contrastive_loss": 0.7889, + "train_positive_log_prob": -80.2631, + "train_positive_token_accuracy": 0.0812, + "train_positive_token_prob": 0.0326 + }, + { + "contrastive_loss": 0.3072, + "epoch": 4.729119638826186, + "grad_norm": 13.36591625213623, + "learning_rate": 7.561166585405789e-08, + "lm_loss": 5.4054, + "loss": 1.2501, + "step": 2095, + "text_contrastive_loss": 0.8047, + "train_positive_log_prob": -77.3645, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.2978, + "epoch": 4.7313769751693, + "grad_norm": 11.847993850708008, + "learning_rate": 7.435984218375436e-08, + "lm_loss": 5.3862, + "loss": 1.1956, + "step": 2096, + "text_contrastive_loss": 0.7185, + "train_positive_log_prob": -78.4594, + "train_positive_token_accuracy": 0.0866, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3342, + "epoch": 4.733634311512415, + "grad_norm": 14.40293025970459, + "learning_rate": 7.311839013231959e-08, + "lm_loss": 5.4427, + "loss": 1.2971, + "step": 2097, + "text_contrastive_loss": 0.8372, + "train_positive_log_prob": -80.8325, + "train_positive_token_accuracy": 0.0765, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3878, + "epoch": 4.735891647855531, + "grad_norm": 13.98948860168457, + "learning_rate": 7.188731231380253e-08, + "lm_loss": 5.3723, + "loss": 1.2364, + "step": 2098, + "text_contrastive_loss": 0.6229, + "train_positive_log_prob": -79.4283, + "train_positive_token_accuracy": 0.0814, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3948, + "epoch": 4.738148984198646, + "grad_norm": 12.624342918395996, + "learning_rate": 7.066661132040853e-08, + "lm_loss": 5.5111, + "loss": 1.3263, + "step": 2099, + "text_contrastive_loss": 0.7608, + "train_positive_log_prob": -81.3797, + "train_positive_token_accuracy": 0.0812, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.2587, + "epoch": 4.74040632054176, + "grad_norm": 12.127290725708008, + "learning_rate": 6.945628972249208e-08, + "lm_loss": 5.5143, + "loss": 1.1455, + "step": 2100, + "text_contrastive_loss": 0.6707, + "train_positive_log_prob": -81.6574, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3639, + "epoch": 4.742663656884876, + "grad_norm": 12.897936820983887, + "learning_rate": 6.825635006855458e-08, + "lm_loss": 5.4266, + "loss": 1.322, + "step": 2101, + "text_contrastive_loss": 0.8308, + "train_positive_log_prob": -80.0815, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.3601, + "epoch": 4.744920993227991, + "grad_norm": 14.64498233795166, + "learning_rate": 6.706679488523494e-08, + "lm_loss": 5.4462, + "loss": 1.3274, + "step": 2102, + "text_contrastive_loss": 0.8454, + "train_positive_log_prob": -80.2065, + "train_positive_token_accuracy": 0.0772, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.2432, + "epoch": 4.747178329571106, + "grad_norm": 11.030097007751465, + "learning_rate": 6.58876266773062e-08, + "lm_loss": 5.3806, + "loss": 1.1177, + "step": 2103, + "text_contrastive_loss": 0.6729, + "train_positive_log_prob": -80.559, + "train_positive_token_accuracy": 0.0866, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.3412, + "epoch": 4.749435665914222, + "grad_norm": 14.022589683532715, + "learning_rate": 6.471884792767169e-08, + "lm_loss": 5.5299, + "loss": 1.2617, + "step": 2104, + "text_contrastive_loss": 0.735, + "train_positive_log_prob": -81.7832, + "train_positive_token_accuracy": 0.0779, + "train_positive_token_prob": 0.0297 + }, + { + "contrastive_loss": 0.3563, + "epoch": 4.751693002257336, + "grad_norm": 12.142727851867676, + "learning_rate": 6.356046109735614e-08, + "lm_loss": 5.2795, + "loss": 1.2937, + "step": 2105, + "text_contrastive_loss": 0.819, + "train_positive_log_prob": -78.5531, + "train_positive_token_accuracy": 0.0767, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3118, + "epoch": 4.753950338600451, + "grad_norm": 12.552433013916016, + "learning_rate": 6.241246862550398e-08, + "lm_loss": 5.3647, + "loss": 1.2759, + "step": 2106, + "text_contrastive_loss": 0.8553, + "train_positive_log_prob": -78.1007, + "train_positive_token_accuracy": 0.0837, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3455, + "epoch": 4.756207674943567, + "grad_norm": 13.206721305847168, + "learning_rate": 6.127487292937328e-08, + "lm_loss": 5.4677, + "loss": 1.3394, + "step": 2107, + "text_contrastive_loss": 0.8942, + "train_positive_log_prob": -79.7266, + "train_positive_token_accuracy": 0.0804, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.2653, + "epoch": 4.758465011286682, + "grad_norm": 10.340980529785156, + "learning_rate": 6.014767640432905e-08, + "lm_loss": 5.5004, + "loss": 1.1627, + "step": 2108, + "text_contrastive_loss": 0.6948, + "train_positive_log_prob": -83.2799, + "train_positive_token_accuracy": 0.0797, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.4235, + "epoch": 4.760722347629796, + "grad_norm": 13.034989356994629, + "learning_rate": 5.903088142384106e-08, + "lm_loss": 5.345, + "loss": 1.3574, + "step": 2109, + "text_contrastive_loss": 0.7987, + "train_positive_log_prob": -81.1397, + "train_positive_token_accuracy": 0.0746, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3825, + "epoch": 4.762979683972912, + "grad_norm": 13.444584846496582, + "learning_rate": 5.7924490339474335e-08, + "lm_loss": 5.5541, + "loss": 1.3267, + "step": 2110, + "text_contrastive_loss": 0.7774, + "train_positive_log_prob": -81.9874, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3404, + "epoch": 4.765237020316027, + "grad_norm": 12.826142311096191, + "learning_rate": 5.682850548089036e-08, + "lm_loss": 5.4051, + "loss": 1.2624, + "step": 2111, + "text_contrastive_loss": 0.7631, + "train_positive_log_prob": -79.4313, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3342, + "epoch": 4.767494356659142, + "grad_norm": 12.865568161010742, + "learning_rate": 5.574292915583646e-08, + "lm_loss": 5.3782, + "loss": 1.2472, + "step": 2112, + "text_contrastive_loss": 0.7505, + "train_positive_log_prob": -80.5515, + "train_positive_token_accuracy": 0.0803, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3307, + "epoch": 4.769751693002258, + "grad_norm": 11.538610458374023, + "learning_rate": 5.46677636501447e-08, + "lm_loss": 5.4057, + "loss": 1.2176, + "step": 2113, + "text_contrastive_loss": 0.6927, + "train_positive_log_prob": -78.949, + "train_positive_token_accuracy": 0.0859, + "train_positive_token_prob": 0.0324 + }, + { + "contrastive_loss": 0.3209, + "epoch": 4.772009029345372, + "grad_norm": 11.79147720336914, + "learning_rate": 5.3603011227725265e-08, + "lm_loss": 5.3646, + "loss": 1.2123, + "step": 2114, + "text_contrastive_loss": 0.7097, + "train_positive_log_prob": -80.3213, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.4171, + "epoch": 4.774266365688487, + "grad_norm": 14.78882122039795, + "learning_rate": 5.2548674130561974e-08, + "lm_loss": 5.3835, + "loss": 1.4208, + "step": 2115, + "text_contrastive_loss": 0.9308, + "train_positive_log_prob": -80.8126, + "train_positive_token_accuracy": 0.0812, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.379, + "epoch": 4.776523702031603, + "grad_norm": 12.893424034118652, + "learning_rate": 5.1504754578707294e-08, + "lm_loss": 5.4195, + "loss": 1.2814, + "step": 2116, + "text_contrastive_loss": 0.7209, + "train_positive_log_prob": -81.1257, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.2946, + "epoch": 4.778781038374718, + "grad_norm": 11.82102108001709, + "learning_rate": 5.047125477027959e-08, + "lm_loss": 5.3773, + "loss": 1.2411, + "step": 2117, + "text_contrastive_loss": 0.8175, + "train_positive_log_prob": -80.5322, + "train_positive_token_accuracy": 0.0869, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.2978, + "epoch": 4.781038374717833, + "grad_norm": 13.15385913848877, + "learning_rate": 4.944817688145642e-08, + "lm_loss": 5.4477, + "loss": 1.2161, + "step": 2118, + "text_contrastive_loss": 0.7471, + "train_positive_log_prob": -80.7155, + "train_positive_token_accuracy": 0.083, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.2702, + "epoch": 4.783295711060948, + "grad_norm": 12.829815864562988, + "learning_rate": 4.843552306646904e-08, + "lm_loss": 5.4827, + "loss": 1.2188, + "step": 2119, + "text_contrastive_loss": 0.8007, + "train_positive_log_prob": -79.0729, + "train_positive_token_accuracy": 0.0742, + "train_positive_token_prob": 0.0298 + }, + { + "contrastive_loss": 0.3088, + "epoch": 4.785553047404063, + "grad_norm": 12.342094421386719, + "learning_rate": 4.743329545760122e-08, + "lm_loss": 5.4983, + "loss": 1.3163, + "step": 2120, + "text_contrastive_loss": 0.9154, + "train_positive_log_prob": -80.6104, + "train_positive_token_accuracy": 0.0783, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.2909, + "epoch": 4.787810383747178, + "grad_norm": 11.530839920043945, + "learning_rate": 4.644149616518212e-08, + "lm_loss": 5.4692, + "loss": 1.2464, + "step": 2121, + "text_contrastive_loss": 0.817, + "train_positive_log_prob": -83.4939, + "train_positive_token_accuracy": 0.0847, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.3463, + "epoch": 4.790067720090294, + "grad_norm": 13.399327278137207, + "learning_rate": 4.5460127277582863e-08, + "lm_loss": 5.5051, + "loss": 1.2915, + "step": 2122, + "text_contrastive_loss": 0.7894, + "train_positive_log_prob": -82.812, + "train_positive_token_accuracy": 0.0761, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.2209, + "epoch": 4.792325056433409, + "grad_norm": 12.157471656799316, + "learning_rate": 4.448919086121217e-08, + "lm_loss": 5.4043, + "loss": 1.122, + "step": 2123, + "text_contrastive_loss": 0.7215, + "train_positive_log_prob": -79.2236, + "train_positive_token_accuracy": 0.0794, + "train_positive_token_prob": 0.0324 + }, + { + "contrastive_loss": 0.4346, + "epoch": 4.794582392776523, + "grad_norm": 14.263452529907227, + "learning_rate": 4.352868896051077e-08, + "lm_loss": 5.3028, + "loss": 1.4139, + "step": 2124, + "text_contrastive_loss": 0.8981, + "train_positive_log_prob": -76.6859, + "train_positive_token_accuracy": 0.0746, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3684, + "epoch": 4.796839729119639, + "grad_norm": 12.564045906066895, + "learning_rate": 4.2578623597949174e-08, + "lm_loss": 5.4374, + "loss": 1.3509, + "step": 2125, + "text_contrastive_loss": 0.8776, + "train_positive_log_prob": -80.7902, + "train_positive_token_accuracy": 0.0699, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.293, + "epoch": 4.799097065462754, + "grad_norm": 11.367637634277344, + "learning_rate": 4.163899677402161e-08, + "lm_loss": 5.4841, + "loss": 1.2463, + "step": 2126, + "text_contrastive_loss": 0.8097, + "train_positive_log_prob": -81.5988, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.3268, + "epoch": 4.801354401805869, + "grad_norm": 12.6432523727417, + "learning_rate": 4.0709810467243204e-08, + "lm_loss": 5.4087, + "loss": 1.246, + "step": 2127, + "text_contrastive_loss": 0.7567, + "train_positive_log_prob": -78.678, + "train_positive_token_accuracy": 0.0873, + "train_positive_token_prob": 0.0328 + }, + { + "contrastive_loss": 0.3683, + "epoch": 4.803611738148984, + "grad_norm": 14.87887954711914, + "learning_rate": 3.979106663414389e-08, + "lm_loss": 5.4882, + "loss": 1.3044, + "step": 2128, + "text_contrastive_loss": 0.7744, + "train_positive_log_prob": -79.8795, + "train_positive_token_accuracy": 0.0738, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.342, + "epoch": 4.805869074492099, + "grad_norm": 12.392041206359863, + "learning_rate": 3.8882767209266756e-08, + "lm_loss": 5.3037, + "loss": 1.2918, + "step": 2129, + "text_contrastive_loss": 0.8389, + "train_positive_log_prob": -77.9401, + "train_positive_token_accuracy": 0.0901, + "train_positive_token_prob": 0.0324 + }, + { + "contrastive_loss": 0.2941, + "epoch": 4.808126410835214, + "grad_norm": 12.006333351135254, + "learning_rate": 3.7984914105162474e-08, + "lm_loss": 5.3922, + "loss": 1.2285, + "step": 2130, + "text_contrastive_loss": 0.7903, + "train_positive_log_prob": -78.6519, + "train_positive_token_accuracy": 0.0811, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3933, + "epoch": 4.81038374717833, + "grad_norm": 13.169232368469238, + "learning_rate": 3.709750921238486e-08, + "lm_loss": 5.4115, + "loss": 1.3865, + "step": 2131, + "text_contrastive_loss": 0.9043, + "train_positive_log_prob": -80.2832, + "train_positive_token_accuracy": 0.0811, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.2986, + "epoch": 4.812641083521445, + "grad_norm": 11.802717208862305, + "learning_rate": 3.622055439948813e-08, + "lm_loss": 5.4197, + "loss": 1.2261, + "step": 2132, + "text_contrastive_loss": 0.7711, + "train_positive_log_prob": -80.7264, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.2708, + "epoch": 4.8148984198645595, + "grad_norm": 11.499796867370605, + "learning_rate": 3.5354051513022405e-08, + "lm_loss": 5.5817, + "loss": 1.2406, + "step": 2133, + "text_contrastive_loss": 0.8233, + "train_positive_log_prob": -81.3718, + "train_positive_token_accuracy": 0.0809, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.4612, + "epoch": 4.817155756207675, + "grad_norm": 14.38620662689209, + "learning_rate": 3.449800237753043e-08, + "lm_loss": 5.5156, + "loss": 1.4758, + "step": 2134, + "text_contrastive_loss": 0.9261, + "train_positive_log_prob": -81.3969, + "train_positive_token_accuracy": 0.0736, + "train_positive_token_prob": 0.0295 + }, + { + "contrastive_loss": 0.3066, + "epoch": 4.81941309255079, + "grad_norm": 11.945396423339844, + "learning_rate": 3.365240879554144e-08, + "lm_loss": 5.4086, + "loss": 1.2489, + "step": 2135, + "text_contrastive_loss": 0.8029, + "train_positive_log_prob": -80.5976, + "train_positive_token_accuracy": 0.0805, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3523, + "epoch": 4.821670428893905, + "grad_norm": 12.395230293273926, + "learning_rate": 3.281727254757061e-08, + "lm_loss": 5.4119, + "loss": 1.2925, + "step": 2136, + "text_contrastive_loss": 0.7981, + "train_positive_log_prob": -81.768, + "train_positive_token_accuracy": 0.0757, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.2742, + "epoch": 4.82392776523702, + "grad_norm": 11.249955177307129, + "learning_rate": 3.1992595392112966e-08, + "lm_loss": 5.4349, + "loss": 1.1399, + "step": 2137, + "text_contrastive_loss": 0.6444, + "train_positive_log_prob": -78.2337, + "train_positive_token_accuracy": 0.0795, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3057, + "epoch": 4.826185101580135, + "grad_norm": 11.655808448791504, + "learning_rate": 3.117837906564114e-08, + "lm_loss": 5.4418, + "loss": 1.2396, + "step": 2138, + "text_contrastive_loss": 0.7796, + "train_positive_log_prob": -80.5642, + "train_positive_token_accuracy": 0.0773, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.2071, + "epoch": 4.8284424379232505, + "grad_norm": 10.82726001739502, + "learning_rate": 3.0374625282599826e-08, + "lm_loss": 5.4457, + "loss": 1.0798, + "step": 2139, + "text_contrastive_loss": 0.6564, + "train_positive_log_prob": -81.0091, + "train_positive_token_accuracy": 0.0851, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.4151, + "epoch": 4.830699774266366, + "grad_norm": 14.302108764648438, + "learning_rate": 2.9581335735404672e-08, + "lm_loss": 5.4231, + "loss": 1.3838, + "step": 2140, + "text_contrastive_loss": 0.8527, + "train_positive_log_prob": -80.0548, + "train_positive_token_accuracy": 0.0791, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3765, + "epoch": 4.832957110609481, + "grad_norm": 13.385337829589844, + "learning_rate": 2.8798512094436738e-08, + "lm_loss": 5.381, + "loss": 1.3134, + "step": 2141, + "text_contrastive_loss": 0.7976, + "train_positive_log_prob": -78.4286, + "train_positive_token_accuracy": 0.0839, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.3792, + "epoch": 4.835214446952596, + "grad_norm": 12.579778671264648, + "learning_rate": 2.802615600804026e-08, + "lm_loss": 5.3899, + "loss": 1.3352, + "step": 2142, + "text_contrastive_loss": 0.8339, + "train_positive_log_prob": -80.0267, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3257, + "epoch": 4.837471783295711, + "grad_norm": 12.47490119934082, + "learning_rate": 2.7264269102517117e-08, + "lm_loss": 5.5677, + "loss": 1.2537, + "step": 2143, + "text_contrastive_loss": 0.7426, + "train_positive_log_prob": -80.9052, + "train_positive_token_accuracy": 0.0745, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3645, + "epoch": 4.839729119638826, + "grad_norm": 12.39420223236084, + "learning_rate": 2.6512852982127357e-08, + "lm_loss": 5.3601, + "loss": 1.3318, + "step": 2144, + "text_contrastive_loss": 0.8627, + "train_positive_log_prob": -79.8826, + "train_positive_token_accuracy": 0.082, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.3724, + "epoch": 4.841986455981941, + "grad_norm": 12.97134017944336, + "learning_rate": 2.577190922908035e-08, + "lm_loss": 5.4351, + "loss": 1.4053, + "step": 2145, + "text_contrastive_loss": 0.9787, + "train_positive_log_prob": -81.8633, + "train_positive_token_accuracy": 0.0843, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.3364, + "epoch": 4.844243792325057, + "grad_norm": 11.519397735595703, + "learning_rate": 2.5041439403537537e-08, + "lm_loss": 5.3999, + "loss": 1.2268, + "step": 2146, + "text_contrastive_loss": 0.7008, + "train_positive_log_prob": -80.2401, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3098, + "epoch": 4.846501128668171, + "grad_norm": 13.749913215637207, + "learning_rate": 2.4321445043603565e-08, + "lm_loss": 5.3571, + "loss": 1.266, + "step": 2147, + "text_contrastive_loss": 0.8409, + "train_positive_log_prob": -77.2561, + "train_positive_token_accuracy": 0.0802, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3293, + "epoch": 4.8487584650112865, + "grad_norm": 11.50446891784668, + "learning_rate": 2.3611927665326275e-08, + "lm_loss": 5.3241, + "loss": 1.2163, + "step": 2148, + "text_contrastive_loss": 0.7092, + "train_positive_log_prob": -79.9036, + "train_positive_token_accuracy": 0.0836, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3821, + "epoch": 4.851015801354402, + "grad_norm": 13.767017364501953, + "learning_rate": 2.291288876269393e-08, + "lm_loss": 5.4373, + "loss": 1.3445, + "step": 2149, + "text_contrastive_loss": 0.8373, + "train_positive_log_prob": -79.968, + "train_positive_token_accuracy": 0.077, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.3798, + "epoch": 4.853273137697517, + "grad_norm": 12.463318824768066, + "learning_rate": 2.222432980762912e-08, + "lm_loss": 5.4118, + "loss": 1.3745, + "step": 2150, + "text_contrastive_loss": 0.907, + "train_positive_log_prob": -79.8839, + "train_positive_token_accuracy": 0.0741, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3655, + "epoch": 4.855530474040632, + "grad_norm": 14.218618392944336, + "learning_rate": 2.1546252249988186e-08, + "lm_loss": 5.4369, + "loss": 1.2745, + "step": 2151, + "text_contrastive_loss": 0.7305, + "train_positive_log_prob": -80.8525, + "train_positive_token_accuracy": 0.0735, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.2813, + "epoch": 4.857787810383747, + "grad_norm": 11.583148956298828, + "learning_rate": 2.087865751755791e-08, + "lm_loss": 5.5077, + "loss": 1.1521, + "step": 2152, + "text_contrastive_loss": 0.6402, + "train_positive_log_prob": -81.1945, + "train_positive_token_accuracy": 0.0784, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.3226, + "epoch": 4.860045146726862, + "grad_norm": 12.650745391845703, + "learning_rate": 2.0221547016051614e-08, + "lm_loss": 5.3742, + "loss": 1.159, + "step": 2153, + "text_contrastive_loss": 0.5979, + "train_positive_log_prob": -79.5455, + "train_positive_token_accuracy": 0.08, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3925, + "epoch": 4.8623024830699775, + "grad_norm": 13.429669380187988, + "learning_rate": 1.957492212910639e-08, + "lm_loss": 5.4367, + "loss": 1.3888, + "step": 2154, + "text_contrastive_loss": 0.9053, + "train_positive_log_prob": -80.5124, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3347, + "epoch": 4.864559819413093, + "grad_norm": 11.282309532165527, + "learning_rate": 1.8938784218281435e-08, + "lm_loss": 5.3968, + "loss": 1.2359, + "step": 2155, + "text_contrastive_loss": 0.7231, + "train_positive_log_prob": -80.911, + "train_positive_token_accuracy": 0.0847, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3461, + "epoch": 4.866817155756207, + "grad_norm": 12.434335708618164, + "learning_rate": 1.8313134623051955e-08, + "lm_loss": 5.4852, + "loss": 1.2687, + "step": 2156, + "text_contrastive_loss": 0.7483, + "train_positive_log_prob": -80.9058, + "train_positive_token_accuracy": 0.0821, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3367, + "epoch": 4.8690744920993225, + "grad_norm": 12.231938362121582, + "learning_rate": 1.7697974660811357e-08, + "lm_loss": 5.3225, + "loss": 1.1928, + "step": 2157, + "text_contrastive_loss": 0.6478, + "train_positive_log_prob": -80.2946, + "train_positive_token_accuracy": 0.0808, + "train_positive_token_prob": 0.0322 + }, + { + "contrastive_loss": 0.3547, + "epoch": 4.871331828442438, + "grad_norm": 13.297225952148438, + "learning_rate": 1.7093305626864065e-08, + "lm_loss": 5.3583, + "loss": 1.2328, + "step": 2158, + "text_contrastive_loss": 0.6846, + "train_positive_log_prob": -78.7605, + "train_positive_token_accuracy": 0.0853, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.3378, + "epoch": 4.873589164785553, + "grad_norm": 12.964941024780273, + "learning_rate": 1.6499128794423836e-08, + "lm_loss": 5.3851, + "loss": 1.3052, + "step": 2159, + "text_contrastive_loss": 0.8578, + "train_positive_log_prob": -80.3351, + "train_positive_token_accuracy": 0.0759, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.4038, + "epoch": 4.8758465011286685, + "grad_norm": 13.34187126159668, + "learning_rate": 1.5915445414613208e-08, + "lm_loss": 5.3206, + "loss": 1.3374, + "step": 2160, + "text_contrastive_loss": 0.8031, + "train_positive_log_prob": -78.6469, + "train_positive_token_accuracy": 0.0846, + "train_positive_token_prob": 0.0325 + }, + { + "contrastive_loss": 0.3721, + "epoch": 4.878103837471784, + "grad_norm": 12.722206115722656, + "learning_rate": 1.5342256716459058e-08, + "lm_loss": 5.3343, + "loss": 1.2995, + "step": 2161, + "text_contrastive_loss": 0.7879, + "train_positive_log_prob": -80.1384, + "train_positive_token_accuracy": 0.0819, + "train_positive_token_prob": 0.0328 + }, + { + "contrastive_loss": 0.2351, + "epoch": 4.880361173814898, + "grad_norm": 14.811067581176758, + "learning_rate": 1.4779563906888172e-08, + "lm_loss": 5.4101, + "loss": 1.191, + "step": 2162, + "text_contrastive_loss": 0.8297, + "train_positive_log_prob": -80.7462, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.2915, + "epoch": 4.8826185101580135, + "grad_norm": 11.719377517700195, + "learning_rate": 1.4227368170728894e-08, + "lm_loss": 5.2842, + "loss": 1.1665, + "step": 2163, + "text_contrastive_loss": 0.6932, + "train_positive_log_prob": -76.5174, + "train_positive_token_accuracy": 0.0831, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.4242, + "epoch": 4.884875846501129, + "grad_norm": 15.487630844116211, + "learning_rate": 1.3685670670706697e-08, + "lm_loss": 5.4304, + "loss": 1.3944, + "step": 2164, + "text_contrastive_loss": 0.8544, + "train_positive_log_prob": -80.0797, + "train_positive_token_accuracy": 0.0768, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3653, + "epoch": 4.887133182844244, + "grad_norm": 12.214798927307129, + "learning_rate": 1.3154472547440289e-08, + "lm_loss": 5.3911, + "loss": 1.3527, + "step": 2165, + "text_contrastive_loss": 0.8966, + "train_positive_log_prob": -79.1207, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0324 + }, + { + "contrastive_loss": 0.3553, + "epoch": 4.889390519187359, + "grad_norm": 12.921774864196777, + "learning_rate": 1.2633774919441622e-08, + "lm_loss": 5.3553, + "loss": 1.2966, + "step": 2166, + "text_contrastive_loss": 0.8115, + "train_positive_log_prob": -78.8851, + "train_positive_token_accuracy": 0.087, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3289, + "epoch": 4.891647855530474, + "grad_norm": 11.58037281036377, + "learning_rate": 1.2123578883110887e-08, + "lm_loss": 5.432, + "loss": 1.2483, + "step": 2167, + "text_contrastive_loss": 0.7525, + "train_positive_log_prob": -80.7342, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3385, + "epoch": 4.893905191873589, + "grad_norm": 11.600133895874023, + "learning_rate": 1.1623885512737076e-08, + "lm_loss": 5.3721, + "loss": 1.2078, + "step": 2168, + "text_contrastive_loss": 0.6642, + "train_positive_log_prob": -80.3498, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.265, + "epoch": 4.8961625282167045, + "grad_norm": 11.218769073486328, + "learning_rate": 1.1134695860493539e-08, + "lm_loss": 5.2637, + "loss": 1.1192, + "step": 2169, + "text_contrastive_loss": 0.6557, + "train_positive_log_prob": -77.5786, + "train_positive_token_accuracy": 0.0811, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.2801, + "epoch": 4.89841986455982, + "grad_norm": 11.012715339660645, + "learning_rate": 1.0656010956437979e-08, + "lm_loss": 5.4041, + "loss": 1.2497, + "step": 2170, + "text_contrastive_loss": 0.8582, + "train_positive_log_prob": -80.257, + "train_positive_token_accuracy": 0.0713, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3251, + "epoch": 4.900677200902934, + "grad_norm": 11.810998916625977, + "learning_rate": 1.018783180850691e-08, + "lm_loss": 5.4292, + "loss": 1.1797, + "step": 2171, + "text_contrastive_loss": 0.6234, + "train_positive_log_prob": -79.2722, + "train_positive_token_accuracy": 0.0837, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.3524, + "epoch": 4.9029345372460496, + "grad_norm": 13.102461814880371, + "learning_rate": 9.73015940251676e-09, + "lm_loss": 5.4037, + "loss": 1.2426, + "step": 2172, + "text_contrastive_loss": 0.6995, + "train_positive_log_prob": -79.3009, + "train_positive_token_accuracy": 0.0735, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.3162, + "epoch": 4.905191873589165, + "grad_norm": 12.367060661315918, + "learning_rate": 9.282994702159986e-09, + "lm_loss": 5.4207, + "loss": 1.2403, + "step": 2173, + "text_contrastive_loss": 0.764, + "train_positive_log_prob": -79.932, + "train_positive_token_accuracy": 0.0845, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3446, + "epoch": 4.90744920993228, + "grad_norm": 11.949246406555176, + "learning_rate": 8.846338649005082e-09, + "lm_loss": 5.4695, + "loss": 1.2488, + "step": 2174, + "text_contrastive_loss": 0.7145, + "train_positive_log_prob": -80.8258, + "train_positive_token_accuracy": 0.0682, + "train_positive_token_prob": 0.0307 + }, + { + "contrastive_loss": 0.3144, + "epoch": 4.909706546275395, + "grad_norm": 12.648566246032715, + "learning_rate": 8.42019216249046e-09, + "lm_loss": 5.3736, + "loss": 1.1793, + "step": 2175, + "text_contrastive_loss": 0.6551, + "train_positive_log_prob": -78.0226, + "train_positive_token_accuracy": 0.074, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.2898, + "epoch": 4.91196388261851, + "grad_norm": 11.860812187194824, + "learning_rate": 8.004556139927788e-09, + "lm_loss": 5.4132, + "loss": 1.1691, + "step": 2176, + "text_contrastive_loss": 0.6758, + "train_positive_log_prob": -78.5149, + "train_positive_token_accuracy": 0.0762, + "train_positive_token_prob": 0.0303 + }, + { + "contrastive_loss": 0.3164, + "epoch": 4.914221218961625, + "grad_norm": 12.893760681152344, + "learning_rate": 7.599431456495888e-09, + "lm_loss": 5.3637, + "loss": 1.2479, + "step": 2177, + "text_contrastive_loss": 0.7902, + "train_positive_log_prob": -78.2352, + "train_positive_token_accuracy": 0.0833, + "train_positive_token_prob": 0.0327 + }, + { + "contrastive_loss": 0.2526, + "epoch": 4.9164785553047405, + "grad_norm": 10.68089485168457, + "learning_rate": 7.2048189652412784e-09, + "lm_loss": 5.446, + "loss": 1.1645, + "step": 2178, + "text_contrastive_loss": 0.7346, + "train_positive_log_prob": -81.1756, + "train_positive_token_accuracy": 0.0828, + "train_positive_token_prob": 0.0323 + }, + { + "contrastive_loss": 0.3032, + "epoch": 4.918735891647856, + "grad_norm": 12.03073787689209, + "learning_rate": 6.820719497074857e-09, + "lm_loss": 5.4516, + "loss": 1.258, + "step": 2179, + "text_contrastive_loss": 0.8193, + "train_positive_log_prob": -80.1306, + "train_positive_token_accuracy": 0.0763, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3124, + "epoch": 4.92099322799097, + "grad_norm": 11.291508674621582, + "learning_rate": 6.447133860771893e-09, + "lm_loss": 5.421, + "loss": 1.221, + "step": 2180, + "text_contrastive_loss": 0.7329, + "train_positive_log_prob": -80.1504, + "train_positive_token_accuracy": 0.0785, + "train_positive_token_prob": 0.0302 + }, + { + "contrastive_loss": 0.333, + "epoch": 4.923250564334086, + "grad_norm": 13.051501274108887, + "learning_rate": 6.084062842968696e-09, + "lm_loss": 5.3369, + "loss": 1.2691, + "step": 2181, + "text_contrastive_loss": 0.8049, + "train_positive_log_prob": -79.6344, + "train_positive_token_accuracy": 0.0838, + "train_positive_token_prob": 0.0319 + }, + { + "contrastive_loss": 0.3169, + "epoch": 4.925507900677201, + "grad_norm": 14.124873161315918, + "learning_rate": 5.731507208160958e-09, + "lm_loss": 5.5493, + "loss": 1.2184, + "step": 2182, + "text_contrastive_loss": 0.693, + "train_positive_log_prob": -81.3997, + "train_positive_token_accuracy": 0.0722, + "train_positive_token_prob": 0.0291 + }, + { + "contrastive_loss": 0.3927, + "epoch": 4.927765237020316, + "grad_norm": 14.343886375427246, + "learning_rate": 5.389467698704298e-09, + "lm_loss": 5.3734, + "loss": 1.3854, + "step": 2183, + "text_contrastive_loss": 0.9108, + "train_positive_log_prob": -78.1656, + "train_positive_token_accuracy": 0.073, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.3649, + "epoch": 4.9300225733634315, + "grad_norm": 11.616131782531738, + "learning_rate": 5.057945034810385e-09, + "lm_loss": 5.3898, + "loss": 1.2885, + "step": 2184, + "text_contrastive_loss": 0.7692, + "train_positive_log_prob": -79.6931, + "train_positive_token_accuracy": 0.0822, + "train_positive_token_prob": 0.0316 + }, + { + "contrastive_loss": 0.262, + "epoch": 4.932279909706546, + "grad_norm": 11.18310260772705, + "learning_rate": 4.736939914545824e-09, + "lm_loss": 5.357, + "loss": 1.1602, + "step": 2185, + "text_contrastive_loss": 0.725, + "train_positive_log_prob": -76.7979, + "train_positive_token_accuracy": 0.0817, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.3465, + "epoch": 4.934537246049661, + "grad_norm": 14.352568626403809, + "learning_rate": 4.4264530138310445e-09, + "lm_loss": 5.4283, + "loss": 1.2734, + "step": 2186, + "text_contrastive_loss": 0.7681, + "train_positive_log_prob": -80.5959, + "train_positive_token_accuracy": 0.0816, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3247, + "epoch": 4.936794582392777, + "grad_norm": 12.119796752929688, + "learning_rate": 4.1264849864403044e-09, + "lm_loss": 5.4701, + "loss": 1.2244, + "step": 2187, + "text_contrastive_loss": 0.7053, + "train_positive_log_prob": -79.7878, + "train_positive_token_accuracy": 0.0838, + "train_positive_token_prob": 0.0312 + }, + { + "contrastive_loss": 0.3925, + "epoch": 4.939051918735892, + "grad_norm": 13.810029983520508, + "learning_rate": 3.837036463997246e-09, + "lm_loss": 5.4878, + "loss": 1.3281, + "step": 2188, + "text_contrastive_loss": 0.7736, + "train_positive_log_prob": -83.0679, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.03 + }, + { + "contrastive_loss": 0.3321, + "epoch": 4.941309255079007, + "grad_norm": 12.154284477233887, + "learning_rate": 3.558108055976006e-09, + "lm_loss": 5.3822, + "loss": 1.2832, + "step": 2189, + "text_contrastive_loss": 0.8259, + "train_positive_log_prob": -78.1027, + "train_positive_token_accuracy": 0.0826, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.2822, + "epoch": 4.943566591422122, + "grad_norm": 10.892542839050293, + "learning_rate": 3.289700349698999e-09, + "lm_loss": 5.4761, + "loss": 1.1815, + "step": 2190, + "text_contrastive_loss": 0.7033, + "train_positive_log_prob": -84.9446, + "train_positive_token_accuracy": 0.0803, + "train_positive_token_prob": 0.0331 + }, + { + "contrastive_loss": 0.2665, + "epoch": 4.945823927765237, + "grad_norm": 12.452816009521484, + "learning_rate": 3.0318139103363564e-09, + "lm_loss": 5.3599, + "loss": 1.1746, + "step": 2191, + "text_contrastive_loss": 0.7442, + "train_positive_log_prob": -77.9932, + "train_positive_token_accuracy": 0.0787, + "train_positive_token_prob": 0.0313 + }, + { + "contrastive_loss": 0.3655, + "epoch": 4.948081264108352, + "grad_norm": 14.186995506286621, + "learning_rate": 2.7844492809031567e-09, + "lm_loss": 5.4256, + "loss": 1.2447, + "step": 2192, + "text_contrastive_loss": 0.6733, + "train_positive_log_prob": -81.0037, + "train_positive_token_accuracy": 0.0769, + "train_positive_token_prob": 0.0311 + }, + { + "contrastive_loss": 0.3657, + "epoch": 4.950338600451468, + "grad_norm": 15.584464073181152, + "learning_rate": 2.547606982260531e-09, + "lm_loss": 5.33, + "loss": 1.2754, + "step": 2193, + "text_contrastive_loss": 0.7534, + "train_positive_log_prob": -81.8437, + "train_positive_token_accuracy": 0.0832, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3973, + "epoch": 4.952595936794582, + "grad_norm": 14.872355461120605, + "learning_rate": 2.3212875131117805e-09, + "lm_loss": 5.342, + "loss": 1.3545, + "step": 2194, + "text_contrastive_loss": 0.8459, + "train_positive_log_prob": -76.8127, + "train_positive_token_accuracy": 0.0772, + "train_positive_token_prob": 0.0315 + }, + { + "contrastive_loss": 0.3456, + "epoch": 4.954853273137697, + "grad_norm": 12.156463623046875, + "learning_rate": 2.1054913500051512e-09, + "lm_loss": 5.3525, + "loss": 1.2222, + "step": 2195, + "text_contrastive_loss": 0.6827, + "train_positive_log_prob": -80.3929, + "train_positive_token_accuracy": 0.0801, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.4011, + "epoch": 4.957110609480813, + "grad_norm": 13.698193550109863, + "learning_rate": 1.9002189473288356e-09, + "lm_loss": 5.4728, + "loss": 1.3074, + "step": 2196, + "text_contrastive_loss": 0.7182, + "train_positive_log_prob": -82.2002, + "train_positive_token_accuracy": 0.077, + "train_positive_token_prob": 0.0296 + }, + { + "contrastive_loss": 0.3205, + "epoch": 4.959367945823928, + "grad_norm": 11.167879104614258, + "learning_rate": 1.7054707373126423e-09, + "lm_loss": 5.452, + "loss": 1.2133, + "step": 2197, + "text_contrastive_loss": 0.6952, + "train_positive_log_prob": -79.8821, + "train_positive_token_accuracy": 0.0789, + "train_positive_token_prob": 0.0306 + }, + { + "contrastive_loss": 0.2751, + "epoch": 4.961625282167043, + "grad_norm": 12.433218002319336, + "learning_rate": 1.5212471300252163e-09, + "lm_loss": 5.3687, + "loss": 1.1815, + "step": 2198, + "text_contrastive_loss": 0.7391, + "train_positive_log_prob": -77.9923, + "train_positive_token_accuracy": 0.0796, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3191, + "epoch": 4.963882618510158, + "grad_norm": 11.800603866577148, + "learning_rate": 1.347548513375707e-09, + "lm_loss": 5.3941, + "loss": 1.2227, + "step": 2199, + "text_contrastive_loss": 0.7283, + "train_positive_log_prob": -77.7529, + "train_positive_token_accuracy": 0.0713, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3587, + "epoch": 4.966139954853273, + "grad_norm": 13.342528343200684, + "learning_rate": 1.1843752531104368e-09, + "lm_loss": 5.3745, + "loss": 1.2979, + "step": 2200, + "text_contrastive_loss": 0.8036, + "train_positive_log_prob": -81.3307, + "train_positive_token_accuracy": 0.0834, + "train_positive_token_prob": 0.0321 + }, + { + "contrastive_loss": 0.4287, + "epoch": 4.968397291196388, + "grad_norm": 14.24833869934082, + "learning_rate": 1.0317276928134557e-09, + "lm_loss": 5.3427, + "loss": 1.3643, + "step": 2201, + "text_contrastive_loss": 0.8027, + "train_positive_log_prob": -78.5982, + "train_positive_token_accuracy": 0.0815, + "train_positive_token_prob": 0.033 + }, + { + "contrastive_loss": 0.3953, + "epoch": 4.970654627539504, + "grad_norm": 13.420281410217285, + "learning_rate": 8.896061539048762e-10, + "lm_loss": 5.3454, + "loss": 1.3593, + "step": 2202, + "text_contrastive_loss": 0.8591, + "train_positive_log_prob": -78.5512, + "train_positive_token_accuracy": 0.076, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.2726, + "epoch": 4.972911963882618, + "grad_norm": 12.549819946289062, + "learning_rate": 7.580109356419841e-10, + "lm_loss": 5.4239, + "loss": 1.1833, + "step": 2203, + "text_contrastive_loss": 0.7364, + "train_positive_log_prob": -79.4748, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0309 + }, + { + "contrastive_loss": 0.2587, + "epoch": 4.975169300225733, + "grad_norm": 11.20868968963623, + "learning_rate": 6.369423151164622e-10, + "lm_loss": 5.4248, + "loss": 1.1916, + "step": 2204, + "text_contrastive_loss": 0.7808, + "train_positive_log_prob": -79.8284, + "train_positive_token_accuracy": 0.0767, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3095, + "epoch": 4.977426636568849, + "grad_norm": 12.104876518249512, + "learning_rate": 5.264005472549461e-10, + "lm_loss": 5.452, + "loss": 1.2191, + "step": 2205, + "text_contrastive_loss": 0.7289, + "train_positive_log_prob": -81.3962, + "train_positive_token_accuracy": 0.0798, + "train_positive_token_prob": 0.0304 + }, + { + "contrastive_loss": 0.2299, + "epoch": 4.979683972911964, + "grad_norm": 11.765388488769531, + "learning_rate": 4.2638586481846823e-10, + "lm_loss": 5.4711, + "loss": 1.094, + "step": 2206, + "text_contrastive_loss": 0.634, + "train_positive_log_prob": -80.418, + "train_positive_token_accuracy": 0.0755, + "train_positive_token_prob": 0.0301 + }, + { + "contrastive_loss": 0.3032, + "epoch": 4.981941309255079, + "grad_norm": 11.97525691986084, + "learning_rate": 3.368984784024587e-10, + "lm_loss": 5.3875, + "loss": 1.2025, + "step": 2207, + "text_contrastive_loss": 0.7212, + "train_positive_log_prob": -79.674, + "train_positive_token_accuracy": 0.0798, + "train_positive_token_prob": 0.0314 + }, + { + "contrastive_loss": 0.3693, + "epoch": 4.984198645598195, + "grad_norm": 13.687864303588867, + "learning_rate": 2.5793857643396924e-10, + "lm_loss": 5.393, + "loss": 1.3051, + "step": 2208, + "text_contrastive_loss": 0.793, + "train_positive_log_prob": -79.8392, + "train_positive_token_accuracy": 0.0781, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.2489, + "epoch": 4.986455981941309, + "grad_norm": 11.94817066192627, + "learning_rate": 1.89506325175004e-10, + "lm_loss": 5.3805, + "loss": 1.1694, + "step": 2209, + "text_contrastive_loss": 0.7649, + "train_positive_log_prob": -77.8959, + "train_positive_token_accuracy": 0.082, + "train_positive_token_prob": 0.0317 + }, + { + "contrastive_loss": 0.2847, + "epoch": 4.988713318284424, + "grad_norm": 11.754454612731934, + "learning_rate": 1.316018687191889e-10, + "lm_loss": 5.4591, + "loss": 1.2278, + "step": 2210, + "text_contrastive_loss": 0.7944, + "train_positive_log_prob": -81.8014, + "train_positive_token_accuracy": 0.0813, + "train_positive_token_prob": 0.0308 + }, + { + "contrastive_loss": 0.3243, + "epoch": 4.99097065462754, + "grad_norm": 12.112828254699707, + "learning_rate": 8.422532899121649e-11, + "lm_loss": 5.4206, + "loss": 1.237, + "step": 2211, + "text_contrastive_loss": 0.7413, + "train_positive_log_prob": -78.3676, + "train_positive_token_accuracy": 0.0754, + "train_positive_token_prob": 0.0305 + }, + { + "contrastive_loss": 0.3193, + "epoch": 4.993227990970655, + "grad_norm": 13.236248016357422, + "learning_rate": 4.737680575017667e-11, + "lm_loss": 5.3945, + "loss": 1.2508, + "step": 2212, + "text_contrastive_loss": 0.7842, + "train_positive_log_prob": -78.5783, + "train_positive_token_accuracy": 0.0842, + "train_positive_token_prob": 0.032 + }, + { + "contrastive_loss": 0.3332, + "epoch": 4.995485327313769, + "grad_norm": 13.329639434814453, + "learning_rate": 2.1056376585115723e-11, + "lm_loss": 5.4133, + "loss": 1.2415, + "step": 2213, + "text_contrastive_loss": 0.7339, + "train_positive_log_prob": -79.6994, + "train_positive_token_accuracy": 0.0833, + "train_positive_token_prob": 0.031 + }, + { + "contrastive_loss": 0.376, + "epoch": 4.997742663656885, + "grad_norm": 13.80856704711914, + "learning_rate": 5.2640969172568225e-12, + "lm_loss": 5.4304, + "loss": 1.3796, + "step": 2214, + "text_contrastive_loss": 0.9211, + "train_positive_log_prob": -77.826, + "train_positive_token_accuracy": 0.0864, + "train_positive_token_prob": 0.0318 + }, + { + "contrastive_loss": 0.2175, + "epoch": 5.0, + "grad_norm": 13.489069938659668, + "learning_rate": 0.0, + "lm_loss": 5.4854, + "loss": 1.0078, + "step": 2215, + "text_contrastive_loss": 0.4835, + "train_positive_log_prob": -82.5201, + "train_positive_token_accuracy": 0.0803, + "train_positive_token_prob": 0.0313 + }, + { + "epoch": 5.0, + "step": 2215, + "total_flos": 1.487319842422784e+17, + "train_loss": 1.4400421684536384, + "train_runtime": 10743.5074, + "train_samples_per_second": 52.723, + "train_steps_per_second": 0.206 + } + ], + "logging_steps": 1, + "max_steps": 2215, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.487319842422784e+17, + "train_batch_size": 256, + "trial_name": null, + "trial_params": null +}