{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 2215, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "contrastive_loss": 1.0729, "epoch": 0.002257336343115124, "grad_norm": 74.65375518798828, "learning_rate": 2.0000000000000002e-07, "lm_loss": 18.1331, "loss": 4.5004, "step": 1, "text_contrastive_loss": 3.2285, "train_positive_log_prob": -273.6295, "train_positive_token_accuracy": 0.0003, "train_positive_token_prob": 0.001 }, { "contrastive_loss": 1.1852, "epoch": 0.004514672686230248, "grad_norm": 88.49497985839844, "learning_rate": 4.0000000000000003e-07, "lm_loss": 18.3479, "loss": 4.7987, "step": 2, "text_contrastive_loss": 3.5573, "train_positive_log_prob": -263.4631, "train_positive_token_accuracy": 0.0014, "train_positive_token_prob": 0.0009 }, { "contrastive_loss": 0.7489, "epoch": 0.006772009029345372, "grad_norm": 67.62373352050781, "learning_rate": 6.000000000000001e-07, "lm_loss": 17.6708, "loss": 4.1388, "step": 3, "text_contrastive_loss": 3.2457, "train_positive_log_prob": -267.5312, "train_positive_token_accuracy": 0.0015, "train_positive_token_prob": 0.0015 }, { "contrastive_loss": 0.7492, "epoch": 0.009029345372460496, "grad_norm": 58.398277282714844, "learning_rate": 8.000000000000001e-07, "lm_loss": 18.0811, "loss": 3.9772, "step": 4, "text_contrastive_loss": 2.8399, "train_positive_log_prob": -267.6794, "train_positive_token_accuracy": 0.0005, "train_positive_token_prob": 0.001 }, { "contrastive_loss": 0.8508, "epoch": 0.011286681715575621, "grad_norm": 61.87724304199219, "learning_rate": 1.0000000000000002e-06, "lm_loss": 17.6707, "loss": 4.1976, "step": 5, "text_contrastive_loss": 3.1595, "train_positive_log_prob": -262.9114, "train_positive_token_accuracy": 0.0016, "train_positive_token_prob": 0.0015 }, { "contrastive_loss": 0.9909, "epoch": 0.013544018058690745, "grad_norm": 58.21712875366211, "learning_rate": 1.2000000000000002e-06, "lm_loss": 17.7713, "loss": 4.2961, "step": 6, "text_contrastive_loss": 3.0561, "train_positive_log_prob": -260.9442, "train_positive_token_accuracy": 0.0019, "train_positive_token_prob": 0.0013 }, { "contrastive_loss": 0.8544, "epoch": 0.01580135440180587, "grad_norm": 57.41749954223633, "learning_rate": 1.4000000000000001e-06, "lm_loss": 17.6181, "loss": 3.7124, "step": 7, "text_contrastive_loss": 2.1923, "train_positive_log_prob": -258.7651, "train_positive_token_accuracy": 0.0016, "train_positive_token_prob": 0.0013 }, { "contrastive_loss": 0.7607, "epoch": 0.01805869074492099, "grad_norm": 53.608184814453125, "learning_rate": 1.6000000000000001e-06, "lm_loss": 17.6923, "loss": 3.8436, "step": 8, "text_contrastive_loss": 2.6273, "train_positive_log_prob": -260.3477, "train_positive_token_accuracy": 0.0021, "train_positive_token_prob": 0.0021 }, { "contrastive_loss": 0.841, "epoch": 0.020316027088036117, "grad_norm": 54.720584869384766, "learning_rate": 1.8000000000000001e-06, "lm_loss": 17.152, "loss": 3.8483, "step": 9, "text_contrastive_loss": 2.5842, "train_positive_log_prob": -257.68, "train_positive_token_accuracy": 0.0016, "train_positive_token_prob": 0.0016 }, { "contrastive_loss": 0.8786, "epoch": 0.022573363431151242, "grad_norm": 49.16249084472656, "learning_rate": 2.0000000000000003e-06, "lm_loss": 17.1386, "loss": 3.6662, "step": 10, "text_contrastive_loss": 2.1474, "train_positive_log_prob": -251.052, "train_positive_token_accuracy": 0.0024, "train_positive_token_prob": 0.0023 }, { "contrastive_loss": 0.8012, "epoch": 0.024830699774266364, "grad_norm": 44.05373764038086, "learning_rate": 2.2e-06, "lm_loss": 16.7479, "loss": 3.3581, "step": 11, "text_contrastive_loss": 1.7641, "train_positive_log_prob": -241.9287, "train_positive_token_accuracy": 0.0022, "train_positive_token_prob": 0.002 }, { "contrastive_loss": 0.875, "epoch": 0.02708803611738149, "grad_norm": 44.777740478515625, "learning_rate": 2.4000000000000003e-06, "lm_loss": 16.5121, "loss": 3.5405, "step": 12, "text_contrastive_loss": 2.0286, "train_positive_log_prob": -246.7872, "train_positive_token_accuracy": 0.0026, "train_positive_token_prob": 0.0021 }, { "contrastive_loss": 0.7919, "epoch": 0.029345372460496615, "grad_norm": 47.89744186401367, "learning_rate": 2.6e-06, "lm_loss": 16.6649, "loss": 3.7886, "step": 13, "text_contrastive_loss": 2.6604, "train_positive_log_prob": -248.2189, "train_positive_token_accuracy": 0.0039, "train_positive_token_prob": 0.0024 }, { "contrastive_loss": 0.5726, "epoch": 0.03160270880361174, "grad_norm": 43.6002082824707, "learning_rate": 2.8000000000000003e-06, "lm_loss": 16.1374, "loss": 3.1962, "step": 14, "text_contrastive_loss": 2.0196, "train_positive_log_prob": -243.8907, "train_positive_token_accuracy": 0.0016, "train_positive_token_prob": 0.0019 }, { "contrastive_loss": 0.7991, "epoch": 0.033860045146726865, "grad_norm": 40.39301681518555, "learning_rate": 3e-06, "lm_loss": 15.683, "loss": 3.4338, "step": 15, "text_contrastive_loss": 2.1329, "train_positive_log_prob": -227.1494, "train_positive_token_accuracy": 0.0024, "train_positive_token_prob": 0.0019 }, { "contrastive_loss": 0.8454, "epoch": 0.03611738148984198, "grad_norm": 38.99760055541992, "learning_rate": 3.2000000000000003e-06, "lm_loss": 15.3319, "loss": 3.4371, "step": 16, "text_contrastive_loss": 2.1172, "train_positive_log_prob": -227.2166, "train_positive_token_accuracy": 0.004, "train_positive_token_prob": 0.0023 }, { "contrastive_loss": 0.8265, "epoch": 0.03837471783295711, "grad_norm": 37.754451751708984, "learning_rate": 3.4000000000000005e-06, "lm_loss": 14.9289, "loss": 3.3596, "step": 17, "text_contrastive_loss": 2.0804, "train_positive_log_prob": -220.669, "train_positive_token_accuracy": 0.0032, "train_positive_token_prob": 0.0028 }, { "contrastive_loss": 0.8901, "epoch": 0.040632054176072234, "grad_norm": 36.27416229248047, "learning_rate": 3.6000000000000003e-06, "lm_loss": 14.8213, "loss": 3.3589, "step": 18, "text_contrastive_loss": 1.9733, "train_positive_log_prob": -225.507, "train_positive_token_accuracy": 0.0033, "train_positive_token_prob": 0.003 }, { "contrastive_loss": 0.7757, "epoch": 0.04288939051918736, "grad_norm": 33.69345474243164, "learning_rate": 3.8000000000000005e-06, "lm_loss": 14.3138, "loss": 3.0299, "step": 19, "text_contrastive_loss": 1.6456, "train_positive_log_prob": -206.9811, "train_positive_token_accuracy": 0.0038, "train_positive_token_prob": 0.0027 }, { "contrastive_loss": 0.8273, "epoch": 0.045146726862302484, "grad_norm": 30.30756187438965, "learning_rate": 4.000000000000001e-06, "lm_loss": 13.9409, "loss": 2.9315, "step": 20, "text_contrastive_loss": 1.4203, "train_positive_log_prob": -204.2179, "train_positive_token_accuracy": 0.0053, "train_positive_token_prob": 0.0035 }, { "contrastive_loss": 0.7968, "epoch": 0.04740406320541761, "grad_norm": 27.533042907714844, "learning_rate": 4.2000000000000004e-06, "lm_loss": 13.582, "loss": 2.8958, "step": 21, "text_contrastive_loss": 1.4817, "train_positive_log_prob": -205.3794, "train_positive_token_accuracy": 0.0031, "train_positive_token_prob": 0.0028 }, { "contrastive_loss": 0.8334, "epoch": 0.04966139954853273, "grad_norm": 27.34656524658203, "learning_rate": 4.4e-06, "lm_loss": 13.4126, "loss": 2.9698, "step": 22, "text_contrastive_loss": 1.5904, "train_positive_log_prob": -195.7889, "train_positive_token_accuracy": 0.0075, "train_positive_token_prob": 0.0042 }, { "contrastive_loss": 0.8397, "epoch": 0.05191873589164785, "grad_norm": 26.550691604614258, "learning_rate": 4.600000000000001e-06, "lm_loss": 13.0318, "loss": 2.8192, "step": 23, "text_contrastive_loss": 1.3528, "train_positive_log_prob": -192.6257, "train_positive_token_accuracy": 0.009, "train_positive_token_prob": 0.0044 }, { "contrastive_loss": 0.7736, "epoch": 0.05417607223476298, "grad_norm": 24.160314559936523, "learning_rate": 4.800000000000001e-06, "lm_loss": 12.7815, "loss": 2.7666, "step": 24, "text_contrastive_loss": 1.4298, "train_positive_log_prob": -187.5415, "train_positive_token_accuracy": 0.008, "train_positive_token_prob": 0.0044 }, { "contrastive_loss": 0.7041, "epoch": 0.056433408577878104, "grad_norm": 23.253559112548828, "learning_rate": 5e-06, "lm_loss": 12.3029, "loss": 2.559, "step": 25, "text_contrastive_loss": 1.2492, "train_positive_log_prob": -182.9034, "train_positive_token_accuracy": 0.0126, "train_positive_token_prob": 0.006 }, { "contrastive_loss": 0.7477, "epoch": 0.05869074492099323, "grad_norm": 22.743453979492188, "learning_rate": 5.2e-06, "lm_loss": 11.9511, "loss": 2.5664, "step": 26, "text_contrastive_loss": 1.2472, "train_positive_log_prob": -179.451, "train_positive_token_accuracy": 0.0177, "train_positive_token_prob": 0.0089 }, { "contrastive_loss": 0.8568, "epoch": 0.060948081264108354, "grad_norm": 22.865564346313477, "learning_rate": 5.400000000000001e-06, "lm_loss": 11.7099, "loss": 2.7531, "step": 27, "text_contrastive_loss": 1.4506, "train_positive_log_prob": -176.3327, "train_positive_token_accuracy": 0.0179, "train_positive_token_prob": 0.0087 }, { "contrastive_loss": 0.8545, "epoch": 0.06320541760722348, "grad_norm": 22.831811904907227, "learning_rate": 5.600000000000001e-06, "lm_loss": 11.4956, "loss": 2.6602, "step": 28, "text_contrastive_loss": 1.3122, "train_positive_log_prob": -170.1002, "train_positive_token_accuracy": 0.0251, "train_positive_token_prob": 0.0117 }, { "contrastive_loss": 0.8978, "epoch": 0.0654627539503386, "grad_norm": 24.4145450592041, "learning_rate": 5.8e-06, "lm_loss": 11.0337, "loss": 2.587, "step": 29, "text_contrastive_loss": 1.1715, "train_positive_log_prob": -158.9954, "train_positive_token_accuracy": 0.0339, "train_positive_token_prob": 0.0133 }, { "contrastive_loss": 0.8086, "epoch": 0.06772009029345373, "grad_norm": 22.760570526123047, "learning_rate": 6e-06, "lm_loss": 10.7794, "loss": 2.4137, "step": 30, "text_contrastive_loss": 1.0544, "train_positive_log_prob": -158.916, "train_positive_token_accuracy": 0.0395, "train_positive_token_prob": 0.0142 }, { "contrastive_loss": 0.706, "epoch": 0.06997742663656885, "grad_norm": 18.718631744384766, "learning_rate": 6.200000000000001e-06, "lm_loss": 10.463, "loss": 2.2648, "step": 31, "text_contrastive_loss": 1.025, "train_positive_log_prob": -155.0657, "train_positive_token_accuracy": 0.0464, "train_positive_token_prob": 0.0166 }, { "contrastive_loss": 0.8945, "epoch": 0.07223476297968397, "grad_norm": 21.011608123779297, "learning_rate": 6.4000000000000006e-06, "lm_loss": 10.1752, "loss": 2.4952, "step": 32, "text_contrastive_loss": 1.1663, "train_positive_log_prob": -148.1847, "train_positive_token_accuracy": 0.0488, "train_positive_token_prob": 0.0187 }, { "contrastive_loss": 0.778, "epoch": 0.0744920993227991, "grad_norm": 20.67786407470703, "learning_rate": 6.600000000000001e-06, "lm_loss": 10.0549, "loss": 2.4966, "step": 33, "text_contrastive_loss": 1.4263, "train_positive_log_prob": -149.3017, "train_positive_token_accuracy": 0.0579, "train_positive_token_prob": 0.0213 }, { "contrastive_loss": 0.7866, "epoch": 0.07674943566591422, "grad_norm": 20.956958770751953, "learning_rate": 6.800000000000001e-06, "lm_loss": 9.8113, "loss": 2.3065, "step": 34, "text_contrastive_loss": 1.0776, "train_positive_log_prob": -145.5954, "train_positive_token_accuracy": 0.0663, "train_positive_token_prob": 0.0236 }, { "contrastive_loss": 0.7381, "epoch": 0.07900677200902935, "grad_norm": 20.436098098754883, "learning_rate": 7e-06, "lm_loss": 9.712, "loss": 2.2814, "step": 35, "text_contrastive_loss": 1.1442, "train_positive_log_prob": -146.7038, "train_positive_token_accuracy": 0.0727, "train_positive_token_prob": 0.0256 }, { "contrastive_loss": 0.8602, "epoch": 0.08126410835214447, "grad_norm": 22.360729217529297, "learning_rate": 7.2000000000000005e-06, "lm_loss": 9.438, "loss": 2.3129, "step": 36, "text_contrastive_loss": 1.0178, "train_positive_log_prob": -143.5192, "train_positive_token_accuracy": 0.0753, "train_positive_token_prob": 0.0252 }, { "contrastive_loss": 0.7534, "epoch": 0.0835214446952596, "grad_norm": 20.295902252197266, "learning_rate": 7.4e-06, "lm_loss": 9.2734, "loss": 2.2474, "step": 37, "text_contrastive_loss": 1.1334, "train_positive_log_prob": -134.3203, "train_positive_token_accuracy": 0.0709, "train_positive_token_prob": 0.026 }, { "contrastive_loss": 0.7088, "epoch": 0.08577878103837472, "grad_norm": 19.866477966308594, "learning_rate": 7.600000000000001e-06, "lm_loss": 9.163, "loss": 2.1854, "step": 38, "text_contrastive_loss": 1.1206, "train_positive_log_prob": -140.95, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0253 }, { "contrastive_loss": 0.6501, "epoch": 0.08803611738148984, "grad_norm": 20.59491729736328, "learning_rate": 7.800000000000002e-06, "lm_loss": 9.0296, "loss": 2.0447, "step": 39, "text_contrastive_loss": 0.9833, "train_positive_log_prob": -134.0005, "train_positive_token_accuracy": 0.0805, "train_positive_token_prob": 0.0268 }, { "contrastive_loss": 0.8234, "epoch": 0.09029345372460497, "grad_norm": 21.421241760253906, "learning_rate": 8.000000000000001e-06, "lm_loss": 8.9082, "loss": 2.2105, "step": 40, "text_contrastive_loss": 0.9924, "train_positive_log_prob": -133.1723, "train_positive_token_accuracy": 0.0834, "train_positive_token_prob": 0.0266 }, { "contrastive_loss": 0.633, "epoch": 0.09255079006772009, "grad_norm": 18.046871185302734, "learning_rate": 8.2e-06, "lm_loss": 8.7144, "loss": 2.0768, "step": 41, "text_contrastive_loss": 1.1447, "train_positive_log_prob": -129.3175, "train_positive_token_accuracy": 0.0721, "train_positive_token_prob": 0.0237 }, { "contrastive_loss": 0.7048, "epoch": 0.09480812641083522, "grad_norm": 21.57222557067871, "learning_rate": 8.400000000000001e-06, "lm_loss": 8.6116, "loss": 2.1106, "step": 42, "text_contrastive_loss": 1.0892, "train_positive_log_prob": -128.9057, "train_positive_token_accuracy": 0.083, "train_positive_token_prob": 0.0233 }, { "contrastive_loss": 0.7261, "epoch": 0.09706546275395034, "grad_norm": 23.071239471435547, "learning_rate": 8.6e-06, "lm_loss": 8.5381, "loss": 2.1658, "step": 43, "text_contrastive_loss": 1.1717, "train_positive_log_prob": -124.6441, "train_positive_token_accuracy": 0.076, "train_positive_token_prob": 0.0226 }, { "contrastive_loss": 0.7527, "epoch": 0.09932279909706546, "grad_norm": 22.00257110595703, "learning_rate": 8.8e-06, "lm_loss": 8.5846, "loss": 2.1009, "step": 44, "text_contrastive_loss": 0.9795, "train_positive_log_prob": -125.0441, "train_positive_token_accuracy": 0.0711, "train_positive_token_prob": 0.0205 }, { "contrastive_loss": 0.6884, "epoch": 0.10158013544018059, "grad_norm": 20.627120971679688, "learning_rate": 9e-06, "lm_loss": 8.2954, "loss": 2.0499, "step": 45, "text_contrastive_loss": 1.064, "train_positive_log_prob": -122.1275, "train_positive_token_accuracy": 0.0743, "train_positive_token_prob": 0.0205 }, { "contrastive_loss": 0.7249, "epoch": 0.1038374717832957, "grad_norm": 21.081621170043945, "learning_rate": 9.200000000000002e-06, "lm_loss": 8.4546, "loss": 2.0632, "step": 46, "text_contrastive_loss": 0.9856, "train_positive_log_prob": -127.9356, "train_positive_token_accuracy": 0.0687, "train_positive_token_prob": 0.0178 }, { "contrastive_loss": 0.7611, "epoch": 0.10609480812641084, "grad_norm": 20.1016902923584, "learning_rate": 9.4e-06, "lm_loss": 8.3393, "loss": 2.2063, "step": 47, "text_contrastive_loss": 1.2226, "train_positive_log_prob": -123.2022, "train_positive_token_accuracy": 0.0693, "train_positive_token_prob": 0.0176 }, { "contrastive_loss": 0.6507, "epoch": 0.10835214446952596, "grad_norm": 18.013376235961914, "learning_rate": 9.600000000000001e-06, "lm_loss": 8.1474, "loss": 2.0983, "step": 48, "text_contrastive_loss": 1.2658, "train_positive_log_prob": -117.2156, "train_positive_token_accuracy": 0.0728, "train_positive_token_prob": 0.0181 }, { "contrastive_loss": 0.6709, "epoch": 0.11060948081264109, "grad_norm": 19.485240936279297, "learning_rate": 9.800000000000001e-06, "lm_loss": 8.0471, "loss": 1.9798, "step": 49, "text_contrastive_loss": 1.0085, "train_positive_log_prob": -118.3807, "train_positive_token_accuracy": 0.0672, "train_positive_token_prob": 0.0169 }, { "contrastive_loss": 0.718, "epoch": 0.11286681715575621, "grad_norm": 19.43058967590332, "learning_rate": 1e-05, "lm_loss": 8.0583, "loss": 1.9316, "step": 50, "text_contrastive_loss": 0.8156, "train_positive_log_prob": -121.3418, "train_positive_token_accuracy": 0.0672, "train_positive_token_prob": 0.0177 }, { "contrastive_loss": 0.6668, "epoch": 0.11512415349887133, "grad_norm": 18.925294876098633, "learning_rate": 9.999994735903083e-06, "lm_loss": 8.1304, "loss": 2.0567, "step": 51, "text_contrastive_loss": 1.1538, "train_positive_log_prob": -119.5728, "train_positive_token_accuracy": 0.0622, "train_positive_token_prob": 0.0156 }, { "contrastive_loss": 0.7649, "epoch": 0.11738148984198646, "grad_norm": 22.27410316467285, "learning_rate": 9.999978943623417e-06, "lm_loss": 7.9671, "loss": 2.0171, "step": 52, "text_contrastive_loss": 0.911, "train_positive_log_prob": -116.2374, "train_positive_token_accuracy": 0.0651, "train_positive_token_prob": 0.016 }, { "contrastive_loss": 0.6431, "epoch": 0.11963882618510158, "grad_norm": 18.041515350341797, "learning_rate": 9.999952623194252e-06, "lm_loss": 7.9681, "loss": 1.8983, "step": 53, "text_contrastive_loss": 0.9168, "train_positive_log_prob": -117.9658, "train_positive_token_accuracy": 0.0652, "train_positive_token_prob": 0.0155 }, { "contrastive_loss": 0.6277, "epoch": 0.12189616252821671, "grad_norm": 20.22205352783203, "learning_rate": 9.999915774671009e-06, "lm_loss": 7.8363, "loss": 1.9114, "step": 54, "text_contrastive_loss": 1.0003, "train_positive_log_prob": -114.7927, "train_positive_token_accuracy": 0.0659, "train_positive_token_prob": 0.016 }, { "contrastive_loss": 0.7325, "epoch": 0.12415349887133183, "grad_norm": 23.12201499938965, "learning_rate": 9.999868398131282e-06, "lm_loss": 7.8999, "loss": 2.0765, "step": 55, "text_contrastive_loss": 1.108, "train_positive_log_prob": -119.4265, "train_positive_token_accuracy": 0.0669, "train_positive_token_prob": 0.0164 }, { "contrastive_loss": 0.5996, "epoch": 0.12641083521444696, "grad_norm": 18.512279510498047, "learning_rate": 9.999810493674826e-06, "lm_loss": 7.8678, "loss": 1.8542, "step": 56, "text_contrastive_loss": 0.9356, "train_positive_log_prob": -114.8177, "train_positive_token_accuracy": 0.0701, "train_positive_token_prob": 0.0164 }, { "contrastive_loss": 0.7241, "epoch": 0.12866817155756208, "grad_norm": 20.64830780029297, "learning_rate": 9.999742061423567e-06, "lm_loss": 7.7702, "loss": 1.9937, "step": 57, "text_contrastive_loss": 0.9852, "train_positive_log_prob": -115.5182, "train_positive_token_accuracy": 0.0673, "train_positive_token_prob": 0.0163 }, { "contrastive_loss": 0.6388, "epoch": 0.1309255079006772, "grad_norm": 17.783964157104492, "learning_rate": 9.999663101521599e-06, "lm_loss": 7.7735, "loss": 1.9498, "step": 58, "text_contrastive_loss": 1.0673, "train_positive_log_prob": -114.9075, "train_positive_token_accuracy": 0.0632, "train_positive_token_prob": 0.0157 }, { "contrastive_loss": 0.6919, "epoch": 0.13318284424379231, "grad_norm": 19.88160514831543, "learning_rate": 9.999573614135183e-06, "lm_loss": 7.7051, "loss": 1.9881, "step": 59, "text_contrastive_loss": 1.0513, "train_positive_log_prob": -114.5836, "train_positive_token_accuracy": 0.0612, "train_positive_token_prob": 0.0156 }, { "contrastive_loss": 0.6188, "epoch": 0.13544018058690746, "grad_norm": 18.36577796936035, "learning_rate": 9.999473599452746e-06, "lm_loss": 7.6918, "loss": 1.9356, "step": 60, "text_contrastive_loss": 1.0952, "train_positive_log_prob": -115.5601, "train_positive_token_accuracy": 0.0621, "train_positive_token_prob": 0.016 }, { "contrastive_loss": 0.6115, "epoch": 0.13769751693002258, "grad_norm": 19.831995010375977, "learning_rate": 9.999363057684885e-06, "lm_loss": 7.6733, "loss": 1.8999, "step": 61, "text_contrastive_loss": 1.0422, "train_positive_log_prob": -113.1827, "train_positive_token_accuracy": 0.0638, "train_positive_token_prob": 0.0165 }, { "contrastive_loss": 0.6345, "epoch": 0.1399548532731377, "grad_norm": 18.292102813720703, "learning_rate": 9.999241989064358e-06, "lm_loss": 7.5556, "loss": 1.8898, "step": 62, "text_contrastive_loss": 0.9995, "train_positive_log_prob": -111.4475, "train_positive_token_accuracy": 0.0699, "train_positive_token_prob": 0.0165 }, { "contrastive_loss": 0.7301, "epoch": 0.14221218961625282, "grad_norm": 18.254596710205078, "learning_rate": 9.999110393846097e-06, "lm_loss": 7.5002, "loss": 2.1006, "step": 63, "text_contrastive_loss": 1.241, "train_positive_log_prob": -109.9251, "train_positive_token_accuracy": 0.0642, "train_positive_token_prob": 0.0157 }, { "contrastive_loss": 0.6444, "epoch": 0.14446952595936793, "grad_norm": 17.319774627685547, "learning_rate": 9.998968272307187e-06, "lm_loss": 7.4982, "loss": 1.8944, "step": 64, "text_contrastive_loss": 1.0002, "train_positive_log_prob": -111.0935, "train_positive_token_accuracy": 0.0693, "train_positive_token_prob": 0.0168 }, { "contrastive_loss": 0.7149, "epoch": 0.14672686230248308, "grad_norm": 18.645326614379883, "learning_rate": 9.99881562474689e-06, "lm_loss": 7.5473, "loss": 1.9202, "step": 65, "text_contrastive_loss": 0.9012, "train_positive_log_prob": -111.0243, "train_positive_token_accuracy": 0.0685, "train_positive_token_prob": 0.0164 }, { "contrastive_loss": 0.6409, "epoch": 0.1489841986455982, "grad_norm": 18.37657928466797, "learning_rate": 9.998652451486626e-06, "lm_loss": 7.6032, "loss": 1.8893, "step": 66, "text_contrastive_loss": 0.9761, "train_positive_log_prob": -113.0417, "train_positive_token_accuracy": 0.0633, "train_positive_token_prob": 0.0162 }, { "contrastive_loss": 0.7272, "epoch": 0.15124153498871332, "grad_norm": 18.725929260253906, "learning_rate": 9.998478752869976e-06, "lm_loss": 7.4061, "loss": 1.9524, "step": 67, "text_contrastive_loss": 0.9692, "train_positive_log_prob": -110.0508, "train_positive_token_accuracy": 0.0754, "train_positive_token_prob": 0.0184 }, { "contrastive_loss": 0.7003, "epoch": 0.15349887133182843, "grad_norm": 19.5133113861084, "learning_rate": 9.998294529262688e-06, "lm_loss": 7.5117, "loss": 1.977, "step": 68, "text_contrastive_loss": 1.0512, "train_positive_log_prob": -109.5966, "train_positive_token_accuracy": 0.0648, "train_positive_token_prob": 0.0175 }, { "contrastive_loss": 0.7785, "epoch": 0.15575620767494355, "grad_norm": 20.068553924560547, "learning_rate": 9.998099781052673e-06, "lm_loss": 7.3481, "loss": 2.0972, "step": 69, "text_contrastive_loss": 1.1678, "train_positive_log_prob": -111.4008, "train_positive_token_accuracy": 0.0657, "train_positive_token_prob": 0.0162 }, { "contrastive_loss": 0.7231, "epoch": 0.1580135440180587, "grad_norm": 22.557743072509766, "learning_rate": 9.997894508649995e-06, "lm_loss": 7.4716, "loss": 1.9544, "step": 70, "text_contrastive_loss": 0.9681, "train_positive_log_prob": -109.6256, "train_positive_token_accuracy": 0.066, "train_positive_token_prob": 0.0163 }, { "contrastive_loss": 0.5091, "epoch": 0.16027088036117382, "grad_norm": 15.837409019470215, "learning_rate": 9.997678712486889e-06, "lm_loss": 7.4683, "loss": 1.6324, "step": 71, "text_contrastive_loss": 0.753, "train_positive_log_prob": -104.9369, "train_positive_token_accuracy": 0.0659, "train_positive_token_prob": 0.0167 }, { "contrastive_loss": 0.6891, "epoch": 0.16252821670428894, "grad_norm": 17.99678611755371, "learning_rate": 9.99745239301774e-06, "lm_loss": 7.3014, "loss": 1.9611, "step": 72, "text_contrastive_loss": 1.0837, "train_positive_log_prob": -108.8716, "train_positive_token_accuracy": 0.0668, "train_positive_token_prob": 0.0174 }, { "contrastive_loss": 0.7978, "epoch": 0.16478555304740405, "grad_norm": 18.621131896972656, "learning_rate": 9.997215550719097e-06, "lm_loss": 7.2734, "loss": 2.1024, "step": 73, "text_contrastive_loss": 1.1546, "train_positive_log_prob": -107.9936, "train_positive_token_accuracy": 0.0742, "train_positive_token_prob": 0.0189 }, { "contrastive_loss": 0.6444, "epoch": 0.1670428893905192, "grad_norm": 19.960073471069336, "learning_rate": 9.996968186089664e-06, "lm_loss": 7.336, "loss": 1.8888, "step": 74, "text_contrastive_loss": 1.0217, "train_positive_log_prob": -108.2594, "train_positive_token_accuracy": 0.0709, "train_positive_token_prob": 0.0172 }, { "contrastive_loss": 0.8172, "epoch": 0.16930022573363432, "grad_norm": 19.790021896362305, "learning_rate": 9.996710299650302e-06, "lm_loss": 7.2372, "loss": 2.0423, "step": 75, "text_contrastive_loss": 1.0028, "train_positive_log_prob": -105.7312, "train_positive_token_accuracy": 0.0746, "train_positive_token_prob": 0.018 }, { "contrastive_loss": 0.7105, "epoch": 0.17155756207674944, "grad_norm": 19.25592041015625, "learning_rate": 9.996441891944023e-06, "lm_loss": 7.2776, "loss": 1.9127, "step": 76, "text_contrastive_loss": 0.9489, "train_positive_log_prob": -106.119, "train_positive_token_accuracy": 0.0651, "train_positive_token_prob": 0.0175 }, { "contrastive_loss": 0.7012, "epoch": 0.17381489841986456, "grad_norm": 17.665687561035156, "learning_rate": 9.996162963536004e-06, "lm_loss": 7.2626, "loss": 1.9637, "step": 77, "text_contrastive_loss": 1.0724, "train_positive_log_prob": -106.6461, "train_positive_token_accuracy": 0.0724, "train_positive_token_prob": 0.0179 }, { "contrastive_loss": 0.7044, "epoch": 0.17607223476297967, "grad_norm": 17.54836082458496, "learning_rate": 9.995873515013562e-06, "lm_loss": 7.1187, "loss": 1.9171, "step": 78, "text_contrastive_loss": 1.0017, "train_positive_log_prob": -104.0536, "train_positive_token_accuracy": 0.0748, "train_positive_token_prob": 0.0188 }, { "contrastive_loss": 0.6873, "epoch": 0.17832957110609482, "grad_norm": 18.3485050201416, "learning_rate": 9.99557354698617e-06, "lm_loss": 7.1606, "loss": 1.8431, "step": 79, "text_contrastive_loss": 0.8796, "train_positive_log_prob": -106.0165, "train_positive_token_accuracy": 0.0654, "train_positive_token_prob": 0.0176 }, { "contrastive_loss": 0.6392, "epoch": 0.18058690744920994, "grad_norm": 20.063926696777344, "learning_rate": 9.995263060085456e-06, "lm_loss": 7.1678, "loss": 1.8971, "step": 80, "text_contrastive_loss": 1.082, "train_positive_log_prob": -108.6669, "train_positive_token_accuracy": 0.0691, "train_positive_token_prob": 0.0189 }, { "contrastive_loss": 0.7355, "epoch": 0.18284424379232506, "grad_norm": 20.3895320892334, "learning_rate": 9.99494205496519e-06, "lm_loss": 7.1325, "loss": 2.0276, "step": 81, "text_contrastive_loss": 1.1576, "train_positive_log_prob": -106.0987, "train_positive_token_accuracy": 0.068, "train_positive_token_prob": 0.0186 }, { "contrastive_loss": 0.6852, "epoch": 0.18510158013544017, "grad_norm": 18.73487663269043, "learning_rate": 9.994610532301296e-06, "lm_loss": 7.1894, "loss": 1.9248, "step": 82, "text_contrastive_loss": 1.0412, "train_positive_log_prob": -107.9551, "train_positive_token_accuracy": 0.0627, "train_positive_token_prob": 0.0176 }, { "contrastive_loss": 0.6669, "epoch": 0.1873589164785553, "grad_norm": 18.10122299194336, "learning_rate": 9.99426849279184e-06, "lm_loss": 7.1451, "loss": 1.9421, "step": 83, "text_contrastive_loss": 1.1214, "train_positive_log_prob": -106.0044, "train_positive_token_accuracy": 0.0661, "train_positive_token_prob": 0.0184 }, { "contrastive_loss": 0.6057, "epoch": 0.18961625282167044, "grad_norm": 18.441043853759766, "learning_rate": 9.993915937157033e-06, "lm_loss": 7.2141, "loss": 1.8116, "step": 84, "text_contrastive_loss": 0.9691, "train_positive_log_prob": -105.2496, "train_positive_token_accuracy": 0.0718, "train_positive_token_prob": 0.0179 }, { "contrastive_loss": 0.8542, "epoch": 0.19187358916478556, "grad_norm": 20.69758415222168, "learning_rate": 9.99355286613923e-06, "lm_loss": 6.9868, "loss": 2.2421, "step": 85, "text_contrastive_loss": 1.3785, "train_positive_log_prob": -103.302, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.0193 }, { "contrastive_loss": 0.68, "epoch": 0.19413092550790068, "grad_norm": 20.38166046142578, "learning_rate": 9.993179280502926e-06, "lm_loss": 7.0946, "loss": 1.8517, "step": 86, "text_contrastive_loss": 0.9244, "train_positive_log_prob": -104.6733, "train_positive_token_accuracy": 0.0739, "train_positive_token_prob": 0.0194 }, { "contrastive_loss": 0.7434, "epoch": 0.1963882618510158, "grad_norm": 20.94504165649414, "learning_rate": 9.99279518103476e-06, "lm_loss": 7.1518, "loss": 1.9784, "step": 87, "text_contrastive_loss": 1.0396, "train_positive_log_prob": -103.5898, "train_positive_token_accuracy": 0.0666, "train_positive_token_prob": 0.0183 }, { "contrastive_loss": 0.7131, "epoch": 0.1986455981941309, "grad_norm": 18.84032440185547, "learning_rate": 9.992400568543506e-06, "lm_loss": 6.9985, "loss": 1.9961, "step": 88, "text_contrastive_loss": 1.1664, "train_positive_log_prob": -104.3755, "train_positive_token_accuracy": 0.0817, "train_positive_token_prob": 0.0218 }, { "contrastive_loss": 0.7467, "epoch": 0.20090293453724606, "grad_norm": 18.880041122436523, "learning_rate": 9.991995443860074e-06, "lm_loss": 7.1445, "loss": 1.9219, "step": 89, "text_contrastive_loss": 0.9216, "train_positive_log_prob": -104.6812, "train_positive_token_accuracy": 0.0776, "train_positive_token_prob": 0.0204 }, { "contrastive_loss": 0.7152, "epoch": 0.20316027088036118, "grad_norm": 18.664297103881836, "learning_rate": 9.991579807837511e-06, "lm_loss": 7.0983, "loss": 1.9426, "step": 90, "text_contrastive_loss": 1.0351, "train_positive_log_prob": -102.8157, "train_positive_token_accuracy": 0.072, "train_positive_token_prob": 0.0195 }, { "contrastive_loss": 0.5356, "epoch": 0.2054176072234763, "grad_norm": 16.25419044494629, "learning_rate": 9.991153661350996e-06, "lm_loss": 7.1611, "loss": 1.6608, "step": 91, "text_contrastive_loss": 0.818, "train_positive_log_prob": -105.3775, "train_positive_token_accuracy": 0.0735, "train_positive_token_prob": 0.0204 }, { "contrastive_loss": 0.6375, "epoch": 0.2076749435665914, "grad_norm": 19.357620239257812, "learning_rate": 9.990717005297841e-06, "lm_loss": 7.0838, "loss": 1.8145, "step": 92, "text_contrastive_loss": 0.9373, "train_positive_log_prob": -101.9163, "train_positive_token_accuracy": 0.0722, "train_positive_token_prob": 0.02 }, { "contrastive_loss": 0.6711, "epoch": 0.20993227990970656, "grad_norm": 16.920473098754883, "learning_rate": 9.990269840597484e-06, "lm_loss": 6.9962, "loss": 1.8757, "step": 93, "text_contrastive_loss": 1.0101, "train_positive_log_prob": -101.5844, "train_positive_token_accuracy": 0.0721, "train_positive_token_prob": 0.0204 }, { "contrastive_loss": 0.6933, "epoch": 0.21218961625282168, "grad_norm": 18.641815185546875, "learning_rate": 9.989812168191495e-06, "lm_loss": 6.9525, "loss": 1.9171, "step": 94, "text_contrastive_loss": 1.057, "train_positive_log_prob": -102.7659, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.0212 }, { "contrastive_loss": 0.7655, "epoch": 0.2144469525959368, "grad_norm": 18.8684024810791, "learning_rate": 9.989343989043563e-06, "lm_loss": 6.8671, "loss": 1.9745, "step": 95, "text_contrastive_loss": 1.0446, "train_positive_log_prob": -100.0049, "train_positive_token_accuracy": 0.0746, "train_positive_token_prob": 0.0227 }, { "contrastive_loss": 0.8027, "epoch": 0.21670428893905191, "grad_norm": 20.620410919189453, "learning_rate": 9.988865304139509e-06, "lm_loss": 6.9551, "loss": 2.0692, "step": 96, "text_contrastive_loss": 1.1421, "train_positive_log_prob": -104.0833, "train_positive_token_accuracy": 0.0715, "train_positive_token_prob": 0.0212 }, { "contrastive_loss": 0.7118, "epoch": 0.21896162528216703, "grad_norm": 21.403032302856445, "learning_rate": 9.988376114487264e-06, "lm_loss": 7.0753, "loss": 1.9118, "step": 97, "text_contrastive_loss": 0.9851, "train_positive_log_prob": -103.0357, "train_positive_token_accuracy": 0.0754, "train_positive_token_prob": 0.0212 }, { "contrastive_loss": 0.5414, "epoch": 0.22121896162528218, "grad_norm": 14.425271034240723, "learning_rate": 9.98787642111689e-06, "lm_loss": 6.9234, "loss": 1.7147, "step": 98, "text_contrastive_loss": 0.9619, "train_positive_log_prob": -102.0397, "train_positive_token_accuracy": 0.0758, "train_positive_token_prob": 0.022 }, { "contrastive_loss": 0.826, "epoch": 0.2234762979683973, "grad_norm": 19.397075653076172, "learning_rate": 9.98736622508056e-06, "lm_loss": 6.983, "loss": 2.0691, "step": 99, "text_contrastive_loss": 1.0896, "train_positive_log_prob": -105.7801, "train_positive_token_accuracy": 0.083, "train_positive_token_prob": 0.0229 }, { "contrastive_loss": 0.822, "epoch": 0.22573363431151242, "grad_norm": 24.32610321044922, "learning_rate": 9.98684552745256e-06, "lm_loss": 6.8964, "loss": 1.9765, "step": 100, "text_contrastive_loss": 0.9296, "train_positive_log_prob": -104.2558, "train_positive_token_accuracy": 0.0736, "train_positive_token_prob": 0.0218 }, { "contrastive_loss": 0.6104, "epoch": 0.22799097065462753, "grad_norm": 20.24129867553711, "learning_rate": 9.986314329329294e-06, "lm_loss": 6.8942, "loss": 1.8582, "step": 101, "text_contrastive_loss": 1.1168, "train_positive_log_prob": -103.4058, "train_positive_token_accuracy": 0.0732, "train_positive_token_prob": 0.0212 }, { "contrastive_loss": 0.5948, "epoch": 0.23024830699774265, "grad_norm": 21.55072021484375, "learning_rate": 9.985772631829272e-06, "lm_loss": 6.856, "loss": 1.7716, "step": 102, "text_contrastive_loss": 0.9824, "train_positive_log_prob": -101.0808, "train_positive_token_accuracy": 0.0763, "train_positive_token_prob": 0.0218 }, { "contrastive_loss": 0.6196, "epoch": 0.2325056433408578, "grad_norm": 17.736631393432617, "learning_rate": 9.985220436093112e-06, "lm_loss": 6.7926, "loss": 1.8132, "step": 103, "text_contrastive_loss": 1.0287, "train_positive_log_prob": -101.809, "train_positive_token_accuracy": 0.0722, "train_positive_token_prob": 0.0222 }, { "contrastive_loss": 0.7161, "epoch": 0.23476297968397292, "grad_norm": 21.27935218811035, "learning_rate": 9.984657743283543e-06, "lm_loss": 6.8636, "loss": 1.9482, "step": 104, "text_contrastive_loss": 1.0915, "train_positive_log_prob": -100.2482, "train_positive_token_accuracy": 0.0687, "train_positive_token_prob": 0.0207 }, { "contrastive_loss": 0.6279, "epoch": 0.23702031602708803, "grad_norm": 18.59088134765625, "learning_rate": 9.984084554585387e-06, "lm_loss": 6.7919, "loss": 1.8682, "step": 105, "text_contrastive_loss": 1.1222, "train_positive_log_prob": -100.372, "train_positive_token_accuracy": 0.0775, "train_positive_token_prob": 0.023 }, { "contrastive_loss": 0.5217, "epoch": 0.23927765237020315, "grad_norm": 14.964653015136719, "learning_rate": 9.983500871205577e-06, "lm_loss": 6.8479, "loss": 1.6139, "step": 106, "text_contrastive_loss": 0.8147, "train_positive_log_prob": -101.6839, "train_positive_token_accuracy": 0.0642, "train_positive_token_prob": 0.0196 }, { "contrastive_loss": 0.7429, "epoch": 0.24153498871331827, "grad_norm": 21.03154182434082, "learning_rate": 9.982906694373136e-06, "lm_loss": 6.7614, "loss": 1.9369, "step": 107, "text_contrastive_loss": 1.0357, "train_positive_log_prob": -99.8404, "train_positive_token_accuracy": 0.0741, "train_positive_token_prob": 0.0217 }, { "contrastive_loss": 0.6753, "epoch": 0.24379232505643342, "grad_norm": 19.622690200805664, "learning_rate": 9.98230202533919e-06, "lm_loss": 6.8005, "loss": 1.8331, "step": 108, "text_contrastive_loss": 0.9555, "train_positive_log_prob": -98.8159, "train_positive_token_accuracy": 0.0737, "train_positive_token_prob": 0.0207 }, { "contrastive_loss": 0.6735, "epoch": 0.24604966139954854, "grad_norm": 17.51546859741211, "learning_rate": 9.98168686537695e-06, "lm_loss": 6.8147, "loss": 1.9004, "step": 109, "text_contrastive_loss": 1.0909, "train_positive_log_prob": -102.1914, "train_positive_token_accuracy": 0.0617, "train_positive_token_prob": 0.0203 }, { "contrastive_loss": 0.5209, "epoch": 0.24830699774266365, "grad_norm": 20.796781539916992, "learning_rate": 9.98106121578172e-06, "lm_loss": 6.8527, "loss": 1.6981, "step": 110, "text_contrastive_loss": 0.984, "train_positive_log_prob": -99.4987, "train_positive_token_accuracy": 0.0678, "train_positive_token_prob": 0.0199 }, { "contrastive_loss": 0.5796, "epoch": 0.2505643340857788, "grad_norm": 18.110681533813477, "learning_rate": 9.980425077870895e-06, "lm_loss": 6.7838, "loss": 1.7806, "step": 111, "text_contrastive_loss": 1.0451, "train_positive_log_prob": -99.5029, "train_positive_token_accuracy": 0.0671, "train_positive_token_prob": 0.0197 }, { "contrastive_loss": 0.6582, "epoch": 0.2528216704288939, "grad_norm": 18.792470932006836, "learning_rate": 9.979778452983949e-06, "lm_loss": 6.7502, "loss": 1.8141, "step": 112, "text_contrastive_loss": 0.9618, "train_positive_log_prob": -97.1377, "train_positive_token_accuracy": 0.0725, "train_positive_token_prob": 0.0195 }, { "contrastive_loss": 0.6102, "epoch": 0.255079006772009, "grad_norm": 17.86269760131836, "learning_rate": 9.979121342482442e-06, "lm_loss": 6.7461, "loss": 1.7826, "step": 113, "text_contrastive_loss": 0.9956, "train_positive_log_prob": -100.1923, "train_positive_token_accuracy": 0.0655, "train_positive_token_prob": 0.0206 }, { "contrastive_loss": 0.7073, "epoch": 0.25733634311512416, "grad_norm": 20.309282302856445, "learning_rate": 9.978453747750012e-06, "lm_loss": 6.7841, "loss": 1.8668, "step": 114, "text_contrastive_loss": 0.9621, "train_positive_log_prob": -103.271, "train_positive_token_accuracy": 0.0677, "train_positive_token_prob": 0.0204 }, { "contrastive_loss": 0.584, "epoch": 0.2595936794582393, "grad_norm": 15.47262191772461, "learning_rate": 9.977775670192373e-06, "lm_loss": 6.7531, "loss": 1.7331, "step": 115, "text_contrastive_loss": 0.9477, "train_positive_log_prob": -102.5584, "train_positive_token_accuracy": 0.0756, "train_positive_token_prob": 0.0216 }, { "contrastive_loss": 0.7128, "epoch": 0.2618510158013544, "grad_norm": 21.398481369018555, "learning_rate": 9.977087111237307e-06, "lm_loss": 6.7714, "loss": 1.859, "step": 116, "text_contrastive_loss": 0.9381, "train_positive_log_prob": -99.1344, "train_positive_token_accuracy": 0.0755, "train_positive_token_prob": 0.0211 }, { "contrastive_loss": 0.7079, "epoch": 0.26410835214446954, "grad_norm": 20.449430465698242, "learning_rate": 9.976388072334674e-06, "lm_loss": 6.7875, "loss": 1.8355, "step": 117, "text_contrastive_loss": 0.8977, "train_positive_log_prob": -102.1042, "train_positive_token_accuracy": 0.0776, "train_positive_token_prob": 0.0217 }, { "contrastive_loss": 0.6718, "epoch": 0.26636568848758463, "grad_norm": 21.4807071685791, "learning_rate": 9.975678554956397e-06, "lm_loss": 6.707, "loss": 1.857, "step": 118, "text_contrastive_loss": 1.0288, "train_positive_log_prob": -99.3987, "train_positive_token_accuracy": 0.0743, "train_positive_token_prob": 0.0227 }, { "contrastive_loss": 0.6264, "epoch": 0.2686230248306998, "grad_norm": 19.080724716186523, "learning_rate": 9.974958560596464e-06, "lm_loss": 6.5665, "loss": 1.7351, "step": 119, "text_contrastive_loss": 0.9041, "train_positive_log_prob": -96.908, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0234 }, { "contrastive_loss": 0.623, "epoch": 0.2708803611738149, "grad_norm": 17.793132781982422, "learning_rate": 9.97422809077092e-06, "lm_loss": 6.7075, "loss": 1.8309, "step": 120, "text_contrastive_loss": 1.0742, "train_positive_log_prob": -97.9892, "train_positive_token_accuracy": 0.0711, "train_positive_token_prob": 0.0212 }, { "contrastive_loss": 0.7049, "epoch": 0.27313769751693, "grad_norm": 20.13200569152832, "learning_rate": 9.973487147017874e-06, "lm_loss": 6.6935, "loss": 1.9066, "step": 121, "text_contrastive_loss": 1.0646, "train_positive_log_prob": -99.6141, "train_positive_token_accuracy": 0.0667, "train_positive_token_prob": 0.0219 }, { "contrastive_loss": 0.6301, "epoch": 0.27539503386004516, "grad_norm": 20.30483055114746, "learning_rate": 9.972735730897484e-06, "lm_loss": 6.7637, "loss": 1.7692, "step": 122, "text_contrastive_loss": 0.9255, "train_positive_log_prob": -99.4747, "train_positive_token_accuracy": 0.0696, "train_positive_token_prob": 0.0219 }, { "contrastive_loss": 0.6715, "epoch": 0.27765237020316025, "grad_norm": 18.955608367919922, "learning_rate": 9.97197384399196e-06, "lm_loss": 6.659, "loss": 1.859, "step": 123, "text_contrastive_loss": 1.0431, "train_positive_log_prob": -99.4913, "train_positive_token_accuracy": 0.0729, "train_positive_token_prob": 0.0228 }, { "contrastive_loss": 0.5571, "epoch": 0.2799097065462754, "grad_norm": 18.156646728515625, "learning_rate": 9.971201487905563e-06, "lm_loss": 6.7414, "loss": 1.6083, "step": 124, "text_contrastive_loss": 0.7542, "train_positive_log_prob": -100.8597, "train_positive_token_accuracy": 0.0697, "train_positive_token_prob": 0.0211 }, { "contrastive_loss": 0.5687, "epoch": 0.28216704288939054, "grad_norm": 16.17840003967285, "learning_rate": 9.970418664264596e-06, "lm_loss": 6.7946, "loss": 1.7881, "step": 125, "text_contrastive_loss": 1.0799, "train_positive_log_prob": -100.4893, "train_positive_token_accuracy": 0.0684, "train_positive_token_prob": 0.022 }, { "contrastive_loss": 0.7128, "epoch": 0.28442437923250563, "grad_norm": 20.05196189880371, "learning_rate": 9.969625374717401e-06, "lm_loss": 6.723, "loss": 1.9051, "step": 126, "text_contrastive_loss": 1.04, "train_positive_log_prob": -98.9507, "train_positive_token_accuracy": 0.0693, "train_positive_token_prob": 0.0208 }, { "contrastive_loss": 0.6544, "epoch": 0.2866817155756208, "grad_norm": 20.08206558227539, "learning_rate": 9.96882162093436e-06, "lm_loss": 6.6549, "loss": 1.7353, "step": 127, "text_contrastive_loss": 0.8308, "train_positive_log_prob": -99.0368, "train_positive_token_accuracy": 0.0735, "train_positive_token_prob": 0.0229 }, { "contrastive_loss": 0.6874, "epoch": 0.28893905191873587, "grad_norm": 19.576711654663086, "learning_rate": 9.968007404607887e-06, "lm_loss": 6.6307, "loss": 1.8066, "step": 128, "text_contrastive_loss": 0.9122, "train_positive_log_prob": -99.4894, "train_positive_token_accuracy": 0.0695, "train_positive_token_prob": 0.0223 }, { "contrastive_loss": 0.5249, "epoch": 0.291196388261851, "grad_norm": 16.32411766052246, "learning_rate": 9.96718272745243e-06, "lm_loss": 6.572, "loss": 1.5725, "step": 129, "text_contrastive_loss": 0.7809, "train_positive_log_prob": -94.3693, "train_positive_token_accuracy": 0.0721, "train_positive_token_prob": 0.0223 }, { "contrastive_loss": 0.603, "epoch": 0.29345372460496616, "grad_norm": 19.72479248046875, "learning_rate": 9.966347591204459e-06, "lm_loss": 6.4371, "loss": 1.6972, "step": 130, "text_contrastive_loss": 0.901, "train_positive_log_prob": -93.3102, "train_positive_token_accuracy": 0.0741, "train_positive_token_prob": 0.023 }, { "contrastive_loss": 0.4895, "epoch": 0.29571106094808125, "grad_norm": 14.938592910766602, "learning_rate": 9.96550199762247e-06, "lm_loss": 6.7028, "loss": 1.6372, "step": 131, "text_contrastive_loss": 0.9549, "train_positive_log_prob": -99.5458, "train_positive_token_accuracy": 0.071, "train_positive_token_prob": 0.0219 }, { "contrastive_loss": 0.6468, "epoch": 0.2979683972911964, "grad_norm": 18.830659866333008, "learning_rate": 9.964645948486978e-06, "lm_loss": 6.5754, "loss": 1.8032, "step": 132, "text_contrastive_loss": 0.9978, "train_positive_log_prob": -96.2181, "train_positive_token_accuracy": 0.0713, "train_positive_token_prob": 0.0222 }, { "contrastive_loss": 0.7476, "epoch": 0.3002257336343115, "grad_norm": 19.2735538482666, "learning_rate": 9.963779445600512e-06, "lm_loss": 6.7231, "loss": 1.9824, "step": 133, "text_contrastive_loss": 1.1248, "train_positive_log_prob": -102.0271, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0223 }, { "contrastive_loss": 0.5515, "epoch": 0.30248306997742663, "grad_norm": 17.205963134765625, "learning_rate": 9.962902490787616e-06, "lm_loss": 6.5939, "loss": 1.7153, "step": 134, "text_contrastive_loss": 1.0088, "train_positive_log_prob": -96.5667, "train_positive_token_accuracy": 0.0686, "train_positive_token_prob": 0.0224 }, { "contrastive_loss": 0.4688, "epoch": 0.3047404063205418, "grad_norm": 16.025466918945312, "learning_rate": 9.962015085894838e-06, "lm_loss": 6.5824, "loss": 1.5918, "step": 135, "text_contrastive_loss": 0.9294, "train_positive_log_prob": -96.3242, "train_positive_token_accuracy": 0.0718, "train_positive_token_prob": 0.023 }, { "contrastive_loss": 0.6728, "epoch": 0.30699774266365687, "grad_norm": 21.674942016601562, "learning_rate": 9.961117232790734e-06, "lm_loss": 6.5266, "loss": 1.8339, "step": 136, "text_contrastive_loss": 1.0169, "train_positive_log_prob": -96.6234, "train_positive_token_accuracy": 0.0734, "train_positive_token_prob": 0.0229 }, { "contrastive_loss": 0.7276, "epoch": 0.309255079006772, "grad_norm": 17.904706954956055, "learning_rate": 9.960208933365857e-06, "lm_loss": 6.4983, "loss": 1.88, "step": 137, "text_contrastive_loss": 1.0052, "train_positive_log_prob": -96.9173, "train_positive_token_accuracy": 0.0712, "train_positive_token_prob": 0.0231 }, { "contrastive_loss": 0.5621, "epoch": 0.3115124153498871, "grad_norm": 17.837366104125977, "learning_rate": 9.959290189532757e-06, "lm_loss": 6.6279, "loss": 1.7579, "step": 138, "text_contrastive_loss": 1.066, "train_positive_log_prob": -98.5638, "train_positive_token_accuracy": 0.0617, "train_positive_token_prob": 0.021 }, { "contrastive_loss": 0.7193, "epoch": 0.31376975169300225, "grad_norm": 18.712196350097656, "learning_rate": 9.958361003225979e-06, "lm_loss": 6.5824, "loss": 1.8925, "step": 139, "text_contrastive_loss": 1.0299, "train_positive_log_prob": -98.5002, "train_positive_token_accuracy": 0.0762, "train_positive_token_prob": 0.023 }, { "contrastive_loss": 0.7363, "epoch": 0.3160270880361174, "grad_norm": 18.103960037231445, "learning_rate": 9.957421376402053e-06, "lm_loss": 6.5925, "loss": 1.8567, "step": 140, "text_contrastive_loss": 0.9223, "train_positive_log_prob": -97.703, "train_positive_token_accuracy": 0.0738, "train_positive_token_prob": 0.0235 }, { "contrastive_loss": 0.6386, "epoch": 0.3182844243792325, "grad_norm": 18.02215576171875, "learning_rate": 9.956471311039491e-06, "lm_loss": 6.5221, "loss": 1.7777, "step": 141, "text_contrastive_loss": 0.9737, "train_positive_log_prob": -96.9393, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0252 }, { "contrastive_loss": 0.6095, "epoch": 0.32054176072234764, "grad_norm": 17.972013473510742, "learning_rate": 9.95551080913879e-06, "lm_loss": 6.5494, "loss": 1.7287, "step": 142, "text_contrastive_loss": 0.9285, "train_positive_log_prob": -97.9343, "train_positive_token_accuracy": 0.0656, "train_positive_token_prob": 0.0225 }, { "contrastive_loss": 0.6191, "epoch": 0.3227990970654628, "grad_norm": 16.62250518798828, "learning_rate": 9.954539872722417e-06, "lm_loss": 6.5831, "loss": 1.7514, "step": 143, "text_contrastive_loss": 0.9479, "train_positive_log_prob": -99.0788, "train_positive_token_accuracy": 0.076, "train_positive_token_prob": 0.023 }, { "contrastive_loss": 0.6935, "epoch": 0.32505643340857787, "grad_norm": 18.340702056884766, "learning_rate": 9.953558503834819e-06, "lm_loss": 6.574, "loss": 1.8538, "step": 144, "text_contrastive_loss": 1.0058, "train_positive_log_prob": -99.1228, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.0243 }, { "contrastive_loss": 0.7105, "epoch": 0.327313769751693, "grad_norm": 20.077089309692383, "learning_rate": 9.9525667045424e-06, "lm_loss": 6.4291, "loss": 1.8989, "step": 145, "text_contrastive_loss": 1.0909, "train_positive_log_prob": -96.109, "train_positive_token_accuracy": 0.076, "train_positive_token_prob": 0.0242 }, { "contrastive_loss": 0.5658, "epoch": 0.3295711060948081, "grad_norm": 17.12446403503418, "learning_rate": 9.951564476933534e-06, "lm_loss": 6.6439, "loss": 1.6466, "step": 146, "text_contrastive_loss": 0.8327, "train_positive_log_prob": -100.1287, "train_positive_token_accuracy": 0.0734, "train_positive_token_prob": 0.0227 }, { "contrastive_loss": 0.5777, "epoch": 0.33182844243792325, "grad_norm": 18.200572967529297, "learning_rate": 9.950551823118544e-06, "lm_loss": 6.4616, "loss": 1.7331, "step": 147, "text_contrastive_loss": 1.0184, "train_positive_log_prob": -96.6485, "train_positive_token_accuracy": 0.0721, "train_positive_token_prob": 0.0227 }, { "contrastive_loss": 0.7021, "epoch": 0.3340857787810384, "grad_norm": 16.875293731689453, "learning_rate": 9.949528745229721e-06, "lm_loss": 6.5708, "loss": 1.8853, "step": 148, "text_contrastive_loss": 1.0521, "train_positive_log_prob": -99.7688, "train_positive_token_accuracy": 0.0707, "train_positive_token_prob": 0.0229 }, { "contrastive_loss": 0.6348, "epoch": 0.3363431151241535, "grad_norm": 19.054262161254883, "learning_rate": 9.948495245421294e-06, "lm_loss": 6.4714, "loss": 1.7528, "step": 149, "text_contrastive_loss": 0.9417, "train_positive_log_prob": -95.0963, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.024 }, { "contrastive_loss": 0.6802, "epoch": 0.33860045146726864, "grad_norm": 17.193363189697266, "learning_rate": 9.94745132586944e-06, "lm_loss": 6.5273, "loss": 1.7791, "step": 150, "text_contrastive_loss": 0.8923, "train_positive_log_prob": -99.2347, "train_positive_token_accuracy": 0.0681, "train_positive_token_prob": 0.0226 }, { "contrastive_loss": 0.6213, "epoch": 0.34085778781038373, "grad_norm": 18.566396713256836, "learning_rate": 9.946396988772275e-06, "lm_loss": 6.6128, "loss": 1.7275, "step": 151, "text_contrastive_loss": 0.89, "train_positive_log_prob": -96.8675, "train_positive_token_accuracy": 0.0624, "train_positive_token_prob": 0.0219 }, { "contrastive_loss": 0.5195, "epoch": 0.3431151241534989, "grad_norm": 19.143999099731445, "learning_rate": 9.945332236349857e-06, "lm_loss": 6.5027, "loss": 1.5934, "step": 152, "text_contrastive_loss": 0.8473, "train_positive_log_prob": -98.3293, "train_positive_token_accuracy": 0.0765, "train_positive_token_prob": 0.0241 }, { "contrastive_loss": 0.7038, "epoch": 0.345372460496614, "grad_norm": 19.382593154907227, "learning_rate": 9.944257070844165e-06, "lm_loss": 6.5004, "loss": 1.833, "step": 153, "text_contrastive_loss": 0.9582, "train_positive_log_prob": -95.6292, "train_positive_token_accuracy": 0.0733, "train_positive_token_prob": 0.0228 }, { "contrastive_loss": 0.5754, "epoch": 0.3476297968397291, "grad_norm": 17.497941970825195, "learning_rate": 9.943171494519111e-06, "lm_loss": 6.4694, "loss": 1.7106, "step": 154, "text_contrastive_loss": 0.9764, "train_positive_log_prob": -94.1347, "train_positive_token_accuracy": 0.0685, "train_positive_token_prob": 0.0227 }, { "contrastive_loss": 0.578, "epoch": 0.34988713318284426, "grad_norm": 19.888530731201172, "learning_rate": 9.942075509660527e-06, "lm_loss": 6.3295, "loss": 1.6232, "step": 155, "text_contrastive_loss": 0.8245, "train_positive_log_prob": -92.8392, "train_positive_token_accuracy": 0.0754, "train_positive_token_prob": 0.0241 }, { "contrastive_loss": 0.705, "epoch": 0.35214446952595935, "grad_norm": 18.749338150024414, "learning_rate": 9.94096911857616e-06, "lm_loss": 6.3607, "loss": 1.8029, "step": 156, "text_contrastive_loss": 0.9237, "train_positive_log_prob": -96.1845, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.0238 }, { "contrastive_loss": 0.582, "epoch": 0.3544018058690745, "grad_norm": 19.937015533447266, "learning_rate": 9.939852323595671e-06, "lm_loss": 6.3937, "loss": 1.6613, "step": 157, "text_contrastive_loss": 0.8797, "train_positive_log_prob": -94.1576, "train_positive_token_accuracy": 0.0814, "train_positive_token_prob": 0.0243 }, { "contrastive_loss": 0.5762, "epoch": 0.35665914221218964, "grad_norm": 18.210784912109375, "learning_rate": 9.938725127070628e-06, "lm_loss": 6.3963, "loss": 1.7224, "step": 158, "text_contrastive_loss": 1.0131, "train_positive_log_prob": -97.7453, "train_positive_token_accuracy": 0.0749, "train_positive_token_prob": 0.0243 }, { "contrastive_loss": 0.5866, "epoch": 0.35891647855530473, "grad_norm": 17.754047393798828, "learning_rate": 9.937587531374497e-06, "lm_loss": 6.4707, "loss": 1.7096, "step": 159, "text_contrastive_loss": 0.9519, "train_positive_log_prob": -95.9966, "train_positive_token_accuracy": 0.0729, "train_positive_token_prob": 0.0242 }, { "contrastive_loss": 0.6803, "epoch": 0.3611738148984199, "grad_norm": 20.1284236907959, "learning_rate": 9.936439538902644e-06, "lm_loss": 6.3979, "loss": 1.7264, "step": 160, "text_contrastive_loss": 0.8128, "train_positive_log_prob": -94.7203, "train_positive_token_accuracy": 0.0765, "train_positive_token_prob": 0.0239 }, { "contrastive_loss": 0.6147, "epoch": 0.36343115124153497, "grad_norm": 18.447181701660156, "learning_rate": 9.935281152072329e-06, "lm_loss": 6.3058, "loss": 1.7724, "step": 161, "text_contrastive_loss": 1.0541, "train_positive_log_prob": -92.9408, "train_positive_token_accuracy": 0.0753, "train_positive_token_prob": 0.0248 }, { "contrastive_loss": 0.6066, "epoch": 0.3656884875846501, "grad_norm": 16.914417266845703, "learning_rate": 9.934112373322695e-06, "lm_loss": 6.3964, "loss": 1.7119, "step": 162, "text_contrastive_loss": 0.9314, "train_positive_log_prob": -94.6963, "train_positive_token_accuracy": 0.0734, "train_positive_token_prob": 0.024 }, { "contrastive_loss": 0.5901, "epoch": 0.36794582392776526, "grad_norm": 17.683534622192383, "learning_rate": 9.932933205114766e-06, "lm_loss": 6.3766, "loss": 1.6508, "step": 163, "text_contrastive_loss": 0.846, "train_positive_log_prob": -95.7006, "train_positive_token_accuracy": 0.0825, "train_positive_token_prob": 0.0246 }, { "contrastive_loss": 0.5814, "epoch": 0.37020316027088035, "grad_norm": 17.29725456237793, "learning_rate": 9.931743649931446e-06, "lm_loss": 6.5045, "loss": 1.653, "step": 164, "text_contrastive_loss": 0.8424, "train_positive_log_prob": -96.6768, "train_positive_token_accuracy": 0.0715, "train_positive_token_prob": 0.0226 }, { "contrastive_loss": 0.5415, "epoch": 0.3724604966139955, "grad_norm": 17.485736846923828, "learning_rate": 9.93054371027751e-06, "lm_loss": 6.3399, "loss": 1.6119, "step": 165, "text_contrastive_loss": 0.8727, "train_positive_log_prob": -90.8089, "train_positive_token_accuracy": 0.0804, "train_positive_token_prob": 0.025 }, { "contrastive_loss": 0.5808, "epoch": 0.3747178329571106, "grad_norm": 15.62707233428955, "learning_rate": 9.929333388679593e-06, "lm_loss": 6.3657, "loss": 1.6355, "step": 166, "text_contrastive_loss": 0.8362, "train_positive_log_prob": -92.1776, "train_positive_token_accuracy": 0.082, "train_positive_token_prob": 0.0239 }, { "contrastive_loss": 0.6004, "epoch": 0.37697516930022573, "grad_norm": 18.117691040039062, "learning_rate": 9.928112687686197e-06, "lm_loss": 6.3642, "loss": 1.6828, "step": 167, "text_contrastive_loss": 0.8919, "train_positive_log_prob": -91.6888, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0243 }, { "contrastive_loss": 0.5473, "epoch": 0.3792325056433409, "grad_norm": 15.383481979370117, "learning_rate": 9.92688160986768e-06, "lm_loss": 6.3471, "loss": 1.6303, "step": 168, "text_contrastive_loss": 0.8967, "train_positive_log_prob": -94.2657, "train_positive_token_accuracy": 0.0689, "train_positive_token_prob": 0.0237 }, { "contrastive_loss": 0.6605, "epoch": 0.38148984198645597, "grad_norm": 20.276687622070312, "learning_rate": 9.925640157816246e-06, "lm_loss": 6.4145, "loss": 1.7597, "step": 169, "text_contrastive_loss": 0.9155, "train_positive_log_prob": -93.8055, "train_positive_token_accuracy": 0.0743, "train_positive_token_prob": 0.023 }, { "contrastive_loss": 0.6517, "epoch": 0.3837471783295711, "grad_norm": 18.185070037841797, "learning_rate": 9.924388334145943e-06, "lm_loss": 6.4825, "loss": 1.7708, "step": 170, "text_contrastive_loss": 0.9418, "train_positive_log_prob": -96.2035, "train_positive_token_accuracy": 0.0719, "train_positive_token_prob": 0.0226 }, { "contrastive_loss": 0.5431, "epoch": 0.3860045146726862, "grad_norm": 19.181861877441406, "learning_rate": 9.92312614149266e-06, "lm_loss": 6.3756, "loss": 1.6018, "step": 171, "text_contrastive_loss": 0.8423, "train_positive_log_prob": -93.6186, "train_positive_token_accuracy": 0.0774, "train_positive_token_prob": 0.0238 }, { "contrastive_loss": 0.5546, "epoch": 0.38826185101580135, "grad_norm": 16.07084083557129, "learning_rate": 9.92185358251412e-06, "lm_loss": 6.2966, "loss": 1.6335, "step": 172, "text_contrastive_loss": 0.8986, "train_positive_log_prob": -91.7924, "train_positive_token_accuracy": 0.0756, "train_positive_token_prob": 0.0237 }, { "contrastive_loss": 0.6237, "epoch": 0.3905191873589165, "grad_norm": 18.402652740478516, "learning_rate": 9.92057065988987e-06, "lm_loss": 6.3649, "loss": 1.6979, "step": 173, "text_contrastive_loss": 0.8754, "train_positive_log_prob": -94.9973, "train_positive_token_accuracy": 0.0699, "train_positive_token_prob": 0.0234 }, { "contrastive_loss": 0.5243, "epoch": 0.3927765237020316, "grad_norm": 16.405948638916016, "learning_rate": 9.919277376321284e-06, "lm_loss": 6.2933, "loss": 1.643, "step": 174, "text_contrastive_loss": 0.9787, "train_positive_log_prob": -93.1227, "train_positive_token_accuracy": 0.0644, "train_positive_token_prob": 0.023 }, { "contrastive_loss": 0.645, "epoch": 0.39503386004514673, "grad_norm": 17.82147789001465, "learning_rate": 9.917973734531549e-06, "lm_loss": 6.3834, "loss": 1.7463, "step": 175, "text_contrastive_loss": 0.9259, "train_positive_log_prob": -93.3786, "train_positive_token_accuracy": 0.0676, "train_positive_token_prob": 0.0233 }, { "contrastive_loss": 0.5635, "epoch": 0.3972911963882618, "grad_norm": 16.221330642700195, "learning_rate": 9.916659737265664e-06, "lm_loss": 6.3933, "loss": 1.6698, "step": 176, "text_contrastive_loss": 0.9339, "train_positive_log_prob": -96.3004, "train_positive_token_accuracy": 0.0752, "train_positive_token_prob": 0.0243 }, { "contrastive_loss": 0.5209, "epoch": 0.39954853273137697, "grad_norm": 16.04338836669922, "learning_rate": 9.915335387290432e-06, "lm_loss": 6.3647, "loss": 1.5876, "step": 177, "text_contrastive_loss": 0.8605, "train_positive_log_prob": -95.7909, "train_positive_token_accuracy": 0.0737, "train_positive_token_prob": 0.0242 }, { "contrastive_loss": 0.6082, "epoch": 0.4018058690744921, "grad_norm": 19.90717315673828, "learning_rate": 9.914000687394457e-06, "lm_loss": 6.5428, "loss": 1.7621, "step": 178, "text_contrastive_loss": 0.9992, "train_positive_log_prob": -99.5424, "train_positive_token_accuracy": 0.0724, "train_positive_token_prob": 0.0235 }, { "contrastive_loss": 0.6049, "epoch": 0.4040632054176072, "grad_norm": 15.602974891662598, "learning_rate": 9.912655640388134e-06, "lm_loss": 6.2935, "loss": 1.644, "step": 179, "text_contrastive_loss": 0.8196, "train_positive_log_prob": -94.5021, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0254 }, { "contrastive_loss": 0.6542, "epoch": 0.40632054176072235, "grad_norm": 20.750207901000977, "learning_rate": 9.911300249103646e-06, "lm_loss": 6.3268, "loss": 1.7814, "step": 180, "text_contrastive_loss": 0.989, "train_positive_log_prob": -94.0351, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.0248 }, { "contrastive_loss": 0.6459, "epoch": 0.40857787810383744, "grad_norm": 17.191402435302734, "learning_rate": 9.909934516394957e-06, "lm_loss": 6.2459, "loss": 1.7891, "step": 181, "text_contrastive_loss": 1.0371, "train_positive_log_prob": -92.1278, "train_positive_token_accuracy": 0.0747, "train_positive_token_prob": 0.0253 }, { "contrastive_loss": 0.6721, "epoch": 0.4108352144469526, "grad_norm": 16.632160186767578, "learning_rate": 9.908558445137807e-06, "lm_loss": 6.3747, "loss": 1.7507, "step": 182, "text_contrastive_loss": 0.8822, "train_positive_log_prob": -93.629, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0253 }, { "contrastive_loss": 0.5982, "epoch": 0.41309255079006774, "grad_norm": 17.62737274169922, "learning_rate": 9.907172038229706e-06, "lm_loss": 6.3935, "loss": 1.6699, "step": 183, "text_contrastive_loss": 0.8648, "train_positive_log_prob": -93.3324, "train_positive_token_accuracy": 0.0709, "train_positive_token_prob": 0.0245 }, { "contrastive_loss": 0.6416, "epoch": 0.4153498871331828, "grad_norm": 17.71466636657715, "learning_rate": 9.905775298589923e-06, "lm_loss": 6.353, "loss": 1.7138, "step": 184, "text_contrastive_loss": 0.8738, "train_positive_log_prob": -92.9928, "train_positive_token_accuracy": 0.0707, "train_positive_token_prob": 0.0244 }, { "contrastive_loss": 0.6686, "epoch": 0.417607223476298, "grad_norm": 16.830604553222656, "learning_rate": 9.904368229159494e-06, "lm_loss": 6.2932, "loss": 1.7439, "step": 185, "text_contrastive_loss": 0.8918, "train_positive_log_prob": -93.4648, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0259 }, { "contrastive_loss": 0.5155, "epoch": 0.4198645598194131, "grad_norm": 17.492033004760742, "learning_rate": 9.90295083290119e-06, "lm_loss": 6.3031, "loss": 1.6393, "step": 186, "text_contrastive_loss": 0.9871, "train_positive_log_prob": -93.1214, "train_positive_token_accuracy": 0.0748, "train_positive_token_prob": 0.0242 }, { "contrastive_loss": 0.6918, "epoch": 0.4221218961625282, "grad_norm": 17.344141006469727, "learning_rate": 9.901523112799543e-06, "lm_loss": 6.2857, "loss": 1.7474, "step": 187, "text_contrastive_loss": 0.8539, "train_positive_log_prob": -93.4994, "train_positive_token_accuracy": 0.0714, "train_positive_token_prob": 0.0236 }, { "contrastive_loss": 0.5633, "epoch": 0.42437923250564336, "grad_norm": 17.498371124267578, "learning_rate": 9.90008507186081e-06, "lm_loss": 6.2037, "loss": 1.6121, "step": 188, "text_contrastive_loss": 0.8569, "train_positive_log_prob": -91.7199, "train_positive_token_accuracy": 0.07, "train_positive_token_prob": 0.0244 }, { "contrastive_loss": 0.5294, "epoch": 0.42663656884875845, "grad_norm": 16.007226943969727, "learning_rate": 9.898636713112992e-06, "lm_loss": 6.3962, "loss": 1.6096, "step": 189, "text_contrastive_loss": 0.8811, "train_positive_log_prob": -93.7179, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0241 }, { "contrastive_loss": 0.6992, "epoch": 0.4288939051918736, "grad_norm": 19.596721649169922, "learning_rate": 9.897178039605803e-06, "lm_loss": 6.2761, "loss": 1.8828, "step": 190, "text_contrastive_loss": 1.1119, "train_positive_log_prob": -90.9802, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0249 }, { "contrastive_loss": 0.5659, "epoch": 0.43115124153498874, "grad_norm": 17.168418884277344, "learning_rate": 9.895709054410686e-06, "lm_loss": 6.3628, "loss": 1.6115, "step": 191, "text_contrastive_loss": 0.8187, "train_positive_log_prob": -97.2303, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.0249 }, { "contrastive_loss": 0.6841, "epoch": 0.43340857787810383, "grad_norm": 20.078487396240234, "learning_rate": 9.894229760620793e-06, "lm_loss": 6.2957, "loss": 1.762, "step": 192, "text_contrastive_loss": 0.8967, "train_positive_log_prob": -93.2803, "train_positive_token_accuracy": 0.0712, "train_positive_token_prob": 0.0238 }, { "contrastive_loss": 0.5674, "epoch": 0.435665914221219, "grad_norm": 16.747314453125, "learning_rate": 9.892740161350981e-06, "lm_loss": 6.3721, "loss": 1.6006, "step": 193, "text_contrastive_loss": 0.792, "train_positive_log_prob": -94.6121, "train_positive_token_accuracy": 0.0776, "train_positive_token_prob": 0.024 }, { "contrastive_loss": 0.5034, "epoch": 0.43792325056433407, "grad_norm": 16.926101684570312, "learning_rate": 9.891240259737809e-06, "lm_loss": 6.4089, "loss": 1.5184, "step": 194, "text_contrastive_loss": 0.7484, "train_positive_log_prob": -95.6889, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0232 }, { "contrastive_loss": 0.6301, "epoch": 0.4401805869074492, "grad_norm": 16.07118797302246, "learning_rate": 9.889730058939529e-06, "lm_loss": 6.3629, "loss": 1.7402, "step": 195, "text_contrastive_loss": 0.9476, "train_positive_log_prob": -95.1156, "train_positive_token_accuracy": 0.0679, "train_positive_token_prob": 0.0234 }, { "contrastive_loss": 0.6138, "epoch": 0.44243792325056436, "grad_norm": 18.82052230834961, "learning_rate": 9.888209562136074e-06, "lm_loss": 6.2125, "loss": 1.6899, "step": 196, "text_contrastive_loss": 0.9098, "train_positive_log_prob": -92.8738, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.025 }, { "contrastive_loss": 0.5423, "epoch": 0.44469525959367945, "grad_norm": 15.113842010498047, "learning_rate": 9.886678772529069e-06, "lm_loss": 6.1721, "loss": 1.6142, "step": 197, "text_contrastive_loss": 0.9094, "train_positive_log_prob": -92.171, "train_positive_token_accuracy": 0.0743, "train_positive_token_prob": 0.0243 }, { "contrastive_loss": 0.5986, "epoch": 0.4469525959367946, "grad_norm": 16.718223571777344, "learning_rate": 9.885137693341795e-06, "lm_loss": 6.3469, "loss": 1.7091, "step": 198, "text_contrastive_loss": 0.9516, "train_positive_log_prob": -94.9289, "train_positive_token_accuracy": 0.07, "train_positive_token_prob": 0.0241 }, { "contrastive_loss": 0.607, "epoch": 0.4492099322799097, "grad_norm": 18.055437088012695, "learning_rate": 9.883586327819214e-06, "lm_loss": 6.2876, "loss": 1.7736, "step": 199, "text_contrastive_loss": 1.0758, "train_positive_log_prob": -93.7248, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0247 }, { "contrastive_loss": 0.6925, "epoch": 0.45146726862302483, "grad_norm": 17.790586471557617, "learning_rate": 9.88202467922794e-06, "lm_loss": 6.3828, "loss": 1.7746, "step": 200, "text_contrastive_loss": 0.8877, "train_positive_log_prob": -93.5735, "train_positive_token_accuracy": 0.0807, "train_positive_token_prob": 0.0247 }, { "contrastive_loss": 0.5999, "epoch": 0.45372460496614, "grad_norm": 17.330663681030273, "learning_rate": 9.880452750856239e-06, "lm_loss": 6.3503, "loss": 1.7844, "step": 201, "text_contrastive_loss": 1.099, "train_positive_log_prob": -95.7016, "train_positive_token_accuracy": 0.0726, "train_positive_token_prob": 0.0244 }, { "contrastive_loss": 0.5973, "epoch": 0.45598194130925507, "grad_norm": 18.505414962768555, "learning_rate": 9.878870546014025e-06, "lm_loss": 6.3365, "loss": 1.671, "step": 202, "text_contrastive_loss": 0.88, "train_positive_log_prob": -93.5629, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.025 }, { "contrastive_loss": 0.6106, "epoch": 0.4582392776523702, "grad_norm": 18.446922302246094, "learning_rate": 9.877278068032852e-06, "lm_loss": 6.3287, "loss": 1.6306, "step": 203, "text_contrastive_loss": 0.7743, "train_positive_log_prob": -94.0187, "train_positive_token_accuracy": 0.0755, "train_positive_token_prob": 0.0255 }, { "contrastive_loss": 0.5605, "epoch": 0.4604966139954853, "grad_norm": 17.675628662109375, "learning_rate": 9.875675320265903e-06, "lm_loss": 6.3369, "loss": 1.6047, "step": 204, "text_contrastive_loss": 0.821, "train_positive_log_prob": -95.2803, "train_positive_token_accuracy": 0.0686, "train_positive_token_prob": 0.0246 }, { "contrastive_loss": 0.5259, "epoch": 0.46275395033860045, "grad_norm": 16.840137481689453, "learning_rate": 9.874062306087983e-06, "lm_loss": 6.1988, "loss": 1.5437, "step": 205, "text_contrastive_loss": 0.7958, "train_positive_log_prob": -91.2163, "train_positive_token_accuracy": 0.0815, "train_positive_token_prob": 0.0262 }, { "contrastive_loss": 0.6749, "epoch": 0.4650112866817156, "grad_norm": 19.432479858398438, "learning_rate": 9.872439028895518e-06, "lm_loss": 6.1775, "loss": 1.7606, "step": 206, "text_contrastive_loss": 0.9359, "train_positive_log_prob": -90.0577, "train_positive_token_accuracy": 0.0721, "train_positive_token_prob": 0.0254 }, { "contrastive_loss": 0.5187, "epoch": 0.4672686230248307, "grad_norm": 17.64603614807129, "learning_rate": 9.870805492106546e-06, "lm_loss": 6.2887, "loss": 1.5682, "step": 207, "text_contrastive_loss": 0.8411, "train_positive_log_prob": -92.9095, "train_positive_token_accuracy": 0.0751, "train_positive_token_prob": 0.0248 }, { "contrastive_loss": 0.6564, "epoch": 0.46952595936794583, "grad_norm": 17.92214012145996, "learning_rate": 9.869161699160704e-06, "lm_loss": 6.1896, "loss": 1.7195, "step": 208, "text_contrastive_loss": 0.8883, "train_positive_log_prob": -90.647, "train_positive_token_accuracy": 0.0816, "train_positive_token_prob": 0.0267 }, { "contrastive_loss": 0.5406, "epoch": 0.4717832957110609, "grad_norm": 15.97896671295166, "learning_rate": 9.867507653519225e-06, "lm_loss": 6.222, "loss": 1.5611, "step": 209, "text_contrastive_loss": 0.7966, "train_positive_log_prob": -91.7062, "train_positive_token_accuracy": 0.0822, "train_positive_token_prob": 0.0267 }, { "contrastive_loss": 0.5576, "epoch": 0.47404063205417607, "grad_norm": 17.953554153442383, "learning_rate": 9.865843358664933e-06, "lm_loss": 6.311, "loss": 1.5939, "step": 210, "text_contrastive_loss": 0.8105, "train_positive_log_prob": -93.7491, "train_positive_token_accuracy": 0.0739, "train_positive_token_prob": 0.025 }, { "contrastive_loss": 0.7181, "epoch": 0.4762979683972912, "grad_norm": 19.0045223236084, "learning_rate": 9.86416881810223e-06, "lm_loss": 6.3363, "loss": 1.8132, "step": 211, "text_contrastive_loss": 0.923, "train_positive_log_prob": -93.8539, "train_positive_token_accuracy": 0.0707, "train_positive_token_prob": 0.0261 }, { "contrastive_loss": 0.4872, "epoch": 0.4785553047404063, "grad_norm": 16.929540634155273, "learning_rate": 9.862484035357095e-06, "lm_loss": 6.2512, "loss": 1.5339, "step": 212, "text_contrastive_loss": 0.8432, "train_positive_log_prob": -92.4767, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.0261 }, { "contrastive_loss": 0.6875, "epoch": 0.48081264108352145, "grad_norm": 19.721738815307617, "learning_rate": 9.860789013977074e-06, "lm_loss": 6.1811, "loss": 1.8533, "step": 213, "text_contrastive_loss": 1.0952, "train_positive_log_prob": -93.8491, "train_positive_token_accuracy": 0.0841, "train_positive_token_prob": 0.0278 }, { "contrastive_loss": 0.5638, "epoch": 0.48306997742663654, "grad_norm": 16.685081481933594, "learning_rate": 9.859083757531265e-06, "lm_loss": 6.2524, "loss": 1.6364, "step": 214, "text_contrastive_loss": 0.8948, "train_positive_log_prob": -91.1474, "train_positive_token_accuracy": 0.0721, "train_positive_token_prob": 0.0259 }, { "contrastive_loss": 0.6151, "epoch": 0.4853273137697517, "grad_norm": 18.529338836669922, "learning_rate": 9.857368269610325e-06, "lm_loss": 6.1617, "loss": 1.6349, "step": 215, "text_contrastive_loss": 0.8073, "train_positive_log_prob": -92.3024, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.6331, "epoch": 0.48758465011286684, "grad_norm": 16.865758895874023, "learning_rate": 9.85564255382645e-06, "lm_loss": 6.2704, "loss": 1.6857, "step": 216, "text_contrastive_loss": 0.8511, "train_positive_log_prob": -93.0253, "train_positive_token_accuracy": 0.074, "train_positive_token_prob": 0.0257 }, { "contrastive_loss": 0.5651, "epoch": 0.4898419864559819, "grad_norm": 16.25713539123535, "learning_rate": 9.853906613813378e-06, "lm_loss": 6.2145, "loss": 1.6537, "step": 217, "text_contrastive_loss": 0.9342, "train_positive_log_prob": -89.8655, "train_positive_token_accuracy": 0.0702, "train_positive_token_prob": 0.0264 }, { "contrastive_loss": 0.653, "epoch": 0.49209932279909707, "grad_norm": 18.443195343017578, "learning_rate": 9.852160453226367e-06, "lm_loss": 6.1917, "loss": 1.7427, "step": 218, "text_contrastive_loss": 0.9409, "train_positive_log_prob": -92.3912, "train_positive_token_accuracy": 0.0728, "train_positive_token_prob": 0.0262 }, { "contrastive_loss": 0.6124, "epoch": 0.49435665914221216, "grad_norm": 19.45539093017578, "learning_rate": 9.850404075742204e-06, "lm_loss": 6.3033, "loss": 1.7178, "step": 219, "text_contrastive_loss": 0.9501, "train_positive_log_prob": -90.8647, "train_positive_token_accuracy": 0.0715, "train_positive_token_prob": 0.0251 }, { "contrastive_loss": 0.6076, "epoch": 0.4966139954853273, "grad_norm": 17.693368911743164, "learning_rate": 9.848637485059183e-06, "lm_loss": 6.0934, "loss": 1.6177, "step": 220, "text_contrastive_loss": 0.8015, "train_positive_log_prob": -88.1391, "train_positive_token_accuracy": 0.0818, "train_positive_token_prob": 0.0275 }, { "contrastive_loss": 0.5889, "epoch": 0.49887133182844245, "grad_norm": 17.95065689086914, "learning_rate": 9.846860684897107e-06, "lm_loss": 6.2495, "loss": 1.6106, "step": 221, "text_contrastive_loss": 0.7935, "train_positive_log_prob": -93.0622, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.0261 }, { "contrastive_loss": 0.5507, "epoch": 0.5011286681715575, "grad_norm": 17.545827865600586, "learning_rate": 9.845073678997275e-06, "lm_loss": 6.2366, "loss": 1.5471, "step": 222, "text_contrastive_loss": 0.7453, "train_positive_log_prob": -95.0166, "train_positive_token_accuracy": 0.0754, "train_positive_token_prob": 0.0261 }, { "contrastive_loss": 0.4272, "epoch": 0.5033860045146726, "grad_norm": 14.557065963745117, "learning_rate": 9.843276471122473e-06, "lm_loss": 6.3137, "loss": 1.5591, "step": 223, "text_contrastive_loss": 1.0011, "train_positive_log_prob": -94.4131, "train_positive_token_accuracy": 0.0713, "train_positive_token_prob": 0.0244 }, { "contrastive_loss": 0.5857, "epoch": 0.5056433408577878, "grad_norm": 16.679834365844727, "learning_rate": 9.84146906505698e-06, "lm_loss": 6.019, "loss": 1.5642, "step": 224, "text_contrastive_loss": 0.7532, "train_positive_log_prob": -88.7104, "train_positive_token_accuracy": 0.0774, "train_positive_token_prob": 0.0264 }, { "contrastive_loss": 0.602, "epoch": 0.5079006772009029, "grad_norm": 17.186115264892578, "learning_rate": 9.83965146460653e-06, "lm_loss": 6.2476, "loss": 1.7351, "step": 225, "text_contrastive_loss": 1.0167, "train_positive_log_prob": -93.3259, "train_positive_token_accuracy": 0.0719, "train_positive_token_prob": 0.0254 }, { "contrastive_loss": 0.5749, "epoch": 0.510158013544018, "grad_norm": 17.85243797302246, "learning_rate": 9.83782367359834e-06, "lm_loss": 6.2224, "loss": 1.6575, "step": 226, "text_contrastive_loss": 0.9207, "train_positive_log_prob": -91.0266, "train_positive_token_accuracy": 0.0678, "train_positive_token_prob": 0.0241 }, { "contrastive_loss": 0.5526, "epoch": 0.5124153498871332, "grad_norm": 16.159278869628906, "learning_rate": 9.835985695881076e-06, "lm_loss": 6.2173, "loss": 1.596, "step": 227, "text_contrastive_loss": 0.8432, "train_positive_log_prob": -90.9736, "train_positive_token_accuracy": 0.0747, "train_positive_token_prob": 0.0256 }, { "contrastive_loss": 0.4397, "epoch": 0.5146726862302483, "grad_norm": 15.639877319335938, "learning_rate": 9.834137535324852e-06, "lm_loss": 6.2489, "loss": 1.4753, "step": 228, "text_contrastive_loss": 0.8213, "train_positive_log_prob": -92.6611, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.0253 }, { "contrastive_loss": 0.4949, "epoch": 0.5169300225733634, "grad_norm": 15.5403413772583, "learning_rate": 9.83227919582123e-06, "lm_loss": 6.1217, "loss": 1.5301, "step": 229, "text_contrastive_loss": 0.8462, "train_positive_log_prob": -91.2759, "train_positive_token_accuracy": 0.0715, "train_positive_token_prob": 0.0244 }, { "contrastive_loss": 0.4991, "epoch": 0.5191873589164786, "grad_norm": 14.614914894104004, "learning_rate": 9.830410681283203e-06, "lm_loss": 6.2602, "loss": 1.5834, "step": 230, "text_contrastive_loss": 0.9167, "train_positive_log_prob": -91.4541, "train_positive_token_accuracy": 0.0738, "train_positive_token_prob": 0.0249 }, { "contrastive_loss": 0.6095, "epoch": 0.5214446952595937, "grad_norm": 20.50278091430664, "learning_rate": 9.828531995645183e-06, "lm_loss": 6.2406, "loss": 1.6358, "step": 231, "text_contrastive_loss": 0.8044, "train_positive_log_prob": -91.4917, "train_positive_token_accuracy": 0.069, "train_positive_token_prob": 0.0248 }, { "contrastive_loss": 0.6543, "epoch": 0.5237020316027088, "grad_norm": 18.58837127685547, "learning_rate": 9.826643142863006e-06, "lm_loss": 6.2583, "loss": 1.8025, "step": 232, "text_contrastive_loss": 1.0446, "train_positive_log_prob": -92.2403, "train_positive_token_accuracy": 0.084, "train_positive_token_prob": 0.0255 }, { "contrastive_loss": 0.5722, "epoch": 0.5259593679458239, "grad_norm": 17.41712188720703, "learning_rate": 9.824744126913914e-06, "lm_loss": 6.171, "loss": 1.6904, "step": 233, "text_contrastive_loss": 1.0021, "train_positive_log_prob": -87.9141, "train_positive_token_accuracy": 0.0749, "train_positive_token_prob": 0.0254 }, { "contrastive_loss": 0.6116, "epoch": 0.5282167042889391, "grad_norm": 16.216840744018555, "learning_rate": 9.822834951796547e-06, "lm_loss": 6.0939, "loss": 1.642, "step": 234, "text_contrastive_loss": 0.842, "train_positive_log_prob": -88.2419, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0256 }, { "contrastive_loss": 0.549, "epoch": 0.5304740406320542, "grad_norm": 17.31159782409668, "learning_rate": 9.820915621530939e-06, "lm_loss": 6.2401, "loss": 1.5438, "step": 235, "text_contrastive_loss": 0.7415, "train_positive_log_prob": -90.8246, "train_positive_token_accuracy": 0.0714, "train_positive_token_prob": 0.0249 }, { "contrastive_loss": 0.6312, "epoch": 0.5327313769751693, "grad_norm": 16.976350784301758, "learning_rate": 9.818986140158507e-06, "lm_loss": 6.1592, "loss": 1.6354, "step": 236, "text_contrastive_loss": 0.7766, "train_positive_log_prob": -90.1244, "train_positive_token_accuracy": 0.0707, "train_positive_token_prob": 0.0245 }, { "contrastive_loss": 0.5212, "epoch": 0.5349887133182845, "grad_norm": 15.452056884765625, "learning_rate": 9.817046511742042e-06, "lm_loss": 6.1457, "loss": 1.4973, "step": 237, "text_contrastive_loss": 0.7231, "train_positive_log_prob": -88.4154, "train_positive_token_accuracy": 0.0695, "train_positive_token_prob": 0.024 }, { "contrastive_loss": 0.601, "epoch": 0.5372460496613995, "grad_norm": 16.932661056518555, "learning_rate": 9.815096740365698e-06, "lm_loss": 6.1435, "loss": 1.6303, "step": 238, "text_contrastive_loss": 0.8299, "train_positive_log_prob": -90.5476, "train_positive_token_accuracy": 0.0739, "train_positive_token_prob": 0.0255 }, { "contrastive_loss": 0.5377, "epoch": 0.5395033860045146, "grad_norm": 14.377546310424805, "learning_rate": 9.81313683013499e-06, "lm_loss": 6.2176, "loss": 1.6099, "step": 239, "text_contrastive_loss": 0.9008, "train_positive_log_prob": -90.4694, "train_positive_token_accuracy": 0.0738, "train_positive_token_prob": 0.0251 }, { "contrastive_loss": 0.6264, "epoch": 0.5417607223476298, "grad_norm": 17.758268356323242, "learning_rate": 9.811166785176785e-06, "lm_loss": 6.1442, "loss": 1.7602, "step": 240, "text_contrastive_loss": 1.0388, "train_positive_log_prob": -89.7901, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0243 }, { "contrastive_loss": 0.5626, "epoch": 0.5440180586907449, "grad_norm": 14.423271179199219, "learning_rate": 9.809186609639281e-06, "lm_loss": 6.1467, "loss": 1.4961, "step": 241, "text_contrastive_loss": 0.6376, "train_positive_log_prob": -90.6907, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.0271 }, { "contrastive_loss": 0.6618, "epoch": 0.54627539503386, "grad_norm": 18.368301391601562, "learning_rate": 9.807196307692015e-06, "lm_loss": 6.092, "loss": 1.7655, "step": 242, "text_contrastive_loss": 0.9889, "train_positive_log_prob": -89.1187, "train_positive_token_accuracy": 0.075, "train_positive_token_prob": 0.0265 }, { "contrastive_loss": 0.663, "epoch": 0.5485327313769752, "grad_norm": 19.776853561401367, "learning_rate": 9.805195883525844e-06, "lm_loss": 6.246, "loss": 1.7617, "step": 243, "text_contrastive_loss": 0.9481, "train_positive_log_prob": -91.979, "train_positive_token_accuracy": 0.078, "train_positive_token_prob": 0.0257 }, { "contrastive_loss": 0.5471, "epoch": 0.5507900677200903, "grad_norm": 15.385061264038086, "learning_rate": 9.803185341352936e-06, "lm_loss": 6.0717, "loss": 1.5648, "step": 244, "text_contrastive_loss": 0.8211, "train_positive_log_prob": -90.7888, "train_positive_token_accuracy": 0.0776, "train_positive_token_prob": 0.0262 }, { "contrastive_loss": 0.6105, "epoch": 0.5530474040632054, "grad_norm": 19.231651306152344, "learning_rate": 9.80116468540677e-06, "lm_loss": 6.1306, "loss": 1.6456, "step": 245, "text_contrastive_loss": 0.8441, "train_positive_log_prob": -90.2334, "train_positive_token_accuracy": 0.0717, "train_positive_token_prob": 0.0262 }, { "contrastive_loss": 0.5546, "epoch": 0.5553047404063205, "grad_norm": 17.634424209594727, "learning_rate": 9.799133919942117e-06, "lm_loss": 6.2802, "loss": 1.574, "step": 246, "text_contrastive_loss": 0.7826, "train_positive_log_prob": -93.5424, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.0257 }, { "contrastive_loss": 0.483, "epoch": 0.5575620767494357, "grad_norm": 17.656702041625977, "learning_rate": 9.797093049235034e-06, "lm_loss": 6.3099, "loss": 1.5714, "step": 247, "text_contrastive_loss": 0.9149, "train_positive_log_prob": -95.4399, "train_positive_token_accuracy": 0.0767, "train_positive_token_prob": 0.0254 }, { "contrastive_loss": 0.6128, "epoch": 0.5598194130925508, "grad_norm": 18.952295303344727, "learning_rate": 9.795042077582856e-06, "lm_loss": 6.1993, "loss": 1.6999, "step": 248, "text_contrastive_loss": 0.9345, "train_positive_log_prob": -90.1788, "train_positive_token_accuracy": 0.073, "train_positive_token_prob": 0.0262 }, { "contrastive_loss": 0.5971, "epoch": 0.5620767494356659, "grad_norm": 16.89877700805664, "learning_rate": 9.792981009304192e-06, "lm_loss": 6.1549, "loss": 1.6729, "step": 249, "text_contrastive_loss": 0.9206, "train_positive_log_prob": -88.6223, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0272 }, { "contrastive_loss": 0.6054, "epoch": 0.5643340857787811, "grad_norm": 19.338794708251953, "learning_rate": 9.790909848738907e-06, "lm_loss": 6.1171, "loss": 1.74, "step": 250, "text_contrastive_loss": 1.0457, "train_positive_log_prob": -90.9953, "train_positive_token_accuracy": 0.0767, "train_positive_token_prob": 0.0273 }, { "contrastive_loss": 0.6219, "epoch": 0.5665914221218962, "grad_norm": 18.177352905273438, "learning_rate": 9.788828600248114e-06, "lm_loss": 6.0648, "loss": 1.6477, "step": 251, "text_contrastive_loss": 0.8387, "train_positive_log_prob": -89.4804, "train_positive_token_accuracy": 0.0839, "train_positive_token_prob": 0.0283 }, { "contrastive_loss": 0.5716, "epoch": 0.5688487584650113, "grad_norm": 16.08686637878418, "learning_rate": 9.786737268214172e-06, "lm_loss": 6.1455, "loss": 1.6651, "step": 252, "text_contrastive_loss": 0.9579, "train_positive_log_prob": -91.418, "train_positive_token_accuracy": 0.0827, "train_positive_token_prob": 0.0276 }, { "contrastive_loss": 0.5646, "epoch": 0.5711060948081265, "grad_norm": 15.260428428649902, "learning_rate": 9.784635857040672e-06, "lm_loss": 6.0727, "loss": 1.6243, "step": 253, "text_contrastive_loss": 0.9048, "train_positive_log_prob": -88.6699, "train_positive_token_accuracy": 0.0725, "train_positive_token_prob": 0.0265 }, { "contrastive_loss": 0.6481, "epoch": 0.5733634311512416, "grad_norm": 17.00851058959961, "learning_rate": 9.782524371152425e-06, "lm_loss": 6.1111, "loss": 1.7243, "step": 254, "text_contrastive_loss": 0.9302, "train_positive_log_prob": -89.567, "train_positive_token_accuracy": 0.0754, "train_positive_token_prob": 0.0263 }, { "contrastive_loss": 0.6073, "epoch": 0.5756207674943566, "grad_norm": 18.875898361206055, "learning_rate": 9.780402814995458e-06, "lm_loss": 6.1165, "loss": 1.6024, "step": 255, "text_contrastive_loss": 0.7667, "train_positive_log_prob": -93.2308, "train_positive_token_accuracy": 0.072, "train_positive_token_prob": 0.0265 }, { "contrastive_loss": 0.505, "epoch": 0.5778781038374717, "grad_norm": 14.658570289611816, "learning_rate": 9.778271193037003e-06, "lm_loss": 6.0952, "loss": 1.6059, "step": 256, "text_contrastive_loss": 0.9828, "train_positive_log_prob": -91.6629, "train_positive_token_accuracy": 0.0743, "train_positive_token_prob": 0.027 }, { "contrastive_loss": 0.5923, "epoch": 0.5801354401805869, "grad_norm": 15.614409446716309, "learning_rate": 9.776129509765487e-06, "lm_loss": 6.1658, "loss": 1.6292, "step": 257, "text_contrastive_loss": 0.8406, "train_positive_log_prob": -92.0102, "train_positive_token_accuracy": 0.0809, "train_positive_token_prob": 0.0273 }, { "contrastive_loss": 0.7068, "epoch": 0.582392776523702, "grad_norm": 18.77219581604004, "learning_rate": 9.773977769690517e-06, "lm_loss": 6.0314, "loss": 1.7734, "step": 258, "text_contrastive_loss": 0.9269, "train_positive_log_prob": -88.1188, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0262 }, { "contrastive_loss": 0.6451, "epoch": 0.5846501128668171, "grad_norm": 16.3891658782959, "learning_rate": 9.771815977342882e-06, "lm_loss": 6.1078, "loss": 1.7128, "step": 259, "text_contrastive_loss": 0.9138, "train_positive_log_prob": -89.9655, "train_positive_token_accuracy": 0.0748, "train_positive_token_prob": 0.0262 }, { "contrastive_loss": 0.5838, "epoch": 0.5869074492099323, "grad_norm": 19.314834594726562, "learning_rate": 9.76964413727454e-06, "lm_loss": 6.1401, "loss": 1.6666, "step": 260, "text_contrastive_loss": 0.9377, "train_positive_log_prob": -93.8053, "train_positive_token_accuracy": 0.0816, "train_positive_token_prob": 0.0273 }, { "contrastive_loss": 0.7249, "epoch": 0.5891647855530474, "grad_norm": 20.733478546142578, "learning_rate": 9.767462254058593e-06, "lm_loss": 6.0405, "loss": 1.8595, "step": 261, "text_contrastive_loss": 1.0611, "train_positive_log_prob": -87.4025, "train_positive_token_accuracy": 0.0724, "train_positive_token_prob": 0.0259 }, { "contrastive_loss": 0.521, "epoch": 0.5914221218961625, "grad_norm": 15.232973098754883, "learning_rate": 9.765270332289307e-06, "lm_loss": 6.1078, "loss": 1.4998, "step": 262, "text_contrastive_loss": 0.736, "train_positive_log_prob": -91.445, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0263 }, { "contrastive_loss": 0.7181, "epoch": 0.5936794582392777, "grad_norm": 17.208232879638672, "learning_rate": 9.763068376582075e-06, "lm_loss": 6.1621, "loss": 1.9106, "step": 263, "text_contrastive_loss": 1.1527, "train_positive_log_prob": -91.2295, "train_positive_token_accuracy": 0.0834, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.4552, "epoch": 0.5959367945823928, "grad_norm": 14.609410285949707, "learning_rate": 9.76085639157342e-06, "lm_loss": 6.0441, "loss": 1.4435, "step": 264, "text_contrastive_loss": 0.7678, "train_positive_log_prob": -90.1864, "train_positive_token_accuracy": 0.0791, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.5018, "epoch": 0.5981941309255079, "grad_norm": 16.79071617126465, "learning_rate": 9.758634381920982e-06, "lm_loss": 6.1323, "loss": 1.5243, "step": 265, "text_contrastive_loss": 0.8185, "train_positive_log_prob": -91.6752, "train_positive_token_accuracy": 0.0755, "train_positive_token_prob": 0.0265 }, { "contrastive_loss": 0.6111, "epoch": 0.600451467268623, "grad_norm": 17.34381866455078, "learning_rate": 9.756402352303513e-06, "lm_loss": 6.2033, "loss": 1.7488, "step": 266, "text_contrastive_loss": 1.0348, "train_positive_log_prob": -93.725, "train_positive_token_accuracy": 0.0789, "train_positive_token_prob": 0.0266 }, { "contrastive_loss": 0.5962, "epoch": 0.6027088036117382, "grad_norm": 16.367290496826172, "learning_rate": 9.754160307420858e-06, "lm_loss": 6.1638, "loss": 1.6855, "step": 267, "text_contrastive_loss": 0.946, "train_positive_log_prob": -90.6276, "train_positive_token_accuracy": 0.0776, "train_positive_token_prob": 0.0266 }, { "contrastive_loss": 0.6552, "epoch": 0.6049661399548533, "grad_norm": 18.06829261779785, "learning_rate": 9.751908251993956e-06, "lm_loss": 6.1996, "loss": 1.7855, "step": 268, "text_contrastive_loss": 1.0206, "train_positive_log_prob": -91.3954, "train_positive_token_accuracy": 0.0753, "train_positive_token_prob": 0.0264 }, { "contrastive_loss": 0.5985, "epoch": 0.6072234762979684, "grad_norm": 18.242778778076172, "learning_rate": 9.749646190764823e-06, "lm_loss": 6.1059, "loss": 1.7115, "step": 269, "text_contrastive_loss": 1.0049, "train_positive_log_prob": -90.995, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0273 }, { "contrastive_loss": 0.6986, "epoch": 0.6094808126410836, "grad_norm": 19.880321502685547, "learning_rate": 9.747374128496541e-06, "lm_loss": 6.1151, "loss": 1.8399, "step": 270, "text_contrastive_loss": 1.0596, "train_positive_log_prob": -92.3053, "train_positive_token_accuracy": 0.0815, "train_positive_token_prob": 0.027 }, { "contrastive_loss": 0.4825, "epoch": 0.6117381489841986, "grad_norm": 13.749444007873535, "learning_rate": 9.745092069973254e-06, "lm_loss": 6.0851, "loss": 1.5329, "step": 271, "text_contrastive_loss": 0.8839, "train_positive_log_prob": -90.4711, "train_positive_token_accuracy": 0.0751, "train_positive_token_prob": 0.0266 }, { "contrastive_loss": 0.4315, "epoch": 0.6139954853273137, "grad_norm": 15.846484184265137, "learning_rate": 9.74280002000015e-06, "lm_loss": 6.0431, "loss": 1.4397, "step": 272, "text_contrastive_loss": 0.8078, "train_positive_log_prob": -88.2874, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0271 }, { "contrastive_loss": 0.6614, "epoch": 0.6162528216704289, "grad_norm": 17.792285919189453, "learning_rate": 9.74049798340346e-06, "lm_loss": 6.0391, "loss": 1.7364, "step": 273, "text_contrastive_loss": 0.9422, "train_positive_log_prob": -91.1986, "train_positive_token_accuracy": 0.0823, "train_positive_token_prob": 0.0278 }, { "contrastive_loss": 0.5268, "epoch": 0.618510158013544, "grad_norm": 15.569907188415527, "learning_rate": 9.738185965030444e-06, "lm_loss": 6.1219, "loss": 1.6263, "step": 274, "text_contrastive_loss": 0.9747, "train_positive_log_prob": -91.7319, "train_positive_token_accuracy": 0.0756, "train_positive_token_prob": 0.0256 }, { "contrastive_loss": 0.5985, "epoch": 0.6207674943566591, "grad_norm": 20.254619598388672, "learning_rate": 9.735863969749373e-06, "lm_loss": 6.0733, "loss": 1.6646, "step": 275, "text_contrastive_loss": 0.9175, "train_positive_log_prob": -93.1891, "train_positive_token_accuracy": 0.0774, "train_positive_token_prob": 0.0278 }, { "contrastive_loss": 0.526, "epoch": 0.6230248306997742, "grad_norm": 17.3663272857666, "learning_rate": 9.733532002449533e-06, "lm_loss": 6.0813, "loss": 1.4958, "step": 276, "text_contrastive_loss": 0.7232, "train_positive_log_prob": -91.3601, "train_positive_token_accuracy": 0.0827, "train_positive_token_prob": 0.0277 }, { "contrastive_loss": 0.7095, "epoch": 0.6252821670428894, "grad_norm": 18.9420223236084, "learning_rate": 9.731190068041205e-06, "lm_loss": 5.9635, "loss": 1.7941, "step": 277, "text_contrastive_loss": 0.9764, "train_positive_log_prob": -88.9673, "train_positive_token_accuracy": 0.0835, "train_positive_token_prob": 0.0269 }, { "contrastive_loss": 0.6617, "epoch": 0.6275395033860045, "grad_norm": 18.74312400817871, "learning_rate": 9.728838171455655e-06, "lm_loss": 6.1322, "loss": 1.7378, "step": 278, "text_contrastive_loss": 0.9256, "train_positive_log_prob": -92.0784, "train_positive_token_accuracy": 0.0739, "train_positive_token_prob": 0.0245 }, { "contrastive_loss": 0.4955, "epoch": 0.6297968397291196, "grad_norm": 16.41619873046875, "learning_rate": 9.72647631764513e-06, "lm_loss": 6.094, "loss": 1.4873, "step": 279, "text_contrastive_loss": 0.7647, "train_positive_log_prob": -88.8882, "train_positive_token_accuracy": 0.0728, "train_positive_token_prob": 0.0257 }, { "contrastive_loss": 0.5533, "epoch": 0.6320541760722348, "grad_norm": 16.351163864135742, "learning_rate": 9.724104511582838e-06, "lm_loss": 6.0338, "loss": 1.5977, "step": 280, "text_contrastive_loss": 0.882, "train_positive_log_prob": -89.4713, "train_positive_token_accuracy": 0.0735, "train_positive_token_prob": 0.0252 }, { "contrastive_loss": 0.683, "epoch": 0.6343115124153499, "grad_norm": 19.409170150756836, "learning_rate": 9.721722758262948e-06, "lm_loss": 6.127, "loss": 1.7248, "step": 281, "text_contrastive_loss": 0.8581, "train_positive_log_prob": -91.4759, "train_positive_token_accuracy": 0.0767, "train_positive_token_prob": 0.0251 }, { "contrastive_loss": 0.5802, "epoch": 0.636568848758465, "grad_norm": 17.677600860595703, "learning_rate": 9.719331062700572e-06, "lm_loss": 6.1104, "loss": 1.5941, "step": 282, "text_contrastive_loss": 0.8057, "train_positive_log_prob": -90.5352, "train_positive_token_accuracy": 0.0699, "train_positive_token_prob": 0.0262 }, { "contrastive_loss": 0.6348, "epoch": 0.6388261851015802, "grad_norm": 17.54817771911621, "learning_rate": 9.716929429931757e-06, "lm_loss": 6.0538, "loss": 1.7448, "step": 283, "text_contrastive_loss": 1.0093, "train_positive_log_prob": -89.4645, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0249 }, { "contrastive_loss": 0.6313, "epoch": 0.6410835214446953, "grad_norm": 18.95325469970703, "learning_rate": 9.714517865013473e-06, "lm_loss": 6.1678, "loss": 1.6922, "step": 284, "text_contrastive_loss": 0.8882, "train_positive_log_prob": -91.0495, "train_positive_token_accuracy": 0.0728, "train_positive_token_prob": 0.0257 }, { "contrastive_loss": 0.6045, "epoch": 0.6433408577878104, "grad_norm": 18.337867736816406, "learning_rate": 9.712096373023603e-06, "lm_loss": 6.1029, "loss": 1.6479, "step": 285, "text_contrastive_loss": 0.8663, "train_positive_log_prob": -94.3844, "train_positive_token_accuracy": 0.0783, "train_positive_token_prob": 0.0263 }, { "contrastive_loss": 0.735, "epoch": 0.6455981941309256, "grad_norm": 19.584972381591797, "learning_rate": 9.70966495906094e-06, "lm_loss": 6.0201, "loss": 1.9011, "step": 286, "text_contrastive_loss": 1.1282, "train_positive_log_prob": -89.2677, "train_positive_token_accuracy": 0.0774, "train_positive_token_prob": 0.0271 }, { "contrastive_loss": 0.5304, "epoch": 0.6478555304740407, "grad_norm": 17.35251235961914, "learning_rate": 9.707223628245157e-06, "lm_loss": 5.9482, "loss": 1.5545, "step": 287, "text_contrastive_loss": 0.8585, "train_positive_log_prob": -88.387, "train_positive_token_accuracy": 0.087, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.5458, "epoch": 0.6501128668171557, "grad_norm": 18.532630920410156, "learning_rate": 9.70477238571682e-06, "lm_loss": 6.1462, "loss": 1.6045, "step": 288, "text_contrastive_loss": 0.8882, "train_positive_log_prob": -91.0414, "train_positive_token_accuracy": 0.0746, "train_positive_token_prob": 0.0249 }, { "contrastive_loss": 0.7276, "epoch": 0.6523702031602708, "grad_norm": 18.21759605407715, "learning_rate": 9.702311236637357e-06, "lm_loss": 6.0582, "loss": 1.814, "step": 289, "text_contrastive_loss": 0.961, "train_positive_log_prob": -90.4924, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.0268 }, { "contrastive_loss": 0.6422, "epoch": 0.654627539503386, "grad_norm": 17.986358642578125, "learning_rate": 9.699840186189061e-06, "lm_loss": 6.0396, "loss": 1.6586, "step": 290, "text_contrastive_loss": 0.8247, "train_positive_log_prob": -91.0663, "train_positive_token_accuracy": 0.0746, "train_positive_token_prob": 0.0265 }, { "contrastive_loss": 0.5277, "epoch": 0.6568848758465011, "grad_norm": 16.52487564086914, "learning_rate": 9.697359239575069e-06, "lm_loss": 6.1162, "loss": 1.6542, "step": 291, "text_contrastive_loss": 1.0297, "train_positive_log_prob": -93.8718, "train_positive_token_accuracy": 0.0807, "train_positive_token_prob": 0.0275 }, { "contrastive_loss": 0.5414, "epoch": 0.6591422121896162, "grad_norm": 16.79463005065918, "learning_rate": 9.694868402019362e-06, "lm_loss": 6.0678, "loss": 1.647, "step": 292, "text_contrastive_loss": 0.9975, "train_positive_log_prob": -89.9989, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.0255 }, { "contrastive_loss": 0.7381, "epoch": 0.6613995485327314, "grad_norm": 16.14636993408203, "learning_rate": 9.69236767876674e-06, "lm_loss": 5.9773, "loss": 1.8722, "step": 293, "text_contrastive_loss": 1.0728, "train_positive_log_prob": -89.5195, "train_positive_token_accuracy": 0.0715, "train_positive_token_prob": 0.0256 }, { "contrastive_loss": 0.5277, "epoch": 0.6636568848758465, "grad_norm": 14.656967163085938, "learning_rate": 9.689857075082828e-06, "lm_loss": 6.0224, "loss": 1.6384, "step": 294, "text_contrastive_loss": 1.0168, "train_positive_log_prob": -90.3835, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0278 }, { "contrastive_loss": 0.7446, "epoch": 0.6659142212189616, "grad_norm": 17.57710075378418, "learning_rate": 9.687336596254045e-06, "lm_loss": 6.1203, "loss": 1.8206, "step": 295, "text_contrastive_loss": 0.928, "train_positive_log_prob": -88.887, "train_positive_token_accuracy": 0.075, "train_positive_token_prob": 0.0263 }, { "contrastive_loss": 0.6219, "epoch": 0.6681715575620768, "grad_norm": 16.115657806396484, "learning_rate": 9.68480624758761e-06, "lm_loss": 6.0482, "loss": 1.6568, "step": 296, "text_contrastive_loss": 0.8601, "train_positive_log_prob": -87.8189, "train_positive_token_accuracy": 0.0815, "train_positive_token_prob": 0.0276 }, { "contrastive_loss": 0.5262, "epoch": 0.6704288939051919, "grad_norm": 16.029603958129883, "learning_rate": 9.682266034411527e-06, "lm_loss": 6.0701, "loss": 1.5084, "step": 297, "text_contrastive_loss": 0.7502, "train_positive_log_prob": -89.1563, "train_positive_token_accuracy": 0.0721, "train_positive_token_prob": 0.0258 }, { "contrastive_loss": 0.5522, "epoch": 0.672686230248307, "grad_norm": 16.295137405395508, "learning_rate": 9.679715962074566e-06, "lm_loss": 6.0496, "loss": 1.6329, "step": 298, "text_contrastive_loss": 0.9515, "train_positive_log_prob": -88.3593, "train_positive_token_accuracy": 0.0717, "train_positive_token_prob": 0.0272 }, { "contrastive_loss": 0.5422, "epoch": 0.6749435665914221, "grad_norm": 15.412567138671875, "learning_rate": 9.677156035946253e-06, "lm_loss": 5.9921, "loss": 1.6354, "step": 299, "text_contrastive_loss": 0.9878, "train_positive_log_prob": -88.6905, "train_positive_token_accuracy": 0.075, "train_positive_token_prob": 0.027 }, { "contrastive_loss": 0.5602, "epoch": 0.6772009029345373, "grad_norm": 16.239044189453125, "learning_rate": 9.674586261416874e-06, "lm_loss": 6.0446, "loss": 1.5681, "step": 300, "text_contrastive_loss": 0.8069, "train_positive_log_prob": -88.4974, "train_positive_token_accuracy": 0.0782, "train_positive_token_prob": 0.0268 }, { "contrastive_loss": 0.6695, "epoch": 0.6794582392776524, "grad_norm": 17.917858123779297, "learning_rate": 9.672006643897444e-06, "lm_loss": 6.0657, "loss": 1.8314, "step": 301, "text_contrastive_loss": 1.1107, "train_positive_log_prob": -89.0193, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.0261 }, { "contrastive_loss": 0.5875, "epoch": 0.6817155756207675, "grad_norm": 15.977974891662598, "learning_rate": 9.669417188819704e-06, "lm_loss": 6.1059, "loss": 1.6711, "step": 302, "text_contrastive_loss": 0.9461, "train_positive_log_prob": -91.2699, "train_positive_token_accuracy": 0.076, "train_positive_token_prob": 0.0269 }, { "contrastive_loss": 0.5363, "epoch": 0.6839729119638827, "grad_norm": 16.47319221496582, "learning_rate": 9.666817901636115e-06, "lm_loss": 6.0319, "loss": 1.6197, "step": 303, "text_contrastive_loss": 0.9603, "train_positive_log_prob": -88.4529, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0276 }, { "contrastive_loss": 0.5232, "epoch": 0.6862302483069977, "grad_norm": 16.018712997436523, "learning_rate": 9.664208787819833e-06, "lm_loss": 6.0542, "loss": 1.5921, "step": 304, "text_contrastive_loss": 0.9269, "train_positive_log_prob": -88.3527, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0257 }, { "contrastive_loss": 0.5353, "epoch": 0.6884875846501128, "grad_norm": 14.660124778747559, "learning_rate": 9.66158985286471e-06, "lm_loss": 5.9663, "loss": 1.563, "step": 305, "text_contrastive_loss": 0.8621, "train_positive_log_prob": -87.9518, "train_positive_token_accuracy": 0.0816, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.5071, "epoch": 0.690744920993228, "grad_norm": 15.332354545593262, "learning_rate": 9.658961102285276e-06, "lm_loss": 5.9832, "loss": 1.5337, "step": 306, "text_contrastive_loss": 0.8565, "train_positive_log_prob": -87.9938, "train_positive_token_accuracy": 0.0826, "train_positive_token_prob": 0.027 }, { "contrastive_loss": 0.6551, "epoch": 0.6930022573363431, "grad_norm": 15.486719131469727, "learning_rate": 9.656322541616734e-06, "lm_loss": 6.0244, "loss": 1.6758, "step": 307, "text_contrastive_loss": 0.8365, "train_positive_log_prob": -91.1411, "train_positive_token_accuracy": 0.0736, "train_positive_token_prob": 0.0268 }, { "contrastive_loss": 0.6736, "epoch": 0.6952595936794582, "grad_norm": 20.303863525390625, "learning_rate": 9.653674176414936e-06, "lm_loss": 6.0129, "loss": 1.7671, "step": 308, "text_contrastive_loss": 0.9845, "train_positive_log_prob": -88.1039, "train_positive_token_accuracy": 0.0744, "train_positive_token_prob": 0.0269 }, { "contrastive_loss": 0.5507, "epoch": 0.6975169300225733, "grad_norm": 15.779223442077637, "learning_rate": 9.651016012256382e-06, "lm_loss": 5.9777, "loss": 1.5803, "step": 309, "text_contrastive_loss": 0.8637, "train_positive_log_prob": -86.7909, "train_positive_token_accuracy": 0.0812, "train_positive_token_prob": 0.0275 }, { "contrastive_loss": 0.6486, "epoch": 0.6997742663656885, "grad_norm": 16.559284210205078, "learning_rate": 9.648348054738208e-06, "lm_loss": 6.1013, "loss": 1.8002, "step": 310, "text_contrastive_loss": 1.0828, "train_positive_log_prob": -88.926, "train_positive_token_accuracy": 0.0708, "train_positive_token_prob": 0.0259 }, { "contrastive_loss": 0.4758, "epoch": 0.7020316027088036, "grad_norm": 15.927628517150879, "learning_rate": 9.64567030947817e-06, "lm_loss": 6.0964, "loss": 1.4774, "step": 311, "text_contrastive_loss": 0.784, "train_positive_log_prob": -90.2778, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.027 }, { "contrastive_loss": 0.5897, "epoch": 0.7042889390519187, "grad_norm": 16.94893455505371, "learning_rate": 9.642982782114628e-06, "lm_loss": 5.9471, "loss": 1.6554, "step": 312, "text_contrastive_loss": 0.9419, "train_positive_log_prob": -86.047, "train_positive_token_accuracy": 0.0815, "train_positive_token_prob": 0.0269 }, { "contrastive_loss": 0.6094, "epoch": 0.7065462753950339, "grad_norm": 16.972312927246094, "learning_rate": 9.640285478306546e-06, "lm_loss": 5.9373, "loss": 1.7118, "step": 313, "text_contrastive_loss": 1.0174, "train_positive_log_prob": -86.931, "train_positive_token_accuracy": 0.0763, "train_positive_token_prob": 0.0272 }, { "contrastive_loss": 0.4797, "epoch": 0.708803611738149, "grad_norm": 15.580543518066406, "learning_rate": 9.63757840373347e-06, "lm_loss": 5.9767, "loss": 1.4474, "step": 314, "text_contrastive_loss": 0.7402, "train_positive_log_prob": -86.7767, "train_positive_token_accuracy": 0.0807, "train_positive_token_prob": 0.0285 }, { "contrastive_loss": 0.5058, "epoch": 0.7110609480812641, "grad_norm": 17.959449768066406, "learning_rate": 9.634861564095525e-06, "lm_loss": 6.1116, "loss": 1.5566, "step": 315, "text_contrastive_loss": 0.8794, "train_positive_log_prob": -87.5941, "train_positive_token_accuracy": 0.0856, "train_positive_token_prob": 0.0269 }, { "contrastive_loss": 0.6315, "epoch": 0.7133182844243793, "grad_norm": 18.653593063354492, "learning_rate": 9.632134965113389e-06, "lm_loss": 5.8974, "loss": 1.8327, "step": 316, "text_contrastive_loss": 1.2229, "train_positive_log_prob": -88.2529, "train_positive_token_accuracy": 0.082, "train_positive_token_prob": 0.0275 }, { "contrastive_loss": 0.6441, "epoch": 0.7155756207674944, "grad_norm": 19.774742126464844, "learning_rate": 9.629398612528299e-06, "lm_loss": 6.1244, "loss": 1.7442, "step": 317, "text_contrastive_loss": 0.9754, "train_positive_log_prob": -93.2565, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.027 }, { "contrastive_loss": 0.5449, "epoch": 0.7178329571106095, "grad_norm": 15.776238441467285, "learning_rate": 9.626652512102021e-06, "lm_loss": 6.0455, "loss": 1.6211, "step": 318, "text_contrastive_loss": 0.9434, "train_positive_log_prob": -89.6836, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0271 }, { "contrastive_loss": 0.5327, "epoch": 0.7200902934537246, "grad_norm": 15.428654670715332, "learning_rate": 9.623896669616855e-06, "lm_loss": 5.9935, "loss": 1.595, "step": 319, "text_contrastive_loss": 0.926, "train_positive_log_prob": -87.5854, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0268 }, { "contrastive_loss": 0.5538, "epoch": 0.7223476297968398, "grad_norm": 15.515487670898438, "learning_rate": 9.621131090875603e-06, "lm_loss": 5.9955, "loss": 1.5415, "step": 320, "text_contrastive_loss": 0.7762, "train_positive_log_prob": -87.0324, "train_positive_token_accuracy": 0.0762, "train_positive_token_prob": 0.0271 }, { "contrastive_loss": 0.7214, "epoch": 0.7246049661399548, "grad_norm": 16.490665435791016, "learning_rate": 9.618355781701584e-06, "lm_loss": 5.9352, "loss": 1.719, "step": 321, "text_contrastive_loss": 0.8082, "train_positive_log_prob": -87.7036, "train_positive_token_accuracy": 0.0656, "train_positive_token_prob": 0.0261 }, { "contrastive_loss": 0.6069, "epoch": 0.7268623024830699, "grad_norm": 16.947256088256836, "learning_rate": 9.61557074793859e-06, "lm_loss": 6.0106, "loss": 1.7047, "step": 322, "text_contrastive_loss": 0.9934, "train_positive_log_prob": -87.4139, "train_positive_token_accuracy": 0.0851, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.7181, "epoch": 0.7291196388261851, "grad_norm": 17.712127685546875, "learning_rate": 9.612775995450896e-06, "lm_loss": 6.1349, "loss": 1.8107, "step": 323, "text_contrastive_loss": 0.9581, "train_positive_log_prob": -91.6418, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0269 }, { "contrastive_loss": 0.5646, "epoch": 0.7313769751693002, "grad_norm": 17.118139266967773, "learning_rate": 9.609971530123243e-06, "lm_loss": 5.973, "loss": 1.5571, "step": 324, "text_contrastive_loss": 0.7903, "train_positive_log_prob": -88.3355, "train_positive_token_accuracy": 0.0848, "train_positive_token_prob": 0.0276 }, { "contrastive_loss": 0.5351, "epoch": 0.7336343115124153, "grad_norm": 18.682863235473633, "learning_rate": 9.607157357860823e-06, "lm_loss": 6.1304, "loss": 1.6209, "step": 325, "text_contrastive_loss": 0.9455, "train_positive_log_prob": -91.7654, "train_positive_token_accuracy": 0.0812, "train_positive_token_prob": 0.0273 }, { "contrastive_loss": 0.614, "epoch": 0.7358916478555305, "grad_norm": 20.833709716796875, "learning_rate": 9.604333484589266e-06, "lm_loss": 5.931, "loss": 1.6868, "step": 326, "text_contrastive_loss": 0.9595, "train_positive_log_prob": -88.2185, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.0274 }, { "contrastive_loss": 0.5644, "epoch": 0.7381489841986456, "grad_norm": 17.01936912536621, "learning_rate": 9.601499916254626e-06, "lm_loss": 6.0155, "loss": 1.6586, "step": 327, "text_contrastive_loss": 0.9854, "train_positive_log_prob": -89.2936, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.5838, "epoch": 0.7404063205417607, "grad_norm": 16.37017822265625, "learning_rate": 9.598656658823378e-06, "lm_loss": 6.0186, "loss": 1.6197, "step": 328, "text_contrastive_loss": 0.8681, "train_positive_log_prob": -87.4516, "train_positive_token_accuracy": 0.0777, "train_positive_token_prob": 0.0274 }, { "contrastive_loss": 0.4752, "epoch": 0.7426636568848759, "grad_norm": 13.509170532226562, "learning_rate": 9.595803718282391e-06, "lm_loss": 6.0354, "loss": 1.462, "step": 329, "text_contrastive_loss": 0.7667, "train_positive_log_prob": -90.4357, "train_positive_token_accuracy": 0.0696, "train_positive_token_prob": 0.0261 }, { "contrastive_loss": 0.5827, "epoch": 0.744920993227991, "grad_norm": 15.919811248779297, "learning_rate": 9.59294110063893e-06, "lm_loss": 5.9231, "loss": 1.57, "step": 330, "text_contrastive_loss": 0.79, "train_positive_log_prob": -88.336, "train_positive_token_accuracy": 0.0705, "train_positive_token_prob": 0.0271 }, { "contrastive_loss": 0.6355, "epoch": 0.7471783295711061, "grad_norm": 15.94876766204834, "learning_rate": 9.590068811920637e-06, "lm_loss": 6.018, "loss": 1.7106, "step": 331, "text_contrastive_loss": 0.9466, "train_positive_log_prob": -87.8264, "train_positive_token_accuracy": 0.0774, "train_positive_token_prob": 0.0281 }, { "contrastive_loss": 0.4436, "epoch": 0.7494356659142212, "grad_norm": 12.93568229675293, "learning_rate": 9.587186858175507e-06, "lm_loss": 5.9285, "loss": 1.4444, "step": 332, "text_contrastive_loss": 0.8159, "train_positive_log_prob": -87.7702, "train_positive_token_accuracy": 0.0755, "train_positive_token_prob": 0.0273 }, { "contrastive_loss": 0.5154, "epoch": 0.7516930022573364, "grad_norm": 13.687461853027344, "learning_rate": 9.584295245471898e-06, "lm_loss": 6.047, "loss": 1.5184, "step": 333, "text_contrastive_loss": 0.7965, "train_positive_log_prob": -89.6451, "train_positive_token_accuracy": 0.0748, "train_positive_token_prob": 0.0269 }, { "contrastive_loss": 0.6214, "epoch": 0.7539503386004515, "grad_norm": 16.401796340942383, "learning_rate": 9.581393979898502e-06, "lm_loss": 6.08, "loss": 1.6637, "step": 334, "text_contrastive_loss": 0.8686, "train_positive_log_prob": -90.8207, "train_positive_token_accuracy": 0.0724, "train_positive_token_prob": 0.0262 }, { "contrastive_loss": 0.5947, "epoch": 0.7562076749435666, "grad_norm": 16.64836311340332, "learning_rate": 9.578483067564335e-06, "lm_loss": 5.9331, "loss": 1.6623, "step": 335, "text_contrastive_loss": 0.9486, "train_positive_log_prob": -88.5124, "train_positive_token_accuracy": 0.0783, "train_positive_token_prob": 0.0264 }, { "contrastive_loss": 0.5481, "epoch": 0.7584650112866818, "grad_norm": 15.983649253845215, "learning_rate": 9.575562514598727e-06, "lm_loss": 6.044, "loss": 1.5863, "step": 336, "text_contrastive_loss": 0.8677, "train_positive_log_prob": -90.2519, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.0287 }, { "contrastive_loss": 0.5021, "epoch": 0.7607223476297968, "grad_norm": 16.104372024536133, "learning_rate": 9.572632327151309e-06, "lm_loss": 5.9966, "loss": 1.4857, "step": 337, "text_contrastive_loss": 0.7679, "train_positive_log_prob": -87.5348, "train_positive_token_accuracy": 0.0805, "train_positive_token_prob": 0.0267 }, { "contrastive_loss": 0.5592, "epoch": 0.7629796839729119, "grad_norm": 15.639152526855469, "learning_rate": 9.569692511391995e-06, "lm_loss": 6.0392, "loss": 1.6813, "step": 338, "text_contrastive_loss": 1.0363, "train_positive_log_prob": -89.8587, "train_positive_token_accuracy": 0.0738, "train_positive_token_prob": 0.0263 }, { "contrastive_loss": 0.6336, "epoch": 0.7652370203160271, "grad_norm": 18.634450912475586, "learning_rate": 9.566743073510976e-06, "lm_loss": 5.8507, "loss": 1.6356, "step": 339, "text_contrastive_loss": 0.8337, "train_positive_log_prob": -84.6849, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0273 }, { "contrastive_loss": 0.55, "epoch": 0.7674943566591422, "grad_norm": 14.68127727508545, "learning_rate": 9.563784019718704e-06, "lm_loss": 5.979, "loss": 1.543, "step": 340, "text_contrastive_loss": 0.7901, "train_positive_log_prob": -87.0723, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.6238, "epoch": 0.7697516930022573, "grad_norm": 16.066402435302734, "learning_rate": 9.560815356245875e-06, "lm_loss": 5.8746, "loss": 1.6467, "step": 341, "text_contrastive_loss": 0.8709, "train_positive_log_prob": -86.8341, "train_positive_token_accuracy": 0.0848, "train_positive_token_prob": 0.0288 }, { "contrastive_loss": 0.5541, "epoch": 0.7720090293453724, "grad_norm": 16.494718551635742, "learning_rate": 9.557837089343424e-06, "lm_loss": 6.0958, "loss": 1.5481, "step": 342, "text_contrastive_loss": 0.7688, "train_positive_log_prob": -92.3658, "train_positive_token_accuracy": 0.0817, "train_positive_token_prob": 0.0269 }, { "contrastive_loss": 0.6157, "epoch": 0.7742663656884876, "grad_norm": 16.388378143310547, "learning_rate": 9.554849225282503e-06, "lm_loss": 6.1625, "loss": 1.6874, "step": 343, "text_contrastive_loss": 0.9109, "train_positive_log_prob": -92.0531, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.027 }, { "contrastive_loss": 0.6033, "epoch": 0.7765237020316027, "grad_norm": 15.661701202392578, "learning_rate": 9.551851770354477e-06, "lm_loss": 6.1156, "loss": 1.6969, "step": 344, "text_contrastive_loss": 0.964, "train_positive_log_prob": -89.2016, "train_positive_token_accuracy": 0.0715, "train_positive_token_prob": 0.0257 }, { "contrastive_loss": 0.6114, "epoch": 0.7787810383747178, "grad_norm": 19.551776885986328, "learning_rate": 9.548844730870903e-06, "lm_loss": 6.0139, "loss": 1.7044, "step": 345, "text_contrastive_loss": 0.9831, "train_positive_log_prob": -89.7135, "train_positive_token_accuracy": 0.0702, "train_positive_token_prob": 0.0265 }, { "contrastive_loss": 0.6824, "epoch": 0.781038374717833, "grad_norm": 18.047565460205078, "learning_rate": 9.545828113163516e-06, "lm_loss": 6.0352, "loss": 1.7321, "step": 346, "text_contrastive_loss": 0.8924, "train_positive_log_prob": -88.4301, "train_positive_token_accuracy": 0.0733, "train_positive_token_prob": 0.0272 }, { "contrastive_loss": 0.5448, "epoch": 0.7832957110609481, "grad_norm": 16.720369338989258, "learning_rate": 9.542801923584228e-06, "lm_loss": 5.9956, "loss": 1.5758, "step": 347, "text_contrastive_loss": 0.8628, "train_positive_log_prob": -88.8306, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0276 }, { "contrastive_loss": 0.6701, "epoch": 0.7855530474040632, "grad_norm": 16.758535385131836, "learning_rate": 9.5397661685051e-06, "lm_loss": 5.9736, "loss": 1.7021, "step": 348, "text_contrastive_loss": 0.8693, "train_positive_log_prob": -86.5943, "train_positive_token_accuracy": 0.0798, "train_positive_token_prob": 0.0271 }, { "contrastive_loss": 0.6332, "epoch": 0.7878103837471784, "grad_norm": 16.43297576904297, "learning_rate": 9.536720854318333e-06, "lm_loss": 6.0055, "loss": 1.7895, "step": 349, "text_contrastive_loss": 1.1115, "train_positive_log_prob": -86.9145, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.026 }, { "contrastive_loss": 0.571, "epoch": 0.7900677200902935, "grad_norm": 16.152088165283203, "learning_rate": 9.533665987436262e-06, "lm_loss": 6.0885, "loss": 1.634, "step": 350, "text_contrastive_loss": 0.9083, "train_positive_log_prob": -91.7586, "train_positive_token_accuracy": 0.0741, "train_positive_token_prob": 0.0257 }, { "contrastive_loss": 0.4889, "epoch": 0.7923250564334086, "grad_norm": 14.552695274353027, "learning_rate": 9.530601574291331e-06, "lm_loss": 5.9661, "loss": 1.4834, "step": 351, "text_contrastive_loss": 0.7957, "train_positive_log_prob": -88.2302, "train_positive_token_accuracy": 0.0681, "train_positive_token_prob": 0.0253 }, { "contrastive_loss": 0.5776, "epoch": 0.7945823927765236, "grad_norm": 16.552778244018555, "learning_rate": 9.527527621336087e-06, "lm_loss": 6.033, "loss": 1.6147, "step": 352, "text_contrastive_loss": 0.8675, "train_positive_log_prob": -90.2359, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0267 }, { "contrastive_loss": 0.5818, "epoch": 0.7968397291196389, "grad_norm": 14.326408386230469, "learning_rate": 9.524444135043168e-06, "lm_loss": 5.9204, "loss": 1.6186, "step": 353, "text_contrastive_loss": 0.8896, "train_positive_log_prob": -86.4452, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.5184, "epoch": 0.7990970654627539, "grad_norm": 15.130330085754395, "learning_rate": 9.521351121905278e-06, "lm_loss": 5.9565, "loss": 1.5699, "step": 354, "text_contrastive_loss": 0.9116, "train_positive_log_prob": -87.9751, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0269 }, { "contrastive_loss": 0.4493, "epoch": 0.801354401805869, "grad_norm": 17.21162223815918, "learning_rate": 9.518248588435185e-06, "lm_loss": 6.1181, "loss": 1.4731, "step": 355, "text_contrastive_loss": 0.8239, "train_positive_log_prob": -90.6486, "train_positive_token_accuracy": 0.0707, "train_positive_token_prob": 0.0263 }, { "contrastive_loss": 0.6361, "epoch": 0.8036117381489842, "grad_norm": 16.533164978027344, "learning_rate": 9.515136541165708e-06, "lm_loss": 5.8704, "loss": 1.6609, "step": 356, "text_contrastive_loss": 0.8754, "train_positive_log_prob": -85.9868, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0267 }, { "contrastive_loss": 0.5051, "epoch": 0.8058690744920993, "grad_norm": 14.658692359924316, "learning_rate": 9.512014986649691e-06, "lm_loss": 5.9747, "loss": 1.5898, "step": 357, "text_contrastive_loss": 0.9744, "train_positive_log_prob": -89.1526, "train_positive_token_accuracy": 0.0717, "train_positive_token_prob": 0.0265 }, { "contrastive_loss": 0.4978, "epoch": 0.8081264108352144, "grad_norm": 17.369964599609375, "learning_rate": 9.50888393146e-06, "lm_loss": 5.9468, "loss": 1.5709, "step": 358, "text_contrastive_loss": 0.9568, "train_positive_log_prob": -85.2053, "train_positive_token_accuracy": 0.0722, "train_positive_token_prob": 0.0271 }, { "contrastive_loss": 0.572, "epoch": 0.8103837471783296, "grad_norm": 16.059120178222656, "learning_rate": 9.50574338218951e-06, "lm_loss": 5.9644, "loss": 1.6031, "step": 359, "text_contrastive_loss": 0.8693, "train_positive_log_prob": -87.2093, "train_positive_token_accuracy": 0.0737, "train_positive_token_prob": 0.0274 }, { "contrastive_loss": 0.6199, "epoch": 0.8126410835214447, "grad_norm": 16.445810317993164, "learning_rate": 9.502593345451078e-06, "lm_loss": 5.9581, "loss": 1.6576, "step": 360, "text_contrastive_loss": 0.8838, "train_positive_log_prob": -88.2559, "train_positive_token_accuracy": 0.0783, "train_positive_token_prob": 0.0274 }, { "contrastive_loss": 0.4576, "epoch": 0.8148984198645598, "grad_norm": 14.760526657104492, "learning_rate": 9.499433827877547e-06, "lm_loss": 5.9476, "loss": 1.5293, "step": 361, "text_contrastive_loss": 0.9539, "train_positive_log_prob": -84.9882, "train_positive_token_accuracy": 0.0785, "train_positive_token_prob": 0.0278 }, { "contrastive_loss": 0.4922, "epoch": 0.8171557562076749, "grad_norm": 16.34233856201172, "learning_rate": 9.49626483612172e-06, "lm_loss": 5.8911, "loss": 1.4942, "step": 362, "text_contrastive_loss": 0.8257, "train_positive_log_prob": -86.315, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.0278 }, { "contrastive_loss": 0.5968, "epoch": 0.8194130925507901, "grad_norm": 18.121335983276367, "learning_rate": 9.493086376856346e-06, "lm_loss": 5.9198, "loss": 1.7071, "step": 363, "text_contrastive_loss": 1.0368, "train_positive_log_prob": -87.2706, "train_positive_token_accuracy": 0.0755, "train_positive_token_prob": 0.0272 }, { "contrastive_loss": 0.5812, "epoch": 0.8216704288939052, "grad_norm": 17.509723663330078, "learning_rate": 9.489898456774116e-06, "lm_loss": 5.9005, "loss": 1.6531, "step": 364, "text_contrastive_loss": 0.9639, "train_positive_log_prob": -86.044, "train_positive_token_accuracy": 0.0723, "train_positive_token_prob": 0.027 }, { "contrastive_loss": 0.5617, "epoch": 0.8239277652370203, "grad_norm": 15.32075023651123, "learning_rate": 9.486701082587635e-06, "lm_loss": 5.9199, "loss": 1.5451, "step": 365, "text_contrastive_loss": 0.7828, "train_positive_log_prob": -86.8076, "train_positive_token_accuracy": 0.0743, "train_positive_token_prob": 0.027 }, { "contrastive_loss": 0.5565, "epoch": 0.8261851015801355, "grad_norm": 18.541889190673828, "learning_rate": 9.483494261029418e-06, "lm_loss": 5.8448, "loss": 1.545, "step": 366, "text_contrastive_loss": 0.8081, "train_positive_log_prob": -85.7326, "train_positive_token_accuracy": 0.0754, "train_positive_token_prob": 0.0286 }, { "contrastive_loss": 0.665, "epoch": 0.8284424379232506, "grad_norm": 17.35003662109375, "learning_rate": 9.480277998851875e-06, "lm_loss": 5.9089, "loss": 1.7371, "step": 367, "text_contrastive_loss": 0.9623, "train_positive_log_prob": -87.941, "train_positive_token_accuracy": 0.0748, "train_positive_token_prob": 0.0285 }, { "contrastive_loss": 0.5716, "epoch": 0.8306997742663657, "grad_norm": 16.31109046936035, "learning_rate": 9.47705230282729e-06, "lm_loss": 5.887, "loss": 1.6179, "step": 368, "text_contrastive_loss": 0.9152, "train_positive_log_prob": -87.7974, "train_positive_token_accuracy": 0.0699, "train_positive_token_prob": 0.0265 }, { "contrastive_loss": 0.5609, "epoch": 0.8329571106094809, "grad_norm": 15.051897048950195, "learning_rate": 9.473817179747815e-06, "lm_loss": 5.9945, "loss": 1.5513, "step": 369, "text_contrastive_loss": 0.782, "train_positive_log_prob": -88.5668, "train_positive_token_accuracy": 0.064, "train_positive_token_prob": 0.0261 }, { "contrastive_loss": 0.7267, "epoch": 0.835214446952596, "grad_norm": 17.49429702758789, "learning_rate": 9.470572636425451e-06, "lm_loss": 6.0031, "loss": 1.8776, "step": 370, "text_contrastive_loss": 1.1012, "train_positive_log_prob": -89.394, "train_positive_token_accuracy": 0.0832, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.5684, "epoch": 0.837471783295711, "grad_norm": 17.741493225097656, "learning_rate": 9.467318679692031e-06, "lm_loss": 5.854, "loss": 1.6599, "step": 371, "text_contrastive_loss": 1.0122, "train_positive_log_prob": -87.42, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.0282 }, { "contrastive_loss": 0.4824, "epoch": 0.8397291196388262, "grad_norm": 15.962331771850586, "learning_rate": 9.464055316399217e-06, "lm_loss": 5.934, "loss": 1.489, "step": 372, "text_contrastive_loss": 0.8265, "train_positive_log_prob": -88.3151, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0274 }, { "contrastive_loss": 0.5473, "epoch": 0.8419864559819413, "grad_norm": 16.54502296447754, "learning_rate": 9.46078255341847e-06, "lm_loss": 5.919, "loss": 1.5184, "step": 373, "text_contrastive_loss": 0.7583, "train_positive_log_prob": -86.8454, "train_positive_token_accuracy": 0.0892, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.5836, "epoch": 0.8442437923250564, "grad_norm": 15.227729797363281, "learning_rate": 9.457500397641049e-06, "lm_loss": 5.9626, "loss": 1.6041, "step": 374, "text_contrastive_loss": 0.8483, "train_positive_log_prob": -90.3974, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0283 }, { "contrastive_loss": 0.5116, "epoch": 0.8465011286681715, "grad_norm": 16.31110191345215, "learning_rate": 9.454208855977986e-06, "lm_loss": 5.8935, "loss": 1.5011, "step": 375, "text_contrastive_loss": 0.8003, "train_positive_log_prob": -86.6806, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0287 }, { "contrastive_loss": 0.476, "epoch": 0.8487584650112867, "grad_norm": 16.87764549255371, "learning_rate": 9.450907935360081e-06, "lm_loss": 5.7913, "loss": 1.5026, "step": 376, "text_contrastive_loss": 0.8949, "train_positive_log_prob": -85.0093, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.0278 }, { "contrastive_loss": 0.6421, "epoch": 0.8510158013544018, "grad_norm": 16.616607666015625, "learning_rate": 9.447597642737878e-06, "lm_loss": 6.0081, "loss": 1.7121, "step": 377, "text_contrastive_loss": 0.9384, "train_positive_log_prob": -90.167, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0274 }, { "contrastive_loss": 0.5528, "epoch": 0.8532731376975169, "grad_norm": 17.461790084838867, "learning_rate": 9.44427798508166e-06, "lm_loss": 5.9656, "loss": 1.5774, "step": 378, "text_contrastive_loss": 0.8561, "train_positive_log_prob": -86.3173, "train_positive_token_accuracy": 0.0775, "train_positive_token_prob": 0.0269 }, { "contrastive_loss": 0.5893, "epoch": 0.8555304740406321, "grad_norm": 16.50901985168457, "learning_rate": 9.440948969381425e-06, "lm_loss": 5.8756, "loss": 1.6351, "step": 379, "text_contrastive_loss": 0.9163, "train_positive_log_prob": -87.2872, "train_positive_token_accuracy": 0.086, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.5734, "epoch": 0.8577878103837472, "grad_norm": 15.030078887939453, "learning_rate": 9.437610602646878e-06, "lm_loss": 5.9427, "loss": 1.565, "step": 380, "text_contrastive_loss": 0.7946, "train_positive_log_prob": -88.9976, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0272 }, { "contrastive_loss": 0.5823, "epoch": 0.8600451467268623, "grad_norm": 14.964461326599121, "learning_rate": 9.434262891907413e-06, "lm_loss": 5.9993, "loss": 1.5932, "step": 381, "text_contrastive_loss": 0.8221, "train_positive_log_prob": -88.2041, "train_positive_token_accuracy": 0.0731, "train_positive_token_prob": 0.0265 }, { "contrastive_loss": 0.5695, "epoch": 0.8623024830699775, "grad_norm": 16.77033042907715, "learning_rate": 9.430905844212102e-06, "lm_loss": 5.9179, "loss": 1.6473, "step": 382, "text_contrastive_loss": 0.9719, "train_positive_log_prob": -86.2016, "train_positive_token_accuracy": 0.0737, "train_positive_token_prob": 0.0268 }, { "contrastive_loss": 0.6778, "epoch": 0.8645598194130926, "grad_norm": 19.320667266845703, "learning_rate": 9.427539466629672e-06, "lm_loss": 5.8167, "loss": 1.6839, "step": 383, "text_contrastive_loss": 0.8489, "train_positive_log_prob": -86.042, "train_positive_token_accuracy": 0.0832, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.4745, "epoch": 0.8668171557562077, "grad_norm": 15.715731620788574, "learning_rate": 9.424163766248499e-06, "lm_loss": 5.9824, "loss": 1.4254, "step": 384, "text_contrastive_loss": 0.7053, "train_positive_log_prob": -89.9199, "train_positive_token_accuracy": 0.085, "train_positive_token_prob": 0.0269 }, { "contrastive_loss": 0.6555, "epoch": 0.8690744920993227, "grad_norm": 17.485872268676758, "learning_rate": 9.420778750176588e-06, "lm_loss": 5.8326, "loss": 1.6989, "step": 385, "text_contrastive_loss": 0.9202, "train_positive_log_prob": -85.9371, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0261 }, { "contrastive_loss": 0.6277, "epoch": 0.871331828442438, "grad_norm": 17.302583694458008, "learning_rate": 9.41738442554156e-06, "lm_loss": 5.941, "loss": 1.6261, "step": 386, "text_contrastive_loss": 0.8084, "train_positive_log_prob": -89.8349, "train_positive_token_accuracy": 0.0785, "train_positive_token_prob": 0.0276 }, { "contrastive_loss": 0.6555, "epoch": 0.873589164785553, "grad_norm": 16.820449829101562, "learning_rate": 9.41398079949064e-06, "lm_loss": 5.7898, "loss": 1.6802, "step": 387, "text_contrastive_loss": 0.8915, "train_positive_log_prob": -83.5024, "train_positive_token_accuracy": 0.0783, "train_positive_token_prob": 0.0273 }, { "contrastive_loss": 0.6577, "epoch": 0.8758465011286681, "grad_norm": 16.29709243774414, "learning_rate": 9.41056787919063e-06, "lm_loss": 5.9397, "loss": 1.6741, "step": 388, "text_contrastive_loss": 0.8449, "train_positive_log_prob": -88.4692, "train_positive_token_accuracy": 0.074, "train_positive_token_prob": 0.0272 }, { "contrastive_loss": 0.5398, "epoch": 0.8781038374717833, "grad_norm": 14.739457130432129, "learning_rate": 9.407145671827909e-06, "lm_loss": 5.8599, "loss": 1.5127, "step": 389, "text_contrastive_loss": 0.7739, "train_positive_log_prob": -87.0304, "train_positive_token_accuracy": 0.0818, "train_positive_token_prob": 0.0283 }, { "contrastive_loss": 0.512, "epoch": 0.8803611738148984, "grad_norm": 16.041366577148438, "learning_rate": 9.403714184608411e-06, "lm_loss": 5.8536, "loss": 1.5133, "step": 390, "text_contrastive_loss": 0.8318, "train_positive_log_prob": -87.7598, "train_positive_token_accuracy": 0.0844, "train_positive_token_prob": 0.0282 }, { "contrastive_loss": 0.6058, "epoch": 0.8826185101580135, "grad_norm": 15.920665740966797, "learning_rate": 9.400273424757607e-06, "lm_loss": 5.7786, "loss": 1.6172, "step": 391, "text_contrastive_loss": 0.867, "train_positive_log_prob": -86.4933, "train_positive_token_accuracy": 0.0864, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.552, "epoch": 0.8848758465011287, "grad_norm": 15.671189308166504, "learning_rate": 9.396823399520495e-06, "lm_loss": 5.9443, "loss": 1.6113, "step": 392, "text_contrastive_loss": 0.9297, "train_positive_log_prob": -89.9789, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.0267 }, { "contrastive_loss": 0.6316, "epoch": 0.8871331828442438, "grad_norm": 18.04585075378418, "learning_rate": 9.393364116161582e-06, "lm_loss": 5.981, "loss": 1.7371, "step": 393, "text_contrastive_loss": 1.0147, "train_positive_log_prob": -87.4024, "train_positive_token_accuracy": 0.0692, "train_positive_token_prob": 0.0264 }, { "contrastive_loss": 0.5855, "epoch": 0.8893905191873589, "grad_norm": 16.53270149230957, "learning_rate": 9.38989558196487e-06, "lm_loss": 5.8239, "loss": 1.645, "step": 394, "text_contrastive_loss": 0.9542, "train_positive_log_prob": -85.8799, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.6909, "epoch": 0.891647855530474, "grad_norm": 18.113792419433594, "learning_rate": 9.386417804233836e-06, "lm_loss": 5.9493, "loss": 1.6901, "step": 395, "text_contrastive_loss": 0.8086, "train_positive_log_prob": -88.4249, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0268 }, { "contrastive_loss": 0.6344, "epoch": 0.8939051918735892, "grad_norm": 16.652870178222656, "learning_rate": 9.382930790291426e-06, "lm_loss": 5.9354, "loss": 1.6869, "step": 396, "text_contrastive_loss": 0.9179, "train_positive_log_prob": -88.9619, "train_positive_token_accuracy": 0.0758, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.4617, "epoch": 0.8961625282167043, "grad_norm": 12.797913551330566, "learning_rate": 9.37943454748003e-06, "lm_loss": 6.0363, "loss": 1.4401, "step": 397, "text_contrastive_loss": 0.7496, "train_positive_log_prob": -89.4817, "train_positive_token_accuracy": 0.0746, "train_positive_token_prob": 0.0267 }, { "contrastive_loss": 0.5744, "epoch": 0.8984198645598194, "grad_norm": 16.787803649902344, "learning_rate": 9.375929083161475e-06, "lm_loss": 5.9057, "loss": 1.6225, "step": 398, "text_contrastive_loss": 0.9151, "train_positive_log_prob": -86.6751, "train_positive_token_accuracy": 0.0719, "train_positive_token_prob": 0.0268 }, { "contrastive_loss": 0.5262, "epoch": 0.9006772009029346, "grad_norm": 16.495084762573242, "learning_rate": 9.372414404717001e-06, "lm_loss": 5.7736, "loss": 1.5032, "step": 399, "text_contrastive_loss": 0.7992, "train_positive_log_prob": -85.8852, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.5468, "epoch": 0.9029345372460497, "grad_norm": 16.830516815185547, "learning_rate": 9.36889051954725e-06, "lm_loss": 5.884, "loss": 1.5455, "step": 400, "text_contrastive_loss": 0.8206, "train_positive_log_prob": -85.6623, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.0278 }, { "contrastive_loss": 0.6198, "epoch": 0.9051918735891648, "grad_norm": 17.817441940307617, "learning_rate": 9.365357435072255e-06, "lm_loss": 5.8643, "loss": 1.6149, "step": 401, "text_contrastive_loss": 0.8172, "train_positive_log_prob": -88.1916, "train_positive_token_accuracy": 0.086, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.5756, "epoch": 0.90744920993228, "grad_norm": 16.032751083374023, "learning_rate": 9.361815158731413e-06, "lm_loss": 5.9592, "loss": 1.6194, "step": 402, "text_contrastive_loss": 0.8957, "train_positive_log_prob": -88.2699, "train_positive_token_accuracy": 0.0752, "train_positive_token_prob": 0.0276 }, { "contrastive_loss": 0.6894, "epoch": 0.909706546275395, "grad_norm": 20.829038619995117, "learning_rate": 9.358263697983479e-06, "lm_loss": 5.8097, "loss": 1.7285, "step": 403, "text_contrastive_loss": 0.9163, "train_positive_log_prob": -85.4369, "train_positive_token_accuracy": 0.0765, "train_positive_token_prob": 0.0276 }, { "contrastive_loss": 0.5692, "epoch": 0.9119638826185101, "grad_norm": 17.53789520263672, "learning_rate": 9.354703060306546e-06, "lm_loss": 5.8393, "loss": 1.6301, "step": 404, "text_contrastive_loss": 0.954, "train_positive_log_prob": -84.8315, "train_positive_token_accuracy": 0.0742, "train_positive_token_prob": 0.027 }, { "contrastive_loss": 0.5173, "epoch": 0.9142212189616253, "grad_norm": 18.327421188354492, "learning_rate": 9.351133253198027e-06, "lm_loss": 5.8648, "loss": 1.5791, "step": 405, "text_contrastive_loss": 0.9506, "train_positive_log_prob": -87.812, "train_positive_token_accuracy": 0.0811, "train_positive_token_prob": 0.0276 }, { "contrastive_loss": 0.5595, "epoch": 0.9164785553047404, "grad_norm": 15.92544937133789, "learning_rate": 9.347554284174654e-06, "lm_loss": 5.8902, "loss": 1.6165, "step": 406, "text_contrastive_loss": 0.9359, "train_positive_log_prob": -86.93, "train_positive_token_accuracy": 0.0789, "train_positive_token_prob": 0.0271 }, { "contrastive_loss": 0.6278, "epoch": 0.9187358916478555, "grad_norm": 17.234172821044922, "learning_rate": 9.343966160772438e-06, "lm_loss": 5.89, "loss": 1.6909, "step": 407, "text_contrastive_loss": 0.9483, "train_positive_log_prob": -85.5923, "train_positive_token_accuracy": 0.0761, "train_positive_token_prob": 0.0271 }, { "contrastive_loss": 0.5346, "epoch": 0.9209932279909706, "grad_norm": 14.652546882629395, "learning_rate": 9.340368890546672e-06, "lm_loss": 5.9793, "loss": 1.5382, "step": 408, "text_contrastive_loss": 0.8115, "train_positive_log_prob": -89.6913, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.0263 }, { "contrastive_loss": 0.4637, "epoch": 0.9232505643340858, "grad_norm": 14.887948989868164, "learning_rate": 9.336762481071906e-06, "lm_loss": 5.9219, "loss": 1.4281, "step": 409, "text_contrastive_loss": 0.7445, "train_positive_log_prob": -87.2568, "train_positive_token_accuracy": 0.0811, "train_positive_token_prob": 0.0272 }, { "contrastive_loss": 0.6062, "epoch": 0.9255079006772009, "grad_norm": 16.95090103149414, "learning_rate": 9.333146939941938e-06, "lm_loss": 6.0113, "loss": 1.6833, "step": 410, "text_contrastive_loss": 0.952, "train_positive_log_prob": -89.957, "train_positive_token_accuracy": 0.0807, "train_positive_token_prob": 0.0262 }, { "contrastive_loss": 0.598, "epoch": 0.927765237020316, "grad_norm": 14.304362297058105, "learning_rate": 9.329522274769791e-06, "lm_loss": 5.8227, "loss": 1.5791, "step": 411, "text_contrastive_loss": 0.7976, "train_positive_log_prob": -83.8585, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0276 }, { "contrastive_loss": 0.631, "epoch": 0.9300225733634312, "grad_norm": 17.765522003173828, "learning_rate": 9.325888493187699e-06, "lm_loss": 6.051, "loss": 1.72, "step": 412, "text_contrastive_loss": 0.9678, "train_positive_log_prob": -91.0685, "train_positive_token_accuracy": 0.0755, "train_positive_token_prob": 0.0265 }, { "contrastive_loss": 0.577, "epoch": 0.9322799097065463, "grad_norm": 17.135438919067383, "learning_rate": 9.322245602847094e-06, "lm_loss": 5.9059, "loss": 1.6223, "step": 413, "text_contrastive_loss": 0.9095, "train_positive_log_prob": -87.7117, "train_positive_token_accuracy": 0.075, "train_positive_token_prob": 0.0275 }, { "contrastive_loss": 0.5003, "epoch": 0.9345372460496614, "grad_norm": 14.984756469726562, "learning_rate": 9.31859361141859e-06, "lm_loss": 5.8585, "loss": 1.4603, "step": 414, "text_contrastive_loss": 0.7482, "train_positive_log_prob": -82.9794, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0281 }, { "contrastive_loss": 0.5006, "epoch": 0.9367945823927766, "grad_norm": 15.582765579223633, "learning_rate": 9.314932526591956e-06, "lm_loss": 5.9528, "loss": 1.4835, "step": 415, "text_contrastive_loss": 0.7753, "train_positive_log_prob": -89.5734, "train_positive_token_accuracy": 0.0732, "train_positive_token_prob": 0.0268 }, { "contrastive_loss": 0.6021, "epoch": 0.9390519187358917, "grad_norm": 16.197175979614258, "learning_rate": 9.311262356076118e-06, "lm_loss": 5.779, "loss": 1.6585, "step": 416, "text_contrastive_loss": 0.9572, "train_positive_log_prob": -84.5415, "train_positive_token_accuracy": 0.0758, "train_positive_token_prob": 0.0272 }, { "contrastive_loss": 0.5616, "epoch": 0.9413092550790068, "grad_norm": 17.086196899414062, "learning_rate": 9.30758310759913e-06, "lm_loss": 5.899, "loss": 1.5774, "step": 417, "text_contrastive_loss": 0.8519, "train_positive_log_prob": -89.5021, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.5974, "epoch": 0.9435665914221218, "grad_norm": 17.816383361816406, "learning_rate": 9.303894788908158e-06, "lm_loss": 5.8604, "loss": 1.6101, "step": 418, "text_contrastive_loss": 0.8533, "train_positive_log_prob": -86.6261, "train_positive_token_accuracy": 0.0748, "train_positive_token_prob": 0.027 }, { "contrastive_loss": 0.59, "epoch": 0.945823927765237, "grad_norm": 17.613948822021484, "learning_rate": 9.300197407769472e-06, "lm_loss": 5.7805, "loss": 1.6857, "step": 419, "text_contrastive_loss": 1.0353, "train_positive_log_prob": -84.2001, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0287 }, { "contrastive_loss": 0.4702, "epoch": 0.9480812641083521, "grad_norm": 16.38786506652832, "learning_rate": 9.296490971968416e-06, "lm_loss": 5.8173, "loss": 1.4483, "step": 420, "text_contrastive_loss": 0.7927, "train_positive_log_prob": -84.5788, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0281 }, { "contrastive_loss": 0.5483, "epoch": 0.9503386004514672, "grad_norm": 18.509519577026367, "learning_rate": 9.292775489309409e-06, "lm_loss": 5.8759, "loss": 1.5136, "step": 421, "text_contrastive_loss": 0.7555, "train_positive_log_prob": -88.1345, "train_positive_token_accuracy": 0.074, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.4699, "epoch": 0.9525959367945824, "grad_norm": 16.658477783203125, "learning_rate": 9.289050967615914e-06, "lm_loss": 5.9344, "loss": 1.4823, "step": 422, "text_contrastive_loss": 0.838, "train_positive_log_prob": -89.5952, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0278 }, { "contrastive_loss": 0.5532, "epoch": 0.9548532731376975, "grad_norm": 15.291207313537598, "learning_rate": 9.285317414730427e-06, "lm_loss": 6.0415, "loss": 1.6304, "step": 423, "text_contrastive_loss": 0.9461, "train_positive_log_prob": -90.5035, "train_positive_token_accuracy": 0.073, "train_positive_token_prob": 0.0259 }, { "contrastive_loss": 0.5333, "epoch": 0.9571106094808126, "grad_norm": 15.894474029541016, "learning_rate": 9.281574838514464e-06, "lm_loss": 5.9181, "loss": 1.5743, "step": 424, "text_contrastive_loss": 0.8984, "train_positive_log_prob": -89.7682, "train_positive_token_accuracy": 0.0873, "train_positive_token_prob": 0.0287 }, { "contrastive_loss": 0.5708, "epoch": 0.9593679458239278, "grad_norm": 16.08817481994629, "learning_rate": 9.277823246848537e-06, "lm_loss": 6.0141, "loss": 1.6473, "step": 425, "text_contrastive_loss": 0.9503, "train_positive_log_prob": -86.6867, "train_positive_token_accuracy": 0.0808, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.612, "epoch": 0.9616252821670429, "grad_norm": 15.471580505371094, "learning_rate": 9.274062647632144e-06, "lm_loss": 5.8701, "loss": 1.6208, "step": 426, "text_contrastive_loss": 0.8436, "train_positive_log_prob": -87.7061, "train_positive_token_accuracy": 0.0805, "train_positive_token_prob": 0.0282 }, { "contrastive_loss": 0.6127, "epoch": 0.963882618510158, "grad_norm": 17.222087860107422, "learning_rate": 9.270293048783747e-06, "lm_loss": 5.9028, "loss": 1.6201, "step": 427, "text_contrastive_loss": 0.8343, "train_positive_log_prob": -86.9072, "train_positive_token_accuracy": 0.074, "train_positive_token_prob": 0.0275 }, { "contrastive_loss": 0.5228, "epoch": 0.9661399548532731, "grad_norm": 17.653852462768555, "learning_rate": 9.266514458240762e-06, "lm_loss": 5.8615, "loss": 1.4908, "step": 428, "text_contrastive_loss": 0.7636, "train_positive_log_prob": -86.3916, "train_positive_token_accuracy": 0.075, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.5396, "epoch": 0.9683972911963883, "grad_norm": 16.23486328125, "learning_rate": 9.262726883959535e-06, "lm_loss": 5.8543, "loss": 1.5029, "step": 429, "text_contrastive_loss": 0.7557, "train_positive_log_prob": -88.9117, "train_positive_token_accuracy": 0.0777, "train_positive_token_prob": 0.0286 }, { "contrastive_loss": 0.5503, "epoch": 0.9706546275395034, "grad_norm": 15.685914039611816, "learning_rate": 9.258930333915325e-06, "lm_loss": 5.8241, "loss": 1.5681, "step": 430, "text_contrastive_loss": 0.8707, "train_positive_log_prob": -86.4459, "train_positive_token_accuracy": 0.0861, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4516, "epoch": 0.9729119638826185, "grad_norm": 15.291813850402832, "learning_rate": 9.2551248161023e-06, "lm_loss": 5.8405, "loss": 1.4498, "step": 431, "text_contrastive_loss": 0.8281, "train_positive_log_prob": -86.9725, "train_positive_token_accuracy": 0.075, "train_positive_token_prob": 0.0286 }, { "contrastive_loss": 0.5189, "epoch": 0.9751693002257337, "grad_norm": 14.862266540527344, "learning_rate": 9.251310338533504e-06, "lm_loss": 5.83, "loss": 1.5722, "step": 432, "text_contrastive_loss": 0.9407, "train_positive_log_prob": -85.3116, "train_positive_token_accuracy": 0.0726, "train_positive_token_prob": 0.0267 }, { "contrastive_loss": 0.5968, "epoch": 0.9774266365688488, "grad_norm": 16.92352867126465, "learning_rate": 9.247486909240849e-06, "lm_loss": 6.0227, "loss": 1.6701, "step": 433, "text_contrastive_loss": 0.942, "train_positive_log_prob": -86.9785, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0273 }, { "contrastive_loss": 0.6644, "epoch": 0.9796839729119639, "grad_norm": 19.324182510375977, "learning_rate": 9.243654536275095e-06, "lm_loss": 5.8235, "loss": 1.655, "step": 434, "text_contrastive_loss": 0.8166, "train_positive_log_prob": -87.3978, "train_positive_token_accuracy": 0.0898, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.6476, "epoch": 0.981941309255079, "grad_norm": 16.82012176513672, "learning_rate": 9.23981322770584e-06, "lm_loss": 5.7523, "loss": 1.6639, "step": 435, "text_contrastive_loss": 0.882, "train_positive_log_prob": -85.5649, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0287 }, { "contrastive_loss": 0.6013, "epoch": 0.9841986455981941, "grad_norm": 16.209348678588867, "learning_rate": 9.235962991621484e-06, "lm_loss": 5.7624, "loss": 1.7297, "step": 436, "text_contrastive_loss": 1.1045, "train_positive_log_prob": -86.0311, "train_positive_token_accuracy": 0.0924, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.6078, "epoch": 0.9864559819413092, "grad_norm": 15.903900146484375, "learning_rate": 9.232103836129239e-06, "lm_loss": 5.9091, "loss": 1.6515, "step": 437, "text_contrastive_loss": 0.9056, "train_positive_log_prob": -89.1635, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.5193, "epoch": 0.9887133182844243, "grad_norm": 14.519559860229492, "learning_rate": 9.22823576935509e-06, "lm_loss": 5.864, "loss": 1.5648, "step": 438, "text_contrastive_loss": 0.9181, "train_positive_log_prob": -87.7092, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.6269, "epoch": 0.9909706546275395, "grad_norm": 16.698381423950195, "learning_rate": 9.224358799443791e-06, "lm_loss": 5.8001, "loss": 1.6402, "step": 439, "text_contrastive_loss": 0.8666, "train_positive_log_prob": -84.8054, "train_positive_token_accuracy": 0.0801, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.5207, "epoch": 0.9932279909706546, "grad_norm": 14.701775550842285, "learning_rate": 9.220472934558838e-06, "lm_loss": 5.937, "loss": 1.5043, "step": 440, "text_contrastive_loss": 0.7798, "train_positive_log_prob": -88.6172, "train_positive_token_accuracy": 0.0701, "train_positive_token_prob": 0.0278 }, { "contrastive_loss": 0.5213, "epoch": 0.9954853273137697, "grad_norm": 15.258160591125488, "learning_rate": 9.216578182882459e-06, "lm_loss": 5.7891, "loss": 1.5187, "step": 441, "text_contrastive_loss": 0.8369, "train_positive_log_prob": -86.4508, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.6021, "epoch": 0.9977426636568849, "grad_norm": 16.111305236816406, "learning_rate": 9.212674552615594e-06, "lm_loss": 5.9081, "loss": 1.6279, "step": 442, "text_contrastive_loss": 0.8701, "train_positive_log_prob": -86.1487, "train_positive_token_accuracy": 0.0721, "train_positive_token_prob": 0.0283 }, { "contrastive_loss": 0.4096, "epoch": 1.0, "grad_norm": 20.45907974243164, "learning_rate": 9.208762051977879e-06, "lm_loss": 5.918, "loss": 1.349, "step": 443, "text_contrastive_loss": 0.6952, "train_positive_log_prob": -89.6557, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0273 }, { "contrastive_loss": 0.4947, "epoch": 1.002257336343115, "grad_norm": 12.947877883911133, "learning_rate": 9.204840689207626e-06, "lm_loss": 5.8088, "loss": 1.5178, "step": 444, "text_contrastive_loss": 0.8845, "train_positive_log_prob": -84.2974, "train_positive_token_accuracy": 0.074, "train_positive_token_prob": 0.0277 }, { "contrastive_loss": 0.6478, "epoch": 1.0045146726862302, "grad_norm": 14.91596794128418, "learning_rate": 9.20091047256181e-06, "lm_loss": 5.7999, "loss": 1.7457, "step": 445, "text_contrastive_loss": 1.0357, "train_positive_log_prob": -86.7551, "train_positive_token_accuracy": 0.0765, "train_positive_token_prob": 0.0269 }, { "contrastive_loss": 0.5065, "epoch": 1.0067720090293453, "grad_norm": 14.268144607543945, "learning_rate": 9.196971410316047e-06, "lm_loss": 5.8751, "loss": 1.5296, "step": 446, "text_contrastive_loss": 0.871, "train_positive_log_prob": -88.0539, "train_positive_token_accuracy": 0.0831, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.4398, "epoch": 1.0090293453724606, "grad_norm": 12.608469009399414, "learning_rate": 9.193023510764578e-06, "lm_loss": 5.793, "loss": 1.396, "step": 447, "text_contrastive_loss": 0.7539, "train_positive_log_prob": -85.8542, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.5217, "epoch": 1.0112866817155757, "grad_norm": 15.451228141784668, "learning_rate": 9.189066782220253e-06, "lm_loss": 5.9104, "loss": 1.5425, "step": 448, "text_contrastive_loss": 0.8596, "train_positive_log_prob": -86.4638, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0278 }, { "contrastive_loss": 0.5485, "epoch": 1.0135440180586908, "grad_norm": 15.606856346130371, "learning_rate": 9.185101233014516e-06, "lm_loss": 5.9077, "loss": 1.6274, "step": 449, "text_contrastive_loss": 0.9763, "train_positive_log_prob": -88.8938, "train_positive_token_accuracy": 0.0846, "train_positive_token_prob": 0.0278 }, { "contrastive_loss": 0.519, "epoch": 1.0158013544018059, "grad_norm": 13.44371223449707, "learning_rate": 9.181126871497378e-06, "lm_loss": 5.8679, "loss": 1.5475, "step": 450, "text_contrastive_loss": 0.8833, "train_positive_log_prob": -88.6146, "train_positive_token_accuracy": 0.0706, "train_positive_token_prob": 0.0268 }, { "contrastive_loss": 0.4338, "epoch": 1.018058690744921, "grad_norm": 12.92390251159668, "learning_rate": 9.177143706037411e-06, "lm_loss": 5.8065, "loss": 1.3952, "step": 451, "text_contrastive_loss": 0.7617, "train_positive_log_prob": -86.5998, "train_positive_token_accuracy": 0.0822, "train_positive_token_prob": 0.0278 }, { "contrastive_loss": 0.4333, "epoch": 1.020316027088036, "grad_norm": 14.111056327819824, "learning_rate": 9.173151745021722e-06, "lm_loss": 5.7691, "loss": 1.4061, "step": 452, "text_contrastive_loss": 0.7918, "train_positive_log_prob": -84.734, "train_positive_token_accuracy": 0.083, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.3544, "epoch": 1.0225733634311513, "grad_norm": 12.541257858276367, "learning_rate": 9.169150996855939e-06, "lm_loss": 5.8573, "loss": 1.3147, "step": 453, "text_contrastive_loss": 0.7492, "train_positive_log_prob": -87.4012, "train_positive_token_accuracy": 0.0804, "train_positive_token_prob": 0.027 }, { "contrastive_loss": 0.5402, "epoch": 1.0248306997742664, "grad_norm": 14.790188789367676, "learning_rate": 9.16514146996419e-06, "lm_loss": 5.8104, "loss": 1.5138, "step": 454, "text_contrastive_loss": 0.7851, "train_positive_log_prob": -87.9943, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.027 }, { "contrastive_loss": 0.4695, "epoch": 1.0270880361173815, "grad_norm": 15.407713890075684, "learning_rate": 9.161123172789091e-06, "lm_loss": 5.758, "loss": 1.4119, "step": 455, "text_contrastive_loss": 0.7332, "train_positive_log_prob": -85.8075, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0287 }, { "contrastive_loss": 0.5372, "epoch": 1.0293453724604966, "grad_norm": 16.16050910949707, "learning_rate": 9.157096113791727e-06, "lm_loss": 5.8291, "loss": 1.5989, "step": 456, "text_contrastive_loss": 0.9578, "train_positive_log_prob": -85.2038, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.496, "epoch": 1.0316027088036117, "grad_norm": 14.388401985168457, "learning_rate": 9.153060301451629e-06, "lm_loss": 5.9564, "loss": 1.5376, "step": 457, "text_contrastive_loss": 0.8919, "train_positive_log_prob": -86.6673, "train_positive_token_accuracy": 0.0765, "train_positive_token_prob": 0.0272 }, { "contrastive_loss": 0.5267, "epoch": 1.0338600451467268, "grad_norm": 16.01051139831543, "learning_rate": 9.149015744266759e-06, "lm_loss": 5.729, "loss": 1.6183, "step": 458, "text_contrastive_loss": 1.0374, "train_positive_log_prob": -85.9048, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.4585, "epoch": 1.036117381489842, "grad_norm": 17.170934677124023, "learning_rate": 9.144962450753491e-06, "lm_loss": 5.9998, "loss": 1.4977, "step": 459, "text_contrastive_loss": 0.8784, "train_positive_log_prob": -88.2386, "train_positive_token_accuracy": 0.0749, "train_positive_token_prob": 0.0276 }, { "contrastive_loss": 0.5279, "epoch": 1.0383747178329572, "grad_norm": 15.702168464660645, "learning_rate": 9.140900429446601e-06, "lm_loss": 5.8052, "loss": 1.5142, "step": 460, "text_contrastive_loss": 0.8115, "train_positive_log_prob": -83.5433, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.564, "epoch": 1.0406320541760723, "grad_norm": 13.882556915283203, "learning_rate": 9.136829688899236e-06, "lm_loss": 5.8419, "loss": 1.6218, "step": 461, "text_contrastive_loss": 0.9472, "train_positive_log_prob": -86.0309, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.027 }, { "contrastive_loss": 0.396, "epoch": 1.0428893905191874, "grad_norm": 13.365018844604492, "learning_rate": 9.132750237682907e-06, "lm_loss": 5.8004, "loss": 1.3386, "step": 462, "text_contrastive_loss": 0.7252, "train_positive_log_prob": -86.9392, "train_positive_token_accuracy": 0.087, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.5425, "epoch": 1.0451467268623025, "grad_norm": 16.31186294555664, "learning_rate": 9.128662084387462e-06, "lm_loss": 5.8817, "loss": 1.524, "step": 463, "text_contrastive_loss": 0.7866, "train_positive_log_prob": -85.8379, "train_positive_token_accuracy": 0.0782, "train_positive_token_prob": 0.0276 }, { "contrastive_loss": 0.582, "epoch": 1.0474040632054176, "grad_norm": 14.941473960876465, "learning_rate": 9.12456523762108e-06, "lm_loss": 5.7214, "loss": 1.6567, "step": 464, "text_contrastive_loss": 1.0052, "train_positive_log_prob": -82.0933, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.4046, "epoch": 1.0496613995485327, "grad_norm": 12.706572532653809, "learning_rate": 9.120459706010233e-06, "lm_loss": 5.8326, "loss": 1.3838, "step": 465, "text_contrastive_loss": 0.7918, "train_positive_log_prob": -86.5063, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.3926, "epoch": 1.0519187358916477, "grad_norm": 15.198698997497559, "learning_rate": 9.116345498199693e-06, "lm_loss": 5.7933, "loss": 1.3493, "step": 466, "text_contrastive_loss": 0.7547, "train_positive_log_prob": -84.9983, "train_positive_token_accuracy": 0.0873, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.4502, "epoch": 1.054176072234763, "grad_norm": 13.700481414794922, "learning_rate": 9.112222622852494e-06, "lm_loss": 5.805, "loss": 1.4557, "step": 467, "text_contrastive_loss": 0.8499, "train_positive_log_prob": -85.4643, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0285 }, { "contrastive_loss": 0.4237, "epoch": 1.0564334085778782, "grad_norm": 13.614365577697754, "learning_rate": 9.108091088649922e-06, "lm_loss": 5.7999, "loss": 1.4297, "step": 468, "text_contrastive_loss": 0.852, "train_positive_log_prob": -86.592, "train_positive_token_accuracy": 0.0761, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.5719, "epoch": 1.0586907449209932, "grad_norm": 16.305103302001953, "learning_rate": 9.103950904291496e-06, "lm_loss": 5.6919, "loss": 1.5975, "step": 469, "text_contrastive_loss": 0.913, "train_positive_log_prob": -84.6675, "train_positive_token_accuracy": 0.0775, "train_positive_token_prob": 0.0283 }, { "contrastive_loss": 0.4606, "epoch": 1.0609480812641083, "grad_norm": 13.533616065979004, "learning_rate": 9.099802078494947e-06, "lm_loss": 5.6829, "loss": 1.479, "step": 470, "text_contrastive_loss": 0.9002, "train_positive_log_prob": -82.9966, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.0287 }, { "contrastive_loss": 0.5167, "epoch": 1.0632054176072234, "grad_norm": 14.10770320892334, "learning_rate": 9.095644619996206e-06, "lm_loss": 5.9261, "loss": 1.6337, "step": 471, "text_contrastive_loss": 1.0488, "train_positive_log_prob": -87.8771, "train_positive_token_accuracy": 0.0685, "train_positive_token_prob": 0.0262 }, { "contrastive_loss": 0.4674, "epoch": 1.0654627539503385, "grad_norm": 14.255465507507324, "learning_rate": 9.09147853754938e-06, "lm_loss": 5.81, "loss": 1.4521, "step": 472, "text_contrastive_loss": 0.8076, "train_positive_log_prob": -85.3749, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.5032, "epoch": 1.0677200902934538, "grad_norm": 15.463451385498047, "learning_rate": 9.087303839926727e-06, "lm_loss": 5.8396, "loss": 1.5136, "step": 473, "text_contrastive_loss": 0.8528, "train_positive_log_prob": -86.4972, "train_positive_token_accuracy": 0.0754, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.521, "epoch": 1.069977426636569, "grad_norm": 14.949661254882812, "learning_rate": 9.08312053591866e-06, "lm_loss": 5.8701, "loss": 1.5653, "step": 474, "text_contrastive_loss": 0.9145, "train_positive_log_prob": -85.7861, "train_positive_token_accuracy": 0.073, "train_positive_token_prob": 0.027 }, { "contrastive_loss": 0.5747, "epoch": 1.072234762979684, "grad_norm": 14.694694519042969, "learning_rate": 9.0789286343337e-06, "lm_loss": 5.8051, "loss": 1.5711, "step": 475, "text_contrastive_loss": 0.8318, "train_positive_log_prob": -85.7406, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0277 }, { "contrastive_loss": 0.4812, "epoch": 1.074492099322799, "grad_norm": 16.750852584838867, "learning_rate": 9.07472814399848e-06, "lm_loss": 5.7319, "loss": 1.5312, "step": 476, "text_contrastive_loss": 0.9536, "train_positive_log_prob": -86.2696, "train_positive_token_accuracy": 0.0831, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.5897, "epoch": 1.0767494356659142, "grad_norm": 15.837838172912598, "learning_rate": 9.070519073757717e-06, "lm_loss": 5.704, "loss": 1.5723, "step": 477, "text_contrastive_loss": 0.8244, "train_positive_log_prob": -84.8902, "train_positive_token_accuracy": 0.0837, "train_positive_token_prob": 0.0291 }, { "contrastive_loss": 0.567, "epoch": 1.0790067720090293, "grad_norm": 14.569334030151367, "learning_rate": 9.06630143247419e-06, "lm_loss": 5.8504, "loss": 1.6406, "step": 478, "text_contrastive_loss": 0.9772, "train_positive_log_prob": -83.4529, "train_positive_token_accuracy": 0.0761, "train_positive_token_prob": 0.0278 }, { "contrastive_loss": 0.4571, "epoch": 1.0812641083521444, "grad_norm": 14.058526992797852, "learning_rate": 9.062075229028728e-06, "lm_loss": 5.8129, "loss": 1.4873, "step": 479, "text_contrastive_loss": 0.8978, "train_positive_log_prob": -86.3999, "train_positive_token_accuracy": 0.0767, "train_positive_token_prob": 0.0278 }, { "contrastive_loss": 0.5346, "epoch": 1.0835214446952597, "grad_norm": 14.769458770751953, "learning_rate": 9.057840472320192e-06, "lm_loss": 5.7671, "loss": 1.477, "step": 480, "text_contrastive_loss": 0.7315, "train_positive_log_prob": -85.201, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0274 }, { "contrastive_loss": 0.4574, "epoch": 1.0857787810383748, "grad_norm": 13.187503814697266, "learning_rate": 9.053597171265447e-06, "lm_loss": 5.8272, "loss": 1.4742, "step": 481, "text_contrastive_loss": 0.8683, "train_positive_log_prob": -88.2313, "train_positive_token_accuracy": 0.0826, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.447, "epoch": 1.0880361173814899, "grad_norm": 14.901713371276855, "learning_rate": 9.04934533479935e-06, "lm_loss": 5.8659, "loss": 1.4509, "step": 482, "text_contrastive_loss": 0.8345, "train_positive_log_prob": -86.9744, "train_positive_token_accuracy": 0.0811, "train_positive_token_prob": 0.0271 }, { "contrastive_loss": 0.4869, "epoch": 1.090293453724605, "grad_norm": 16.12311553955078, "learning_rate": 9.045084971874738e-06, "lm_loss": 5.8612, "loss": 1.5182, "step": 483, "text_contrastive_loss": 0.8905, "train_positive_log_prob": -89.0412, "train_positive_token_accuracy": 0.0825, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.4039, "epoch": 1.09255079006772, "grad_norm": 15.610627174377441, "learning_rate": 9.040816091462393e-06, "lm_loss": 5.8886, "loss": 1.394, "step": 484, "text_contrastive_loss": 0.8024, "train_positive_log_prob": -87.6831, "train_positive_token_accuracy": 0.086, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.5351, "epoch": 1.0948081264108351, "grad_norm": 14.818351745605469, "learning_rate": 9.036538702551037e-06, "lm_loss": 5.7065, "loss": 1.5589, "step": 485, "text_contrastive_loss": 0.9065, "train_positive_log_prob": -82.4999, "train_positive_token_accuracy": 0.0851, "train_positive_token_prob": 0.0285 }, { "contrastive_loss": 0.4522, "epoch": 1.0970654627539504, "grad_norm": 13.92127513885498, "learning_rate": 9.032252814147302e-06, "lm_loss": 5.8093, "loss": 1.4679, "step": 486, "text_contrastive_loss": 0.8695, "train_positive_log_prob": -85.7787, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.4749, "epoch": 1.0993227990970655, "grad_norm": 15.42867374420166, "learning_rate": 9.027958435275726e-06, "lm_loss": 5.8436, "loss": 1.4605, "step": 487, "text_contrastive_loss": 0.8026, "train_positive_log_prob": -87.102, "train_positive_token_accuracy": 0.0765, "train_positive_token_prob": 0.0275 }, { "contrastive_loss": 0.4627, "epoch": 1.1015801354401806, "grad_norm": 14.040528297424316, "learning_rate": 9.023655574978716e-06, "lm_loss": 5.7913, "loss": 1.5035, "step": 488, "text_contrastive_loss": 0.9234, "train_positive_log_prob": -88.9274, "train_positive_token_accuracy": 0.0697, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.3857, "epoch": 1.1038374717832957, "grad_norm": 13.681938171386719, "learning_rate": 9.019344242316542e-06, "lm_loss": 5.7986, "loss": 1.408, "step": 489, "text_contrastive_loss": 0.8849, "train_positive_log_prob": -86.6199, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0281 }, { "contrastive_loss": 0.4946, "epoch": 1.1060948081264108, "grad_norm": 16.23743438720703, "learning_rate": 9.015024446367315e-06, "lm_loss": 5.7592, "loss": 1.4949, "step": 490, "text_contrastive_loss": 0.8488, "train_positive_log_prob": -84.2068, "train_positive_token_accuracy": 0.0836, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.4335, "epoch": 1.108352144469526, "grad_norm": 15.248078346252441, "learning_rate": 9.010696196226963e-06, "lm_loss": 5.7904, "loss": 1.5199, "step": 491, "text_contrastive_loss": 1.0148, "train_positive_log_prob": -82.6063, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.4875, "epoch": 1.110609480812641, "grad_norm": 15.537084579467773, "learning_rate": 9.00635950100922e-06, "lm_loss": 5.7493, "loss": 1.5141, "step": 492, "text_contrastive_loss": 0.9034, "train_positive_log_prob": -88.9566, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0288 }, { "contrastive_loss": 0.5147, "epoch": 1.1128668171557563, "grad_norm": 15.767317771911621, "learning_rate": 9.002014369845592e-06, "lm_loss": 5.8588, "loss": 1.5701, "step": 493, "text_contrastive_loss": 0.9391, "train_positive_log_prob": -86.4833, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0288 }, { "contrastive_loss": 0.5019, "epoch": 1.1151241534988714, "grad_norm": 15.360732078552246, "learning_rate": 8.997660811885367e-06, "lm_loss": 5.9284, "loss": 1.5486, "step": 494, "text_contrastive_loss": 0.9077, "train_positive_log_prob": -84.8963, "train_positive_token_accuracy": 0.0747, "train_positive_token_prob": 0.0277 }, { "contrastive_loss": 0.5222, "epoch": 1.1173814898419865, "grad_norm": 14.968240737915039, "learning_rate": 8.993298836295556e-06, "lm_loss": 5.8004, "loss": 1.5103, "step": 495, "text_contrastive_loss": 0.8162, "train_positive_log_prob": -84.1571, "train_positive_token_accuracy": 0.0732, "train_positive_token_prob": 0.0283 }, { "contrastive_loss": 0.4355, "epoch": 1.1196388261851016, "grad_norm": 14.490927696228027, "learning_rate": 8.988928452260909e-06, "lm_loss": 5.8659, "loss": 1.3729, "step": 496, "text_contrastive_loss": 0.7017, "train_positive_log_prob": -88.259, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.4653, "epoch": 1.1218961625282167, "grad_norm": 15.169868469238281, "learning_rate": 8.984549668983875e-06, "lm_loss": 5.8415, "loss": 1.4662, "step": 497, "text_contrastive_loss": 0.8333, "train_positive_log_prob": -87.096, "train_positive_token_accuracy": 0.0825, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.4849, "epoch": 1.1241534988713318, "grad_norm": 16.12611961364746, "learning_rate": 8.980162495684587e-06, "lm_loss": 5.6943, "loss": 1.4348, "step": 498, "text_contrastive_loss": 0.761, "train_positive_log_prob": -84.3932, "train_positive_token_accuracy": 0.0817, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.4739, "epoch": 1.1264108352144468, "grad_norm": 14.112802505493164, "learning_rate": 8.975766941600852e-06, "lm_loss": 5.6894, "loss": 1.4551, "step": 499, "text_contrastive_loss": 0.8245, "train_positive_log_prob": -81.3625, "train_positive_token_accuracy": 0.0817, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.5062, "epoch": 1.1286681715575622, "grad_norm": 15.006860733032227, "learning_rate": 8.971363015988115e-06, "lm_loss": 5.7202, "loss": 1.4517, "step": 500, "text_contrastive_loss": 0.7469, "train_positive_log_prob": -86.2701, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.4546, "epoch": 1.1309255079006773, "grad_norm": 14.931099891662598, "learning_rate": 8.966950728119453e-06, "lm_loss": 5.7483, "loss": 1.4561, "step": 501, "text_contrastive_loss": 0.8532, "train_positive_log_prob": -86.7438, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.5577, "epoch": 1.1331828442437923, "grad_norm": 14.916293144226074, "learning_rate": 8.962530087285552e-06, "lm_loss": 5.7719, "loss": 1.5366, "step": 502, "text_contrastive_loss": 0.8034, "train_positive_log_prob": -84.6355, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.0285 }, { "contrastive_loss": 0.3864, "epoch": 1.1354401805869074, "grad_norm": 12.708629608154297, "learning_rate": 8.958101102794686e-06, "lm_loss": 5.7789, "loss": 1.4053, "step": 503, "text_contrastive_loss": 0.8821, "train_positive_log_prob": -86.2771, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0291 }, { "contrastive_loss": 0.5313, "epoch": 1.1376975169300225, "grad_norm": 15.195137023925781, "learning_rate": 8.953663783972692e-06, "lm_loss": 5.752, "loss": 1.5259, "step": 504, "text_contrastive_loss": 0.8388, "train_positive_log_prob": -86.0143, "train_positive_token_accuracy": 0.0789, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.3863, "epoch": 1.1399548532731376, "grad_norm": 14.520092964172363, "learning_rate": 8.949218140162965e-06, "lm_loss": 5.733, "loss": 1.4052, "step": 505, "text_contrastive_loss": 0.8912, "train_positive_log_prob": -84.1584, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0277 }, { "contrastive_loss": 0.4617, "epoch": 1.1422121896162527, "grad_norm": 14.958356857299805, "learning_rate": 8.944764180726423e-06, "lm_loss": 5.7915, "loss": 1.5506, "step": 506, "text_contrastive_loss": 1.0196, "train_positive_log_prob": -85.0416, "train_positive_token_accuracy": 0.0758, "train_positive_token_prob": 0.0281 }, { "contrastive_loss": 0.6412, "epoch": 1.144469525959368, "grad_norm": 17.7445068359375, "learning_rate": 8.940301915041496e-06, "lm_loss": 5.7276, "loss": 1.6771, "step": 507, "text_contrastive_loss": 0.9263, "train_positive_log_prob": -84.1906, "train_positive_token_accuracy": 0.0874, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.5004, "epoch": 1.146726862302483, "grad_norm": 14.941315650939941, "learning_rate": 8.935831352504103e-06, "lm_loss": 5.7347, "loss": 1.4447, "step": 508, "text_contrastive_loss": 0.7416, "train_positive_log_prob": -83.6701, "train_positive_token_accuracy": 0.0782, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.5204, "epoch": 1.1489841986455982, "grad_norm": 17.664854049682617, "learning_rate": 8.931352502527633e-06, "lm_loss": 5.7146, "loss": 1.5246, "step": 509, "text_contrastive_loss": 0.8656, "train_positive_log_prob": -82.9064, "train_positive_token_accuracy": 0.0781, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.548, "epoch": 1.1512415349887133, "grad_norm": 17.793493270874023, "learning_rate": 8.926865374542928e-06, "lm_loss": 5.7054, "loss": 1.6607, "step": 510, "text_contrastive_loss": 1.0843, "train_positive_log_prob": -82.4141, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.0287 }, { "contrastive_loss": 0.5057, "epoch": 1.1534988713318284, "grad_norm": 16.280363082885742, "learning_rate": 8.922369977998257e-06, "lm_loss": 5.8599, "loss": 1.5999, "step": 511, "text_contrastive_loss": 1.0164, "train_positive_log_prob": -88.3531, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.0287 }, { "contrastive_loss": 0.4861, "epoch": 1.1557562076749435, "grad_norm": 17.49152183532715, "learning_rate": 8.917866322359303e-06, "lm_loss": 5.8364, "loss": 1.5015, "step": 512, "text_contrastive_loss": 0.8635, "train_positive_log_prob": -87.2909, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.4601, "epoch": 1.1580135440180588, "grad_norm": 13.432955741882324, "learning_rate": 8.913354417109136e-06, "lm_loss": 5.7393, "loss": 1.3987, "step": 513, "text_contrastive_loss": 0.7295, "train_positive_log_prob": -83.783, "train_positive_token_accuracy": 0.0789, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.4495, "epoch": 1.1602708803611739, "grad_norm": 14.593243598937988, "learning_rate": 8.908834271748202e-06, "lm_loss": 5.7306, "loss": 1.4098, "step": 514, "text_contrastive_loss": 0.7744, "train_positive_log_prob": -85.9168, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0286 }, { "contrastive_loss": 0.4307, "epoch": 1.162528216704289, "grad_norm": 14.839759826660156, "learning_rate": 8.904305895794292e-06, "lm_loss": 5.7948, "loss": 1.4511, "step": 515, "text_contrastive_loss": 0.8819, "train_positive_log_prob": -86.7461, "train_positive_token_accuracy": 0.0861, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.6057, "epoch": 1.164785553047404, "grad_norm": 15.436018943786621, "learning_rate": 8.899769298782528e-06, "lm_loss": 5.8003, "loss": 1.6617, "step": 516, "text_contrastive_loss": 0.9521, "train_positive_log_prob": -86.7558, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.5433, "epoch": 1.1670428893905191, "grad_norm": 16.551664352416992, "learning_rate": 8.895224490265346e-06, "lm_loss": 5.6868, "loss": 1.5589, "step": 517, "text_contrastive_loss": 0.8938, "train_positive_log_prob": -82.3282, "train_positive_token_accuracy": 0.0801, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.463, "epoch": 1.1693002257336342, "grad_norm": 13.993252754211426, "learning_rate": 8.890671479812472e-06, "lm_loss": 5.7785, "loss": 1.3917, "step": 518, "text_contrastive_loss": 0.7018, "train_positive_log_prob": -84.2373, "train_positive_token_accuracy": 0.0756, "train_positive_token_prob": 0.0286 }, { "contrastive_loss": 0.5316, "epoch": 1.1715575620767495, "grad_norm": 15.224872589111328, "learning_rate": 8.886110277010902e-06, "lm_loss": 5.7554, "loss": 1.5065, "step": 519, "text_contrastive_loss": 0.7989, "train_positive_log_prob": -86.3766, "train_positive_token_accuracy": 0.0737, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.5913, "epoch": 1.1738148984198646, "grad_norm": 15.95876693725586, "learning_rate": 8.88154089146488e-06, "lm_loss": 5.6424, "loss": 1.606, "step": 520, "text_contrastive_loss": 0.9009, "train_positive_log_prob": -80.9136, "train_positive_token_accuracy": 0.0877, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.4302, "epoch": 1.1760722347629797, "grad_norm": 12.770486831665039, "learning_rate": 8.876963332795881e-06, "lm_loss": 5.7211, "loss": 1.4309, "step": 521, "text_contrastive_loss": 0.8572, "train_positive_log_prob": -85.3662, "train_positive_token_accuracy": 0.0832, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.5878, "epoch": 1.1783295711060948, "grad_norm": 15.525639533996582, "learning_rate": 8.87237761064259e-06, "lm_loss": 5.6982, "loss": 1.5839, "step": 522, "text_contrastive_loss": 0.8525, "train_positive_log_prob": -83.6426, "train_positive_token_accuracy": 0.0875, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.4203, "epoch": 1.18058690744921, "grad_norm": 14.036768913269043, "learning_rate": 8.867783734660883e-06, "lm_loss": 5.7352, "loss": 1.4365, "step": 523, "text_contrastive_loss": 0.8852, "train_positive_log_prob": -82.3299, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.4719, "epoch": 1.182844243792325, "grad_norm": 14.465782165527344, "learning_rate": 8.8631817145238e-06, "lm_loss": 5.8459, "loss": 1.516, "step": 524, "text_contrastive_loss": 0.9191, "train_positive_log_prob": -87.0917, "train_positive_token_accuracy": 0.0829, "train_positive_token_prob": 0.0286 }, { "contrastive_loss": 0.4598, "epoch": 1.18510158013544, "grad_norm": 14.271872520446777, "learning_rate": 8.858571559921539e-06, "lm_loss": 5.6123, "loss": 1.4508, "step": 525, "text_contrastive_loss": 0.8597, "train_positive_log_prob": -83.13, "train_positive_token_accuracy": 0.0833, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.5269, "epoch": 1.1873589164785554, "grad_norm": 14.845014572143555, "learning_rate": 8.853953280561412e-06, "lm_loss": 5.6389, "loss": 1.5117, "step": 526, "text_contrastive_loss": 0.8418, "train_positive_log_prob": -81.1695, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.3811, "epoch": 1.1896162528216705, "grad_norm": 12.324769973754883, "learning_rate": 8.849326886167854e-06, "lm_loss": 5.7842, "loss": 1.3254, "step": 527, "text_contrastive_loss": 0.7317, "train_positive_log_prob": -85.9033, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.4383, "epoch": 1.1918735891647856, "grad_norm": 14.406002044677734, "learning_rate": 8.844692386482379e-06, "lm_loss": 5.6966, "loss": 1.3858, "step": 528, "text_contrastive_loss": 0.7558, "train_positive_log_prob": -84.6946, "train_positive_token_accuracy": 0.0838, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.3975, "epoch": 1.1941309255079007, "grad_norm": 12.731202125549316, "learning_rate": 8.840049791263567e-06, "lm_loss": 5.7486, "loss": 1.3348, "step": 529, "text_contrastive_loss": 0.7249, "train_positive_log_prob": -84.5876, "train_positive_token_accuracy": 0.0836, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4899, "epoch": 1.1963882618510158, "grad_norm": 15.5960054397583, "learning_rate": 8.835399110287046e-06, "lm_loss": 5.729, "loss": 1.5556, "step": 530, "text_contrastive_loss": 0.9857, "train_positive_log_prob": -84.167, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0283 }, { "contrastive_loss": 0.5163, "epoch": 1.1986455981941309, "grad_norm": 14.73003101348877, "learning_rate": 8.830740353345475e-06, "lm_loss": 5.6938, "loss": 1.5633, "step": 531, "text_contrastive_loss": 0.9551, "train_positive_log_prob": -83.7345, "train_positive_token_accuracy": 0.0866, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4693, "epoch": 1.200902934537246, "grad_norm": 14.445740699768066, "learning_rate": 8.826073530248508e-06, "lm_loss": 5.6517, "loss": 1.3906, "step": 532, "text_contrastive_loss": 0.7122, "train_positive_log_prob": -82.456, "train_positive_token_accuracy": 0.076, "train_positive_token_prob": 0.0291 }, { "contrastive_loss": 0.3856, "epoch": 1.2031602708803613, "grad_norm": 13.313594818115234, "learning_rate": 8.82139865082279e-06, "lm_loss": 5.7347, "loss": 1.3076, "step": 533, "text_contrastive_loss": 0.6969, "train_positive_log_prob": -85.2587, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.5691, "epoch": 1.2054176072234764, "grad_norm": 15.315177917480469, "learning_rate": 8.81671572491193e-06, "lm_loss": 5.866, "loss": 1.6133, "step": 534, "text_contrastive_loss": 0.9153, "train_positive_log_prob": -86.1575, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.5431, "epoch": 1.2076749435665914, "grad_norm": 14.558280944824219, "learning_rate": 8.812024762376477e-06, "lm_loss": 5.7293, "loss": 1.5564, "step": 535, "text_contrastive_loss": 0.8807, "train_positive_log_prob": -82.6475, "train_positive_token_accuracy": 0.0777, "train_positive_token_prob": 0.0287 }, { "contrastive_loss": 0.4477, "epoch": 1.2099322799097065, "grad_norm": 17.240751266479492, "learning_rate": 8.807325773093904e-06, "lm_loss": 5.7842, "loss": 1.5066, "step": 536, "text_contrastive_loss": 0.9611, "train_positive_log_prob": -84.7264, "train_positive_token_accuracy": 0.0701, "train_positive_token_prob": 0.027 }, { "contrastive_loss": 0.5589, "epoch": 1.2121896162528216, "grad_norm": 15.851217269897461, "learning_rate": 8.802618766958586e-06, "lm_loss": 5.7564, "loss": 1.5424, "step": 537, "text_contrastive_loss": 0.8157, "train_positive_log_prob": -86.8839, "train_positive_token_accuracy": 0.0719, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.5343, "epoch": 1.2144469525959367, "grad_norm": 15.090217590332031, "learning_rate": 8.797903753881775e-06, "lm_loss": 5.7031, "loss": 1.5509, "step": 538, "text_contrastive_loss": 0.8924, "train_positive_log_prob": -84.3439, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.4792, "epoch": 1.2167042889390518, "grad_norm": 14.871121406555176, "learning_rate": 8.793180743791587e-06, "lm_loss": 5.6515, "loss": 1.4767, "step": 539, "text_contrastive_loss": 0.8647, "train_positive_log_prob": -84.1551, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.4826, "epoch": 1.2189616252821671, "grad_norm": 14.766619682312012, "learning_rate": 8.788449746632976e-06, "lm_loss": 5.7669, "loss": 1.5008, "step": 540, "text_contrastive_loss": 0.8829, "train_positive_log_prob": -85.2913, "train_positive_token_accuracy": 0.0726, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.4565, "epoch": 1.2212189616252822, "grad_norm": 13.520822525024414, "learning_rate": 8.78371077236771e-06, "lm_loss": 5.7719, "loss": 1.456, "step": 541, "text_contrastive_loss": 0.8445, "train_positive_log_prob": -83.2678, "train_positive_token_accuracy": 0.0747, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.4993, "epoch": 1.2234762979683973, "grad_norm": 15.36131763458252, "learning_rate": 8.778963830974362e-06, "lm_loss": 5.8409, "loss": 1.6026, "step": 542, "text_contrastive_loss": 1.0384, "train_positive_log_prob": -86.1752, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0281 }, { "contrastive_loss": 0.3354, "epoch": 1.2257336343115124, "grad_norm": 11.211355209350586, "learning_rate": 8.77420893244827e-06, "lm_loss": 5.7047, "loss": 1.2846, "step": 543, "text_contrastive_loss": 0.7573, "train_positive_log_prob": -84.77, "train_positive_token_accuracy": 0.0783, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.4381, "epoch": 1.2279909706546275, "grad_norm": 15.713208198547363, "learning_rate": 8.769446086801536e-06, "lm_loss": 5.6901, "loss": 1.4123, "step": 544, "text_contrastive_loss": 0.8102, "train_positive_log_prob": -85.6427, "train_positive_token_accuracy": 0.0838, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.4511, "epoch": 1.2302483069977426, "grad_norm": 15.119928359985352, "learning_rate": 8.764675304062992e-06, "lm_loss": 5.8012, "loss": 1.4833, "step": 545, "text_contrastive_loss": 0.9043, "train_positive_log_prob": -86.3176, "train_positive_token_accuracy": 0.0732, "train_positive_token_prob": 0.0283 }, { "contrastive_loss": 0.5804, "epoch": 1.2325056433408579, "grad_norm": 17.2160587310791, "learning_rate": 8.759896594278183e-06, "lm_loss": 5.7205, "loss": 1.6213, "step": 546, "text_contrastive_loss": 0.9377, "train_positive_log_prob": -83.6166, "train_positive_token_accuracy": 0.0852, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4843, "epoch": 1.234762979683973, "grad_norm": 14.953021049499512, "learning_rate": 8.755109967509345e-06, "lm_loss": 5.7142, "loss": 1.4419, "step": 547, "text_contrastive_loss": 0.7725, "train_positive_log_prob": -83.6403, "train_positive_token_accuracy": 0.0761, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.5046, "epoch": 1.237020316027088, "grad_norm": 15.522333145141602, "learning_rate": 8.750315433835387e-06, "lm_loss": 5.6737, "loss": 1.5456, "step": 548, "text_contrastive_loss": 0.9472, "train_positive_log_prob": -83.5554, "train_positive_token_accuracy": 0.0798, "train_positive_token_prob": 0.0285 }, { "contrastive_loss": 0.5071, "epoch": 1.2392776523702032, "grad_norm": 14.70351505279541, "learning_rate": 8.745513003351862e-06, "lm_loss": 5.6814, "loss": 1.5055, "step": 549, "text_contrastive_loss": 0.8606, "train_positive_log_prob": -85.7084, "train_positive_token_accuracy": 0.0834, "train_positive_token_prob": 0.0291 }, { "contrastive_loss": 0.5358, "epoch": 1.2415349887133182, "grad_norm": 16.600210189819336, "learning_rate": 8.740702686170955e-06, "lm_loss": 5.814, "loss": 1.6439, "step": 550, "text_contrastive_loss": 1.0533, "train_positive_log_prob": -86.0512, "train_positive_token_accuracy": 0.0839, "train_positive_token_prob": 0.0277 }, { "contrastive_loss": 0.3952, "epoch": 1.2437923250564333, "grad_norm": 15.454834938049316, "learning_rate": 8.735884492421457e-06, "lm_loss": 5.6809, "loss": 1.3336, "step": 551, "text_contrastive_loss": 0.7406, "train_positive_log_prob": -83.021, "train_positive_token_accuracy": 0.0823, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.5029, "epoch": 1.2460496613995486, "grad_norm": 16.872909545898438, "learning_rate": 8.731058432248743e-06, "lm_loss": 5.7728, "loss": 1.606, "step": 552, "text_contrastive_loss": 1.0516, "train_positive_log_prob": -84.6123, "train_positive_token_accuracy": 0.0736, "train_positive_token_prob": 0.0272 }, { "contrastive_loss": 0.4669, "epoch": 1.2483069977426637, "grad_norm": 15.279902458190918, "learning_rate": 8.726224515814752e-06, "lm_loss": 5.7958, "loss": 1.5404, "step": 553, "text_contrastive_loss": 0.9878, "train_positive_log_prob": -86.8693, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0275 }, { "contrastive_loss": 0.5938, "epoch": 1.2505643340857788, "grad_norm": 16.064992904663086, "learning_rate": 8.721382753297967e-06, "lm_loss": 5.7911, "loss": 1.5923, "step": 554, "text_contrastive_loss": 0.8389, "train_positive_log_prob": -84.9457, "train_positive_token_accuracy": 0.086, "train_positive_token_prob": 0.0281 }, { "contrastive_loss": 0.4106, "epoch": 1.252821670428894, "grad_norm": 15.163758277893066, "learning_rate": 8.71653315489339e-06, "lm_loss": 5.7514, "loss": 1.4164, "step": 555, "text_contrastive_loss": 0.8613, "train_positive_log_prob": -86.3594, "train_positive_token_accuracy": 0.0801, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.3873, "epoch": 1.255079006772009, "grad_norm": 13.098235130310059, "learning_rate": 8.711675730812522e-06, "lm_loss": 5.731, "loss": 1.3723, "step": 556, "text_contrastive_loss": 0.8238, "train_positive_log_prob": -84.1503, "train_positive_token_accuracy": 0.0809, "train_positive_token_prob": 0.0286 }, { "contrastive_loss": 0.5018, "epoch": 1.257336343115124, "grad_norm": 15.086501121520996, "learning_rate": 8.706810491283346e-06, "lm_loss": 5.7284, "loss": 1.4601, "step": 557, "text_contrastive_loss": 0.7709, "train_positive_log_prob": -83.2167, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0282 }, { "contrastive_loss": 0.5713, "epoch": 1.2595936794582392, "grad_norm": 17.556636810302734, "learning_rate": 8.701937446550298e-06, "lm_loss": 5.6779, "loss": 1.6101, "step": 558, "text_contrastive_loss": 0.942, "train_positive_log_prob": -84.7448, "train_positive_token_accuracy": 0.0835, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.4792, "epoch": 1.2618510158013545, "grad_norm": 13.67333698272705, "learning_rate": 8.69705660687425e-06, "lm_loss": 5.7951, "loss": 1.4587, "step": 559, "text_contrastive_loss": 0.7999, "train_positive_log_prob": -86.564, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.6232, "epoch": 1.2641083521444696, "grad_norm": 16.826778411865234, "learning_rate": 8.692167982532487e-06, "lm_loss": 5.7352, "loss": 1.7336, "step": 560, "text_contrastive_loss": 1.0739, "train_positive_log_prob": -84.1038, "train_positive_token_accuracy": 0.0804, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4988, "epoch": 1.2663656884875847, "grad_norm": 15.485930442810059, "learning_rate": 8.687271583818687e-06, "lm_loss": 5.6988, "loss": 1.5127, "step": 561, "text_contrastive_loss": 0.888, "train_positive_log_prob": -84.5729, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.4706, "epoch": 1.2686230248306998, "grad_norm": 14.440827369689941, "learning_rate": 8.682367421042895e-06, "lm_loss": 5.7956, "loss": 1.4354, "step": 562, "text_contrastive_loss": 0.7705, "train_positive_log_prob": -87.1599, "train_positive_token_accuracy": 0.0743, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.6045, "epoch": 1.2708803611738149, "grad_norm": 16.22441864013672, "learning_rate": 8.677455504531507e-06, "lm_loss": 5.8231, "loss": 1.7022, "step": 563, "text_contrastive_loss": 1.0307, "train_positive_log_prob": -87.6188, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.4107, "epoch": 1.27313769751693, "grad_norm": 14.138707160949707, "learning_rate": 8.672535844627243e-06, "lm_loss": 5.8323, "loss": 1.446, "step": 564, "text_contrastive_loss": 0.904, "train_positive_log_prob": -89.0805, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.5039, "epoch": 1.275395033860045, "grad_norm": 15.108689308166504, "learning_rate": 8.667608451689135e-06, "lm_loss": 5.7063, "loss": 1.4567, "step": 565, "text_contrastive_loss": 0.7643, "train_positive_log_prob": -84.7062, "train_positive_token_accuracy": 0.0811, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.5203, "epoch": 1.2776523702031604, "grad_norm": 14.707185745239258, "learning_rate": 8.662673336092487e-06, "lm_loss": 5.8072, "loss": 1.5847, "step": 566, "text_contrastive_loss": 0.9673, "train_positive_log_prob": -86.0674, "train_positive_token_accuracy": 0.0783, "train_positive_token_prob": 0.0288 }, { "contrastive_loss": 0.4584, "epoch": 1.2799097065462754, "grad_norm": 14.290581703186035, "learning_rate": 8.657730508228874e-06, "lm_loss": 5.8627, "loss": 1.4925, "step": 567, "text_contrastive_loss": 0.8957, "train_positive_log_prob": -87.209, "train_positive_token_accuracy": 0.0725, "train_positive_token_prob": 0.0278 }, { "contrastive_loss": 0.5026, "epoch": 1.2821670428893905, "grad_norm": 15.093138694763184, "learning_rate": 8.652779978506103e-06, "lm_loss": 5.8232, "loss": 1.577, "step": 568, "text_contrastive_loss": 0.9841, "train_positive_log_prob": -87.1005, "train_positive_token_accuracy": 0.0739, "train_positive_token_prob": 0.0288 }, { "contrastive_loss": 0.4428, "epoch": 1.2844243792325056, "grad_norm": 14.957511901855469, "learning_rate": 8.647821757348202e-06, "lm_loss": 5.8571, "loss": 1.3795, "step": 569, "text_contrastive_loss": 0.7018, "train_positive_log_prob": -88.7241, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.5481, "epoch": 1.2866817155756207, "grad_norm": 13.965394020080566, "learning_rate": 8.642855855195394e-06, "lm_loss": 5.7554, "loss": 1.5387, "step": 570, "text_contrastive_loss": 0.83, "train_positive_log_prob": -85.7922, "train_positive_token_accuracy": 0.0755, "train_positive_token_prob": 0.0281 }, { "contrastive_loss": 0.4729, "epoch": 1.2889390519187358, "grad_norm": 13.30693244934082, "learning_rate": 8.637882282504075e-06, "lm_loss": 5.8883, "loss": 1.4739, "step": 571, "text_contrastive_loss": 0.8244, "train_positive_log_prob": -89.8679, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.5084, "epoch": 1.291196388261851, "grad_norm": 15.2211332321167, "learning_rate": 8.632901049746793e-06, "lm_loss": 5.7314, "loss": 1.6142, "step": 572, "text_contrastive_loss": 1.0654, "train_positive_log_prob": -84.6486, "train_positive_token_accuracy": 0.0767, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.5172, "epoch": 1.2934537246049662, "grad_norm": 16.37986183166504, "learning_rate": 8.627912167412222e-06, "lm_loss": 5.6582, "loss": 1.5207, "step": 573, "text_contrastive_loss": 0.8753, "train_positive_log_prob": -84.4503, "train_positive_token_accuracy": 0.0845, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.4995, "epoch": 1.2957110609480813, "grad_norm": 16.388891220092773, "learning_rate": 8.622915646005152e-06, "lm_loss": 5.7194, "loss": 1.5174, "step": 574, "text_contrastive_loss": 0.8919, "train_positive_log_prob": -85.2107, "train_positive_token_accuracy": 0.0823, "train_positive_token_prob": 0.0285 }, { "contrastive_loss": 0.414, "epoch": 1.2979683972911964, "grad_norm": 14.741597175598145, "learning_rate": 8.617911496046446e-06, "lm_loss": 5.7209, "loss": 1.3457, "step": 575, "text_contrastive_loss": 0.7192, "train_positive_log_prob": -85.7448, "train_positive_token_accuracy": 0.0753, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.4004, "epoch": 1.3002257336343115, "grad_norm": 13.857163429260254, "learning_rate": 8.612899728073039e-06, "lm_loss": 5.818, "loss": 1.3764, "step": 576, "text_contrastive_loss": 0.7884, "train_positive_log_prob": -88.0437, "train_positive_token_accuracy": 0.0811, "train_positive_token_prob": 0.0282 }, { "contrastive_loss": 0.458, "epoch": 1.3024830699774266, "grad_norm": 14.838714599609375, "learning_rate": 8.607880352637905e-06, "lm_loss": 5.7136, "loss": 1.3895, "step": 577, "text_contrastive_loss": 0.7202, "train_positive_log_prob": -82.3788, "train_positive_token_accuracy": 0.087, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.467, "epoch": 1.304740406320542, "grad_norm": 16.11862564086914, "learning_rate": 8.602853380310033e-06, "lm_loss": 5.7059, "loss": 1.4292, "step": 578, "text_contrastive_loss": 0.7831, "train_positive_log_prob": -83.1163, "train_positive_token_accuracy": 0.0834, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.5316, "epoch": 1.3069977426636568, "grad_norm": 16.140466690063477, "learning_rate": 8.59781882167441e-06, "lm_loss": 5.7495, "loss": 1.5035, "step": 579, "text_contrastive_loss": 0.7939, "train_positive_log_prob": -85.2785, "train_positive_token_accuracy": 0.083, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.4612, "epoch": 1.309255079006772, "grad_norm": 14.843207359313965, "learning_rate": 8.592776687332003e-06, "lm_loss": 5.8023, "loss": 1.5381, "step": 580, "text_contrastive_loss": 0.9934, "train_positive_log_prob": -85.1521, "train_positive_token_accuracy": 0.0825, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.4867, "epoch": 1.3115124153498872, "grad_norm": 14.938104629516602, "learning_rate": 8.58772698789972e-06, "lm_loss": 5.7041, "loss": 1.4913, "step": 581, "text_contrastive_loss": 0.8684, "train_positive_log_prob": -83.6713, "train_positive_token_accuracy": 0.0858, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4529, "epoch": 1.3137697516930023, "grad_norm": 14.714336395263672, "learning_rate": 8.582669734010407e-06, "lm_loss": 5.6696, "loss": 1.4519, "step": 582, "text_contrastive_loss": 0.8641, "train_positive_log_prob": -83.7359, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0285 }, { "contrastive_loss": 0.4953, "epoch": 1.3160270880361173, "grad_norm": 14.955601692199707, "learning_rate": 8.577604936312813e-06, "lm_loss": 5.6612, "loss": 1.4908, "step": 583, "text_contrastive_loss": 0.8588, "train_positive_log_prob": -82.7071, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.5167, "epoch": 1.3182844243792324, "grad_norm": 16.038482666015625, "learning_rate": 8.572532605471572e-06, "lm_loss": 5.759, "loss": 1.5626, "step": 584, "text_contrastive_loss": 0.94, "train_positive_log_prob": -85.9355, "train_positive_token_accuracy": 0.0728, "train_positive_token_prob": 0.0285 }, { "contrastive_loss": 0.5387, "epoch": 1.3205417607223477, "grad_norm": 14.877217292785645, "learning_rate": 8.567452752167183e-06, "lm_loss": 5.6663, "loss": 1.5811, "step": 585, "text_contrastive_loss": 0.9515, "train_positive_log_prob": -83.557, "train_positive_token_accuracy": 0.0808, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.4369, "epoch": 1.3227990970654628, "grad_norm": 14.755962371826172, "learning_rate": 8.562365387095977e-06, "lm_loss": 5.6585, "loss": 1.4837, "step": 586, "text_contrastive_loss": 0.9619, "train_positive_log_prob": -82.978, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0291 }, { "contrastive_loss": 0.4224, "epoch": 1.325056433408578, "grad_norm": 14.569847106933594, "learning_rate": 8.557270520970111e-06, "lm_loss": 5.6073, "loss": 1.3786, "step": 587, "text_contrastive_loss": 0.7909, "train_positive_log_prob": -84.6998, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.4036, "epoch": 1.327313769751693, "grad_norm": 13.44758129119873, "learning_rate": 8.552168164517532e-06, "lm_loss": 5.6238, "loss": 1.3288, "step": 588, "text_contrastive_loss": 0.7257, "train_positive_log_prob": -84.845, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.0291 }, { "contrastive_loss": 0.5184, "epoch": 1.329571106094808, "grad_norm": 13.879511833190918, "learning_rate": 8.547058328481959e-06, "lm_loss": 5.739, "loss": 1.5182, "step": 589, "text_contrastive_loss": 0.8516, "train_positive_log_prob": -83.8685, "train_positive_token_accuracy": 0.0695, "train_positive_token_prob": 0.0276 }, { "contrastive_loss": 0.4487, "epoch": 1.3318284424379232, "grad_norm": 13.161958694458008, "learning_rate": 8.54194102362286e-06, "lm_loss": 5.6756, "loss": 1.4122, "step": 590, "text_contrastive_loss": 0.7918, "train_positive_log_prob": -84.561, "train_positive_token_accuracy": 0.0758, "train_positive_token_prob": 0.0277 }, { "contrastive_loss": 0.5622, "epoch": 1.3340857787810383, "grad_norm": 15.899773597717285, "learning_rate": 8.536816260715433e-06, "lm_loss": 5.819, "loss": 1.6092, "step": 591, "text_contrastive_loss": 0.9302, "train_positive_log_prob": -86.9451, "train_positive_token_accuracy": 0.0719, "train_positive_token_prob": 0.0272 }, { "contrastive_loss": 0.5415, "epoch": 1.3363431151241536, "grad_norm": 14.699732780456543, "learning_rate": 8.531684050550575e-06, "lm_loss": 5.6373, "loss": 1.574, "step": 592, "text_contrastive_loss": 0.9375, "train_positive_log_prob": -82.4896, "train_positive_token_accuracy": 0.0785, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.436, "epoch": 1.3386004514672687, "grad_norm": 13.371824264526367, "learning_rate": 8.526544403934868e-06, "lm_loss": 5.7423, "loss": 1.4383, "step": 593, "text_contrastive_loss": 0.8561, "train_positive_log_prob": -85.6432, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0286 }, { "contrastive_loss": 0.4243, "epoch": 1.3408577878103838, "grad_norm": 12.91390323638916, "learning_rate": 8.521397331690551e-06, "lm_loss": 5.6372, "loss": 1.3804, "step": 594, "text_contrastive_loss": 0.7848, "train_positive_log_prob": -83.7875, "train_positive_token_accuracy": 0.0762, "train_positive_token_prob": 0.0282 }, { "contrastive_loss": 0.4223, "epoch": 1.3431151241534989, "grad_norm": 14.641847610473633, "learning_rate": 8.516242844655498e-06, "lm_loss": 5.7631, "loss": 1.368, "step": 595, "text_contrastive_loss": 0.7388, "train_positive_log_prob": -84.9133, "train_positive_token_accuracy": 0.0758, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.6231, "epoch": 1.345372460496614, "grad_norm": 18.55071258544922, "learning_rate": 8.5110809536832e-06, "lm_loss": 5.8372, "loss": 1.6618, "step": 596, "text_contrastive_loss": 0.9099, "train_positive_log_prob": -86.3248, "train_positive_token_accuracy": 0.0711, "train_positive_token_prob": 0.0271 }, { "contrastive_loss": 0.5289, "epoch": 1.347629796839729, "grad_norm": 17.197635650634766, "learning_rate": 8.50591166964273e-06, "lm_loss": 5.804, "loss": 1.5507, "step": 597, "text_contrastive_loss": 0.8828, "train_positive_log_prob": -86.7005, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0281 }, { "contrastive_loss": 0.5315, "epoch": 1.3498871331828441, "grad_norm": 12.700179100036621, "learning_rate": 8.500735003418734e-06, "lm_loss": 5.6742, "loss": 1.5912, "step": 598, "text_contrastive_loss": 0.9845, "train_positive_log_prob": -82.3024, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.5325, "epoch": 1.3521444695259595, "grad_norm": 15.083609580993652, "learning_rate": 8.495550965911403e-06, "lm_loss": 5.6869, "loss": 1.6384, "step": 599, "text_contrastive_loss": 1.0745, "train_positive_log_prob": -85.2988, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0287 }, { "contrastive_loss": 0.5557, "epoch": 1.3544018058690745, "grad_norm": 16.89891815185547, "learning_rate": 8.490359568036446e-06, "lm_loss": 5.7464, "loss": 1.6062, "step": 600, "text_contrastive_loss": 0.9517, "train_positive_log_prob": -86.6905, "train_positive_token_accuracy": 0.0746, "train_positive_token_prob": 0.0282 }, { "contrastive_loss": 0.5086, "epoch": 1.3566591422121896, "grad_norm": 14.070127487182617, "learning_rate": 8.485160820725073e-06, "lm_loss": 5.834, "loss": 1.4916, "step": 601, "text_contrastive_loss": 0.7992, "train_positive_log_prob": -88.8102, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.492, "epoch": 1.3589164785553047, "grad_norm": 15.26427936553955, "learning_rate": 8.479954734923967e-06, "lm_loss": 5.6653, "loss": 1.458, "step": 602, "text_contrastive_loss": 0.799, "train_positive_log_prob": -83.984, "train_positive_token_accuracy": 0.087, "train_positive_token_prob": 0.0291 }, { "contrastive_loss": 0.4101, "epoch": 1.3611738148984198, "grad_norm": 13.670388221740723, "learning_rate": 8.474741321595263e-06, "lm_loss": 5.7464, "loss": 1.3817, "step": 603, "text_contrastive_loss": 0.7938, "train_positive_log_prob": -84.6457, "train_positive_token_accuracy": 0.0814, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.5669, "epoch": 1.363431151241535, "grad_norm": 16.399883270263672, "learning_rate": 8.46952059171653e-06, "lm_loss": 5.7989, "loss": 1.5579, "step": 604, "text_contrastive_loss": 0.8222, "train_positive_log_prob": -87.2108, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.5052, "epoch": 1.36568848758465, "grad_norm": 14.55126667022705, "learning_rate": 8.464292556280734e-06, "lm_loss": 5.6811, "loss": 1.4282, "step": 605, "text_contrastive_loss": 0.7097, "train_positive_log_prob": -83.2894, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.4126, "epoch": 1.3679458239277653, "grad_norm": 13.797871589660645, "learning_rate": 8.459057226296232e-06, "lm_loss": 5.6525, "loss": 1.3848, "step": 606, "text_contrastive_loss": 0.8138, "train_positive_log_prob": -83.4893, "train_positive_token_accuracy": 0.0801, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.4463, "epoch": 1.3702031602708804, "grad_norm": 14.512334823608398, "learning_rate": 8.453814612786736e-06, "lm_loss": 5.7283, "loss": 1.5064, "step": 607, "text_contrastive_loss": 0.9745, "train_positive_log_prob": -86.4649, "train_positive_token_accuracy": 0.0763, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.4708, "epoch": 1.3724604966139955, "grad_norm": 15.86633586883545, "learning_rate": 8.4485647267913e-06, "lm_loss": 5.7088, "loss": 1.4663, "step": 608, "text_contrastive_loss": 0.8493, "train_positive_log_prob": -85.651, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.4696, "epoch": 1.3747178329571106, "grad_norm": 14.49863338470459, "learning_rate": 8.443307579364282e-06, "lm_loss": 5.7355, "loss": 1.4756, "step": 609, "text_contrastive_loss": 0.8649, "train_positive_log_prob": -86.3931, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.5206, "epoch": 1.3769751693002257, "grad_norm": 17.275390625, "learning_rate": 8.43804318157534e-06, "lm_loss": 5.742, "loss": 1.5092, "step": 610, "text_contrastive_loss": 0.8289, "train_positive_log_prob": -84.9649, "train_positive_token_accuracy": 0.0697, "train_positive_token_prob": 0.0277 }, { "contrastive_loss": 0.5528, "epoch": 1.379232505643341, "grad_norm": 16.464248657226562, "learning_rate": 8.432771544509395e-06, "lm_loss": 5.8354, "loss": 1.6296, "step": 611, "text_contrastive_loss": 0.9863, "train_positive_log_prob": -89.8798, "train_positive_token_accuracy": 0.0751, "train_positive_token_prob": 0.0281 }, { "contrastive_loss": 0.431, "epoch": 1.3814898419864559, "grad_norm": 16.348220825195312, "learning_rate": 8.427492679266605e-06, "lm_loss": 5.7165, "loss": 1.4121, "step": 612, "text_contrastive_loss": 0.8189, "train_positive_log_prob": -86.3244, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.5288, "epoch": 1.3837471783295712, "grad_norm": 14.306358337402344, "learning_rate": 8.422206596962357e-06, "lm_loss": 5.653, "loss": 1.4818, "step": 613, "text_contrastive_loss": 0.7755, "train_positive_log_prob": -80.8444, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.6173, "epoch": 1.3860045146726863, "grad_norm": 17.739561080932617, "learning_rate": 8.416913308727229e-06, "lm_loss": 5.6375, "loss": 1.5781, "step": 614, "text_contrastive_loss": 0.794, "train_positive_log_prob": -83.7935, "train_positive_token_accuracy": 0.0883, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.4479, "epoch": 1.3882618510158014, "grad_norm": 15.596267700195312, "learning_rate": 8.411612825706976e-06, "lm_loss": 5.6922, "loss": 1.4017, "step": 615, "text_contrastive_loss": 0.7692, "train_positive_log_prob": -84.7406, "train_positive_token_accuracy": 0.0848, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.5118, "epoch": 1.3905191873589164, "grad_norm": 14.741986274719238, "learning_rate": 8.4063051590625e-06, "lm_loss": 5.6216, "loss": 1.53, "step": 616, "text_contrastive_loss": 0.9121, "train_positive_log_prob": -84.3669, "train_positive_token_accuracy": 0.0809, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.3836, "epoch": 1.3927765237020315, "grad_norm": 13.403180122375488, "learning_rate": 8.400990319969829e-06, "lm_loss": 5.609, "loss": 1.3193, "step": 617, "text_contrastive_loss": 0.7495, "train_positive_log_prob": -81.4905, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.5574, "epoch": 1.3950338600451468, "grad_norm": 14.65643310546875, "learning_rate": 8.395668319620092e-06, "lm_loss": 5.6239, "loss": 1.5532, "step": 618, "text_contrastive_loss": 0.8669, "train_positive_log_prob": -82.0509, "train_positive_token_accuracy": 0.0776, "train_positive_token_prob": 0.0286 }, { "contrastive_loss": 0.4803, "epoch": 1.3972911963882617, "grad_norm": 15.410263061523438, "learning_rate": 8.390339169219504e-06, "lm_loss": 5.6461, "loss": 1.4675, "step": 619, "text_contrastive_loss": 0.8452, "train_positive_log_prob": -84.9576, "train_positive_token_accuracy": 0.0854, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.5147, "epoch": 1.399548532731377, "grad_norm": 15.741019248962402, "learning_rate": 8.385002879989328e-06, "lm_loss": 5.7148, "loss": 1.5343, "step": 620, "text_contrastive_loss": 0.8963, "train_positive_log_prob": -83.5083, "train_positive_token_accuracy": 0.0756, "train_positive_token_prob": 0.0288 }, { "contrastive_loss": 0.4724, "epoch": 1.4018058690744921, "grad_norm": 16.445375442504883, "learning_rate": 8.37965946316586e-06, "lm_loss": 5.7327, "loss": 1.4951, "step": 621, "text_contrastive_loss": 0.899, "train_positive_log_prob": -83.3037, "train_positive_token_accuracy": 0.0753, "train_positive_token_prob": 0.0288 }, { "contrastive_loss": 0.385, "epoch": 1.4040632054176072, "grad_norm": 13.678441047668457, "learning_rate": 8.37430893000041e-06, "lm_loss": 5.6494, "loss": 1.3745, "step": 622, "text_contrastive_loss": 0.8491, "train_positive_log_prob": -83.9212, "train_positive_token_accuracy": 0.0826, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.5289, "epoch": 1.4063205417607223, "grad_norm": 16.341611862182617, "learning_rate": 8.368951291759264e-06, "lm_loss": 5.6466, "loss": 1.5547, "step": 623, "text_contrastive_loss": 0.9223, "train_positive_log_prob": -83.3343, "train_positive_token_accuracy": 0.0807, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.6226, "epoch": 1.4085778781038374, "grad_norm": 16.759748458862305, "learning_rate": 8.363586559723675e-06, "lm_loss": 5.6907, "loss": 1.6319, "step": 624, "text_contrastive_loss": 0.8803, "train_positive_log_prob": -83.9181, "train_positive_token_accuracy": 0.0646, "train_positive_token_prob": 0.0273 }, { "contrastive_loss": 0.4707, "epoch": 1.4108352144469527, "grad_norm": 15.580821990966797, "learning_rate": 8.35821474518983e-06, "lm_loss": 5.6648, "loss": 1.4457, "step": 625, "text_contrastive_loss": 0.817, "train_positive_log_prob": -83.7316, "train_positive_token_accuracy": 0.084, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.4396, "epoch": 1.4130925507900678, "grad_norm": 13.662397384643555, "learning_rate": 8.352835859468829e-06, "lm_loss": 5.6666, "loss": 1.4652, "step": 626, "text_contrastive_loss": 0.9179, "train_positive_log_prob": -82.5654, "train_positive_token_accuracy": 0.0791, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.471, "epoch": 1.4153498871331829, "grad_norm": 15.736557960510254, "learning_rate": 8.347449913886662e-06, "lm_loss": 5.5869, "loss": 1.5001, "step": 627, "text_contrastive_loss": 0.941, "train_positive_log_prob": -82.6043, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.4888, "epoch": 1.417607223476298, "grad_norm": 12.857428550720215, "learning_rate": 8.34205691978419e-06, "lm_loss": 5.6094, "loss": 1.5109, "step": 628, "text_contrastive_loss": 0.9223, "train_positive_log_prob": -84.2689, "train_positive_token_accuracy": 0.0777, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.4558, "epoch": 1.419864559819413, "grad_norm": 15.458051681518555, "learning_rate": 8.336656888517103e-06, "lm_loss": 5.6711, "loss": 1.4628, "step": 629, "text_contrastive_loss": 0.8797, "train_positive_log_prob": -84.7394, "train_positive_token_accuracy": 0.0758, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.4538, "epoch": 1.4221218961625282, "grad_norm": 13.901751518249512, "learning_rate": 8.331249831455921e-06, "lm_loss": 5.7476, "loss": 1.4583, "step": 630, "text_contrastive_loss": 0.8595, "train_positive_log_prob": -84.4216, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.3812, "epoch": 1.4243792325056432, "grad_norm": 11.676010131835938, "learning_rate": 8.325835759985951e-06, "lm_loss": 5.7537, "loss": 1.3213, "step": 631, "text_contrastive_loss": 0.7296, "train_positive_log_prob": -85.0485, "train_positive_token_accuracy": 0.0801, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.4459, "epoch": 1.4266365688487586, "grad_norm": 14.951569557189941, "learning_rate": 8.320414685507272e-06, "lm_loss": 5.7978, "loss": 1.3755, "step": 632, "text_contrastive_loss": 0.6995, "train_positive_log_prob": -88.2355, "train_positive_token_accuracy": 0.0834, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.5285, "epoch": 1.4288939051918736, "grad_norm": 15.13301944732666, "learning_rate": 8.31498661943471e-06, "lm_loss": 5.7731, "loss": 1.5599, "step": 633, "text_contrastive_loss": 0.9081, "train_positive_log_prob": -86.3685, "train_positive_token_accuracy": 0.0708, "train_positive_token_prob": 0.0281 }, { "contrastive_loss": 0.4824, "epoch": 1.4311512415349887, "grad_norm": 14.152031898498535, "learning_rate": 8.309551573197809e-06, "lm_loss": 5.7039, "loss": 1.4794, "step": 634, "text_contrastive_loss": 0.8533, "train_positive_log_prob": -85.8893, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.3754, "epoch": 1.4334085778781038, "grad_norm": 13.575399398803711, "learning_rate": 8.304109558240817e-06, "lm_loss": 5.7387, "loss": 1.2938, "step": 635, "text_contrastive_loss": 0.6891, "train_positive_log_prob": -85.2221, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0283 }, { "contrastive_loss": 0.4933, "epoch": 1.435665914221219, "grad_norm": 14.869497299194336, "learning_rate": 8.298660586022646e-06, "lm_loss": 5.8215, "loss": 1.5802, "step": 636, "text_contrastive_loss": 1.0095, "train_positive_log_prob": -85.527, "train_positive_token_accuracy": 0.0776, "train_positive_token_prob": 0.0285 }, { "contrastive_loss": 0.5576, "epoch": 1.437923250564334, "grad_norm": 18.26089859008789, "learning_rate": 8.293204668016867e-06, "lm_loss": 5.6906, "loss": 1.618, "step": 637, "text_contrastive_loss": 0.9827, "train_positive_log_prob": -86.403, "train_positive_token_accuracy": 0.0849, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.5191, "epoch": 1.440180586907449, "grad_norm": 15.64136028289795, "learning_rate": 8.287741815711674e-06, "lm_loss": 5.7477, "loss": 1.5562, "step": 638, "text_contrastive_loss": 0.9246, "train_positive_log_prob": -85.3564, "train_positive_token_accuracy": 0.0807, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.5803, "epoch": 1.4424379232505644, "grad_norm": 15.560050010681152, "learning_rate": 8.282272040609855e-06, "lm_loss": 5.7404, "loss": 1.5859, "step": 639, "text_contrastive_loss": 0.8631, "train_positive_log_prob": -85.7271, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.5483, "epoch": 1.4446952595936795, "grad_norm": 15.672173500061035, "learning_rate": 8.276795354228785e-06, "lm_loss": 5.6833, "loss": 1.5249, "step": 640, "text_contrastive_loss": 0.8164, "train_positive_log_prob": -84.3885, "train_positive_token_accuracy": 0.0829, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3893, "epoch": 1.4469525959367946, "grad_norm": 12.000456809997559, "learning_rate": 8.271311768100386e-06, "lm_loss": 5.5456, "loss": 1.3374, "step": 641, "text_contrastive_loss": 0.7871, "train_positive_log_prob": -82.8222, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.5464, "epoch": 1.4492099322799097, "grad_norm": 15.725830078125, "learning_rate": 8.26582129377111e-06, "lm_loss": 5.6961, "loss": 1.5786, "step": 642, "text_contrastive_loss": 0.9251, "train_positive_log_prob": -83.1883, "train_positive_token_accuracy": 0.0706, "train_positive_token_prob": 0.0285 }, { "contrastive_loss": 0.3856, "epoch": 1.4514672686230248, "grad_norm": 14.19225788116455, "learning_rate": 8.26032394280191e-06, "lm_loss": 5.7577, "loss": 1.357, "step": 643, "text_contrastive_loss": 0.7912, "train_positive_log_prob": -85.9421, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.583, "epoch": 1.45372460496614, "grad_norm": 14.988269805908203, "learning_rate": 8.254819726768224e-06, "lm_loss": 5.658, "loss": 1.6074, "step": 644, "text_contrastive_loss": 0.9173, "train_positive_log_prob": -84.5007, "train_positive_token_accuracy": 0.0751, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.3982, "epoch": 1.455981941309255, "grad_norm": 14.792583465576172, "learning_rate": 8.249308657259943e-06, "lm_loss": 5.6463, "loss": 1.3618, "step": 645, "text_contrastive_loss": 0.798, "train_positive_log_prob": -84.7384, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.4313, "epoch": 1.4582392776523703, "grad_norm": 13.597331047058105, "learning_rate": 8.243790745881389e-06, "lm_loss": 5.6629, "loss": 1.4241, "step": 646, "text_contrastive_loss": 0.8531, "train_positive_log_prob": -83.5331, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.5069, "epoch": 1.4604966139954854, "grad_norm": 15.051453590393066, "learning_rate": 8.238266004251284e-06, "lm_loss": 5.7407, "loss": 1.4731, "step": 647, "text_contrastive_loss": 0.7842, "train_positive_log_prob": -85.7958, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0288 }, { "contrastive_loss": 0.466, "epoch": 1.4627539503386005, "grad_norm": 13.057682037353516, "learning_rate": 8.232734444002748e-06, "lm_loss": 5.7397, "loss": 1.446, "step": 648, "text_contrastive_loss": 0.812, "train_positive_log_prob": -85.3991, "train_positive_token_accuracy": 0.0851, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.4597, "epoch": 1.4650112866817155, "grad_norm": 14.503249168395996, "learning_rate": 8.22719607678324e-06, "lm_loss": 5.713, "loss": 1.4275, "step": 649, "text_contrastive_loss": 0.793, "train_positive_log_prob": -84.7791, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.4196, "epoch": 1.4672686230248306, "grad_norm": 12.781978607177734, "learning_rate": 8.221650914254566e-06, "lm_loss": 5.7031, "loss": 1.3435, "step": 650, "text_contrastive_loss": 0.7072, "train_positive_log_prob": -83.7659, "train_positive_token_accuracy": 0.0801, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.3606, "epoch": 1.469525959367946, "grad_norm": 12.280264854431152, "learning_rate": 8.216098968092833e-06, "lm_loss": 5.652, "loss": 1.2835, "step": 651, "text_contrastive_loss": 0.7153, "train_positive_log_prob": -83.9411, "train_positive_token_accuracy": 0.0834, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3972, "epoch": 1.4717832957110608, "grad_norm": 15.713282585144043, "learning_rate": 8.210540249988435e-06, "lm_loss": 5.6435, "loss": 1.2746, "step": 652, "text_contrastive_loss": 0.6261, "train_positive_log_prob": -84.1212, "train_positive_token_accuracy": 0.0888, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4787, "epoch": 1.4740406320541761, "grad_norm": 15.67564582824707, "learning_rate": 8.204974771646023e-06, "lm_loss": 5.6599, "loss": 1.453, "step": 653, "text_contrastive_loss": 0.8167, "train_positive_log_prob": -83.7458, "train_positive_token_accuracy": 0.0845, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.5185, "epoch": 1.4762979683972912, "grad_norm": 15.40292739868164, "learning_rate": 8.199402544784485e-06, "lm_loss": 5.6211, "loss": 1.6099, "step": 654, "text_contrastive_loss": 1.0584, "train_positive_log_prob": -82.9765, "train_positive_token_accuracy": 0.082, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.4908, "epoch": 1.4785553047404063, "grad_norm": 15.722895622253418, "learning_rate": 8.193823581136919e-06, "lm_loss": 5.7332, "loss": 1.559, "step": 655, "text_contrastive_loss": 0.9897, "train_positive_log_prob": -84.6532, "train_positive_token_accuracy": 0.0762, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.4544, "epoch": 1.4808126410835214, "grad_norm": 14.068140029907227, "learning_rate": 8.188237892450603e-06, "lm_loss": 5.782, "loss": 1.4121, "step": 656, "text_contrastive_loss": 0.759, "train_positive_log_prob": -85.3028, "train_positive_token_accuracy": 0.0702, "train_positive_token_prob": 0.0276 }, { "contrastive_loss": 0.5044, "epoch": 1.4830699774266365, "grad_norm": 16.4137020111084, "learning_rate": 8.182645490486986e-06, "lm_loss": 5.6261, "loss": 1.4556, "step": 657, "text_contrastive_loss": 0.7772, "train_positive_log_prob": -83.9315, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0291 }, { "contrastive_loss": 0.4391, "epoch": 1.4853273137697518, "grad_norm": 14.37668514251709, "learning_rate": 8.177046387021641e-06, "lm_loss": 5.6918, "loss": 1.4325, "step": 658, "text_contrastive_loss": 0.8484, "train_positive_log_prob": -84.3999, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.5499, "epoch": 1.487584650112867, "grad_norm": 16.686614990234375, "learning_rate": 8.17144059384426e-06, "lm_loss": 5.6319, "loss": 1.5167, "step": 659, "text_contrastive_loss": 0.8072, "train_positive_log_prob": -84.5463, "train_positive_token_accuracy": 0.0801, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4126, "epoch": 1.489841986455982, "grad_norm": 12.877717971801758, "learning_rate": 8.165828122758615e-06, "lm_loss": 5.7432, "loss": 1.3588, "step": 660, "text_contrastive_loss": 0.7439, "train_positive_log_prob": -87.4941, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.6014, "epoch": 1.492099322799097, "grad_norm": 17.75623321533203, "learning_rate": 8.160208985582547e-06, "lm_loss": 5.6466, "loss": 1.6029, "step": 661, "text_contrastive_loss": 0.8738, "train_positive_log_prob": -82.8446, "train_positive_token_accuracy": 0.0716, "train_positive_token_prob": 0.0291 }, { "contrastive_loss": 0.3884, "epoch": 1.4943566591422122, "grad_norm": 13.010163307189941, "learning_rate": 8.154583194147929e-06, "lm_loss": 5.6319, "loss": 1.2631, "step": 662, "text_contrastive_loss": 0.623, "train_positive_log_prob": -80.9603, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.4159, "epoch": 1.4966139954853273, "grad_norm": 14.1793851852417, "learning_rate": 8.148950760300642e-06, "lm_loss": 5.7532, "loss": 1.3051, "step": 663, "text_contrastive_loss": 0.6278, "train_positive_log_prob": -86.3863, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.5176, "epoch": 1.4988713318284423, "grad_norm": 16.44997215270996, "learning_rate": 8.14331169590056e-06, "lm_loss": 5.7242, "loss": 1.4979, "step": 664, "text_contrastive_loss": 0.8158, "train_positive_log_prob": -85.5964, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.0286 }, { "contrastive_loss": 0.3672, "epoch": 1.5011286681715577, "grad_norm": 14.054045677185059, "learning_rate": 8.137666012821514e-06, "lm_loss": 5.7167, "loss": 1.3639, "step": 665, "text_contrastive_loss": 0.8502, "train_positive_log_prob": -83.6982, "train_positive_token_accuracy": 0.0686, "train_positive_token_prob": 0.0281 }, { "contrastive_loss": 0.5241, "epoch": 1.5033860045146725, "grad_norm": 15.702958106994629, "learning_rate": 8.132013722951275e-06, "lm_loss": 5.6649, "loss": 1.4999, "step": 666, "text_contrastive_loss": 0.8186, "train_positive_log_prob": -83.3398, "train_positive_token_accuracy": 0.086, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.4516, "epoch": 1.5056433408577878, "grad_norm": 16.065961837768555, "learning_rate": 8.12635483819152e-06, "lm_loss": 5.8018, "loss": 1.412, "step": 667, "text_contrastive_loss": 0.7605, "train_positive_log_prob": -85.0356, "train_positive_token_accuracy": 0.0776, "train_positive_token_prob": 0.0278 }, { "contrastive_loss": 0.4771, "epoch": 1.507900677200903, "grad_norm": 16.118410110473633, "learning_rate": 8.12068937045782e-06, "lm_loss": 5.592, "loss": 1.4338, "step": 668, "text_contrastive_loss": 0.7951, "train_positive_log_prob": -83.6645, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.4767, "epoch": 1.510158013544018, "grad_norm": 14.855873107910156, "learning_rate": 8.115017331679602e-06, "lm_loss": 5.7021, "loss": 1.466, "step": 669, "text_contrastive_loss": 0.8381, "train_positive_log_prob": -84.6433, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.5037, "epoch": 1.5124153498871333, "grad_norm": 16.366140365600586, "learning_rate": 8.109338733800132e-06, "lm_loss": 5.6545, "loss": 1.5321, "step": 670, "text_contrastive_loss": 0.926, "train_positive_log_prob": -83.7774, "train_positive_token_accuracy": 0.0783, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.4368, "epoch": 1.5146726862302482, "grad_norm": 14.199953079223633, "learning_rate": 8.103653588776483e-06, "lm_loss": 5.573, "loss": 1.3232, "step": 671, "text_contrastive_loss": 0.6582, "train_positive_log_prob": -80.8225, "train_positive_token_accuracy": 0.0835, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3749, "epoch": 1.5169300225733635, "grad_norm": 14.447689056396484, "learning_rate": 8.09796190857952e-06, "lm_loss": 5.6185, "loss": 1.3952, "step": 672, "text_contrastive_loss": 0.917, "train_positive_log_prob": -81.0253, "train_positive_token_accuracy": 0.0712, "train_positive_token_prob": 0.0288 }, { "contrastive_loss": 0.4518, "epoch": 1.5191873589164786, "grad_norm": 13.073445320129395, "learning_rate": 8.09226370519386e-06, "lm_loss": 5.5978, "loss": 1.3589, "step": 673, "text_contrastive_loss": 0.6947, "train_positive_log_prob": -85.5665, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.4737, "epoch": 1.5214446952595937, "grad_norm": 15.316915512084961, "learning_rate": 8.08655899061787e-06, "lm_loss": 5.8387, "loss": 1.5077, "step": 674, "text_contrastive_loss": 0.9004, "train_positive_log_prob": -87.4698, "train_positive_token_accuracy": 0.0821, "train_positive_token_prob": 0.0275 }, { "contrastive_loss": 0.551, "epoch": 1.5237020316027088, "grad_norm": 15.95355224609375, "learning_rate": 8.080847776863609e-06, "lm_loss": 5.5784, "loss": 1.5882, "step": 675, "text_contrastive_loss": 0.9586, "train_positive_log_prob": -82.7395, "train_positive_token_accuracy": 0.0777, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4212, "epoch": 1.5259593679458239, "grad_norm": 12.847784996032715, "learning_rate": 8.075130075956836e-06, "lm_loss": 5.6926, "loss": 1.3971, "step": 676, "text_contrastive_loss": 0.8133, "train_positive_log_prob": -84.1003, "train_positive_token_accuracy": 0.0791, "train_positive_token_prob": 0.0286 }, { "contrastive_loss": 0.4108, "epoch": 1.5282167042889392, "grad_norm": 12.959222793579102, "learning_rate": 8.069405899936961e-06, "lm_loss": 5.6467, "loss": 1.3945, "step": 677, "text_contrastive_loss": 0.838, "train_positive_log_prob": -84.1897, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.4695, "epoch": 1.530474040632054, "grad_norm": 14.164462089538574, "learning_rate": 8.06367526085703e-06, "lm_loss": 5.6998, "loss": 1.4137, "step": 678, "text_contrastive_loss": 0.7485, "train_positive_log_prob": -83.6287, "train_positive_token_accuracy": 0.0748, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.4206, "epoch": 1.5327313769751694, "grad_norm": 13.234776496887207, "learning_rate": 8.057938170783704e-06, "lm_loss": 5.6494, "loss": 1.3521, "step": 679, "text_contrastive_loss": 0.7333, "train_positive_log_prob": -83.8824, "train_positive_token_accuracy": 0.0758, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.4758, "epoch": 1.5349887133182845, "grad_norm": 14.323420524597168, "learning_rate": 8.052194641797217e-06, "lm_loss": 5.8014, "loss": 1.4548, "step": 680, "text_contrastive_loss": 0.7976, "train_positive_log_prob": -85.9592, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0283 }, { "contrastive_loss": 0.4938, "epoch": 1.5372460496613995, "grad_norm": 16.471073150634766, "learning_rate": 8.046444685991369e-06, "lm_loss": 5.6084, "loss": 1.4596, "step": 681, "text_contrastive_loss": 0.8099, "train_positive_log_prob": -83.9705, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.0291 }, { "contrastive_loss": 0.4681, "epoch": 1.5395033860045146, "grad_norm": 17.488418579101562, "learning_rate": 8.040688315473489e-06, "lm_loss": 5.5904, "loss": 1.4683, "step": 682, "text_contrastive_loss": 0.8823, "train_positive_log_prob": -84.3386, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.5798, "epoch": 1.5417607223476297, "grad_norm": 16.649585723876953, "learning_rate": 8.034925542364412e-06, "lm_loss": 5.7301, "loss": 1.589, "step": 683, "text_contrastive_loss": 0.8725, "train_positive_log_prob": -85.1206, "train_positive_token_accuracy": 0.0686, "train_positive_token_prob": 0.0275 }, { "contrastive_loss": 0.4264, "epoch": 1.544018058690745, "grad_norm": 13.816652297973633, "learning_rate": 8.029156378798459e-06, "lm_loss": 5.7822, "loss": 1.4185, "step": 684, "text_contrastive_loss": 0.8277, "train_positive_log_prob": -85.9443, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0285 }, { "contrastive_loss": 0.527, "epoch": 1.54627539503386, "grad_norm": 14.386276245117188, "learning_rate": 8.023380836923404e-06, "lm_loss": 5.7393, "loss": 1.54, "step": 685, "text_contrastive_loss": 0.8782, "train_positive_log_prob": -84.275, "train_positive_token_accuracy": 0.0782, "train_positive_token_prob": 0.0288 }, { "contrastive_loss": 0.477, "epoch": 1.5485327313769752, "grad_norm": 14.044321060180664, "learning_rate": 8.017598928900452e-06, "lm_loss": 5.5485, "loss": 1.4258, "step": 686, "text_contrastive_loss": 0.7879, "train_positive_log_prob": -81.8228, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.5478, "epoch": 1.5507900677200903, "grad_norm": 14.924182891845703, "learning_rate": 8.011810666904212e-06, "lm_loss": 5.7297, "loss": 1.5407, "step": 687, "text_contrastive_loss": 0.8399, "train_positive_log_prob": -85.5175, "train_positive_token_accuracy": 0.0741, "train_positive_token_prob": 0.0278 }, { "contrastive_loss": 0.4488, "epoch": 1.5530474040632054, "grad_norm": 15.37421703338623, "learning_rate": 8.006016063122672e-06, "lm_loss": 5.6321, "loss": 1.4148, "step": 688, "text_contrastive_loss": 0.8057, "train_positive_log_prob": -81.6189, "train_positive_token_accuracy": 0.0733, "train_positive_token_prob": 0.0282 }, { "contrastive_loss": 0.5096, "epoch": 1.5553047404063205, "grad_norm": 16.845478057861328, "learning_rate": 8.000215129757178e-06, "lm_loss": 5.6407, "loss": 1.5149, "step": 689, "text_contrastive_loss": 0.8824, "train_positive_log_prob": -83.489, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0287 }, { "contrastive_loss": 0.3692, "epoch": 1.5575620767494356, "grad_norm": 13.815431594848633, "learning_rate": 7.994407879022397e-06, "lm_loss": 5.7419, "loss": 1.3044, "step": 690, "text_contrastive_loss": 0.7221, "train_positive_log_prob": -84.4463, "train_positive_token_accuracy": 0.0821, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.5475, "epoch": 1.559819413092551, "grad_norm": 15.667496681213379, "learning_rate": 7.9885943231463e-06, "lm_loss": 5.7556, "loss": 1.6112, "step": 691, "text_contrastive_loss": 0.9763, "train_positive_log_prob": -84.1954, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0275 }, { "contrastive_loss": 0.4485, "epoch": 1.5620767494356658, "grad_norm": 13.958014488220215, "learning_rate": 7.98277447437014e-06, "lm_loss": 5.6992, "loss": 1.4239, "step": 692, "text_contrastive_loss": 0.8111, "train_positive_log_prob": -87.2531, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.5523, "epoch": 1.564334085778781, "grad_norm": 15.36923885345459, "learning_rate": 7.976948344948412e-06, "lm_loss": 5.775, "loss": 1.5736, "step": 693, "text_contrastive_loss": 0.8876, "train_positive_log_prob": -85.3385, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.5094, "epoch": 1.5665914221218962, "grad_norm": 13.96995735168457, "learning_rate": 7.971115947148842e-06, "lm_loss": 5.5939, "loss": 1.4705, "step": 694, "text_contrastive_loss": 0.8034, "train_positive_log_prob": -82.3289, "train_positive_token_accuracy": 0.0756, "train_positive_token_prob": 0.0275 }, { "contrastive_loss": 0.5592, "epoch": 1.5688487584650113, "grad_norm": 16.41417121887207, "learning_rate": 7.965277293252354e-06, "lm_loss": 5.5482, "loss": 1.5618, "step": 695, "text_contrastive_loss": 0.8955, "train_positive_log_prob": -82.7689, "train_positive_token_accuracy": 0.0712, "train_positive_token_prob": 0.0285 }, { "contrastive_loss": 0.5122, "epoch": 1.5711060948081266, "grad_norm": 15.281829833984375, "learning_rate": 7.95943239555304e-06, "lm_loss": 5.6175, "loss": 1.4699, "step": 696, "text_contrastive_loss": 0.7918, "train_positive_log_prob": -81.5889, "train_positive_token_accuracy": 0.0734, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.4928, "epoch": 1.5733634311512414, "grad_norm": 16.319522857666016, "learning_rate": 7.953581266358148e-06, "lm_loss": 5.6711, "loss": 1.4936, "step": 697, "text_contrastive_loss": 0.8676, "train_positive_log_prob": -84.4906, "train_positive_token_accuracy": 0.0732, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.4439, "epoch": 1.5756207674943568, "grad_norm": 16.381437301635742, "learning_rate": 7.94772391798804e-06, "lm_loss": 5.5834, "loss": 1.2828, "step": 698, "text_contrastive_loss": 0.5611, "train_positive_log_prob": -82.8392, "train_positive_token_accuracy": 0.0748, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.528, "epoch": 1.5778781038374716, "grad_norm": 15.742697715759277, "learning_rate": 7.941860362776176e-06, "lm_loss": 5.5959, "loss": 1.534, "step": 699, "text_contrastive_loss": 0.8929, "train_positive_log_prob": -78.8482, "train_positive_token_accuracy": 0.0743, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.4935, "epoch": 1.580135440180587, "grad_norm": 14.445799827575684, "learning_rate": 7.935990613069087e-06, "lm_loss": 5.6173, "loss": 1.4992, "step": 700, "text_contrastive_loss": 0.888, "train_positive_log_prob": -81.5845, "train_positive_token_accuracy": 0.0791, "train_positive_token_prob": 0.0291 }, { "contrastive_loss": 0.4314, "epoch": 1.582392776523702, "grad_norm": 14.703787803649902, "learning_rate": 7.930114681226341e-06, "lm_loss": 5.7236, "loss": 1.3482, "step": 701, "text_contrastive_loss": 0.6888, "train_positive_log_prob": -82.8079, "train_positive_token_accuracy": 0.078, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.4213, "epoch": 1.5846501128668171, "grad_norm": 16.77519416809082, "learning_rate": 7.924232579620533e-06, "lm_loss": 5.6974, "loss": 1.4344, "step": 702, "text_contrastive_loss": 0.8867, "train_positive_log_prob": -83.9277, "train_positive_token_accuracy": 0.0721, "train_positive_token_prob": 0.0283 }, { "contrastive_loss": 0.3852, "epoch": 1.5869074492099324, "grad_norm": 13.184041023254395, "learning_rate": 7.91834432063724e-06, "lm_loss": 5.5581, "loss": 1.293, "step": 703, "text_contrastive_loss": 0.704, "train_positive_log_prob": -82.5643, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.4773, "epoch": 1.5891647855530473, "grad_norm": 14.658939361572266, "learning_rate": 7.912449916675008e-06, "lm_loss": 5.5706, "loss": 1.4164, "step": 704, "text_contrastive_loss": 0.764, "train_positive_log_prob": -83.3206, "train_positive_token_accuracy": 0.0755, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.5394, "epoch": 1.5914221218961626, "grad_norm": 15.919829368591309, "learning_rate": 7.90654938014533e-06, "lm_loss": 5.5969, "loss": 1.5109, "step": 705, "text_contrastive_loss": 0.8237, "train_positive_log_prob": -82.9038, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.4613, "epoch": 1.5936794582392777, "grad_norm": 14.476689338684082, "learning_rate": 7.900642723472596e-06, "lm_loss": 5.8075, "loss": 1.4326, "step": 706, "text_contrastive_loss": 0.7811, "train_positive_log_prob": -88.1512, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.4404, "epoch": 1.5959367945823928, "grad_norm": 16.509441375732422, "learning_rate": 7.894729959094097e-06, "lm_loss": 5.7106, "loss": 1.4022, "step": 707, "text_contrastive_loss": 0.7815, "train_positive_log_prob": -83.4062, "train_positive_token_accuracy": 0.0752, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.5398, "epoch": 1.5981941309255079, "grad_norm": 15.218833923339844, "learning_rate": 7.888811099459974e-06, "lm_loss": 5.695, "loss": 1.5809, "step": 708, "text_contrastive_loss": 0.9432, "train_positive_log_prob": -85.3343, "train_positive_token_accuracy": 0.086, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4894, "epoch": 1.600451467268623, "grad_norm": 17.24501609802246, "learning_rate": 7.882886157033209e-06, "lm_loss": 5.6748, "loss": 1.4233, "step": 709, "text_contrastive_loss": 0.7329, "train_positive_log_prob": -84.2801, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4836, "epoch": 1.6027088036117383, "grad_norm": 15.260598182678223, "learning_rate": 7.876955144289594e-06, "lm_loss": 5.6995, "loss": 1.4934, "step": 710, "text_contrastive_loss": 0.8796, "train_positive_log_prob": -84.0514, "train_positive_token_accuracy": 0.0774, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.5571, "epoch": 1.6049661399548532, "grad_norm": 17.923580169677734, "learning_rate": 7.871018073717693e-06, "lm_loss": 5.6762, "loss": 1.5727, "step": 711, "text_contrastive_loss": 0.8961, "train_positive_log_prob": -83.0791, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.5156, "epoch": 1.6072234762979685, "grad_norm": 16.633056640625, "learning_rate": 7.865074957818839e-06, "lm_loss": 5.8299, "loss": 1.4893, "step": 712, "text_contrastive_loss": 0.7814, "train_positive_log_prob": -86.8327, "train_positive_token_accuracy": 0.081, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4718, "epoch": 1.6094808126410836, "grad_norm": 15.569839477539062, "learning_rate": 7.859125809107082e-06, "lm_loss": 5.6042, "loss": 1.4569, "step": 713, "text_contrastive_loss": 0.8494, "train_positive_log_prob": -85.0284, "train_positive_token_accuracy": 0.0785, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.5376, "epoch": 1.6117381489841986, "grad_norm": 17.901193618774414, "learning_rate": 7.853170640109182e-06, "lm_loss": 5.7166, "loss": 1.5815, "step": 714, "text_contrastive_loss": 0.9443, "train_positive_log_prob": -86.239, "train_positive_token_accuracy": 0.0785, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4856, "epoch": 1.6139954853273137, "grad_norm": 14.7576322555542, "learning_rate": 7.847209463364574e-06, "lm_loss": 5.5901, "loss": 1.4724, "step": 715, "text_contrastive_loss": 0.8555, "train_positive_log_prob": -80.5992, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.512, "epoch": 1.6162528216704288, "grad_norm": 15.334814071655273, "learning_rate": 7.841242291425342e-06, "lm_loss": 5.5756, "loss": 1.5143, "step": 716, "text_contrastive_loss": 0.8895, "train_positive_log_prob": -81.8691, "train_positive_token_accuracy": 0.075, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.5843, "epoch": 1.6185101580135441, "grad_norm": 17.95580291748047, "learning_rate": 7.835269136856194e-06, "lm_loss": 5.6804, "loss": 1.6047, "step": 717, "text_contrastive_loss": 0.9047, "train_positive_log_prob": -82.1908, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.0291 }, { "contrastive_loss": 0.4107, "epoch": 1.620767494356659, "grad_norm": 15.698619842529297, "learning_rate": 7.829290012234438e-06, "lm_loss": 5.5673, "loss": 1.332, "step": 718, "text_contrastive_loss": 0.7291, "train_positive_log_prob": -81.011, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.4688, "epoch": 1.6230248306997743, "grad_norm": 16.278276443481445, "learning_rate": 7.823304930149949e-06, "lm_loss": 5.4734, "loss": 1.4136, "step": 719, "text_contrastive_loss": 0.795, "train_positive_log_prob": -79.1723, "train_positive_token_accuracy": 0.0789, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.4137, "epoch": 1.6252821670428894, "grad_norm": 15.17823314666748, "learning_rate": 7.817313903205148e-06, "lm_loss": 5.6698, "loss": 1.3413, "step": 720, "text_contrastive_loss": 0.7212, "train_positive_log_prob": -85.289, "train_positive_token_accuracy": 0.0823, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.443, "epoch": 1.6275395033860045, "grad_norm": 16.49557876586914, "learning_rate": 7.811316944014974e-06, "lm_loss": 5.6835, "loss": 1.4397, "step": 721, "text_contrastive_loss": 0.8566, "train_positive_log_prob": -83.278, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0291 }, { "contrastive_loss": 0.4804, "epoch": 1.6297968397291196, "grad_norm": 16.320707321166992, "learning_rate": 7.805314065206857e-06, "lm_loss": 5.578, "loss": 1.4139, "step": 722, "text_contrastive_loss": 0.7515, "train_positive_log_prob": -81.5975, "train_positive_token_accuracy": 0.0804, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.4617, "epoch": 1.6320541760722347, "grad_norm": 16.48221206665039, "learning_rate": 7.799305279420691e-06, "lm_loss": 5.6306, "loss": 1.4554, "step": 723, "text_contrastive_loss": 0.8613, "train_positive_log_prob": -82.1051, "train_positive_token_accuracy": 0.075, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.4122, "epoch": 1.63431151241535, "grad_norm": 14.674779891967773, "learning_rate": 7.793290599308807e-06, "lm_loss": 5.6626, "loss": 1.4416, "step": 724, "text_contrastive_loss": 0.9262, "train_positive_log_prob": -83.4107, "train_positive_token_accuracy": 0.0719, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.5598, "epoch": 1.6365688487584649, "grad_norm": 19.37317657470703, "learning_rate": 7.78727003753595e-06, "lm_loss": 5.586, "loss": 1.5882, "step": 725, "text_contrastive_loss": 0.9396, "train_positive_log_prob": -80.9306, "train_positive_token_accuracy": 0.0817, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.4673, "epoch": 1.6388261851015802, "grad_norm": 16.270069122314453, "learning_rate": 7.78124360677925e-06, "lm_loss": 5.5472, "loss": 1.4907, "step": 726, "text_contrastive_loss": 0.9374, "train_positive_log_prob": -83.2954, "train_positive_token_accuracy": 0.0827, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.4777, "epoch": 1.6410835214446953, "grad_norm": 15.725870132446289, "learning_rate": 7.775211319728191e-06, "lm_loss": 5.6535, "loss": 1.4712, "step": 727, "text_contrastive_loss": 0.8564, "train_positive_log_prob": -83.6781, "train_positive_token_accuracy": 0.0726, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.4651, "epoch": 1.6433408577878104, "grad_norm": 14.012334823608398, "learning_rate": 7.769173189084589e-06, "lm_loss": 5.6447, "loss": 1.4904, "step": 728, "text_contrastive_loss": 0.9217, "train_positive_log_prob": -83.985, "train_positive_token_accuracy": 0.0816, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.5654, "epoch": 1.6455981941309257, "grad_norm": 16.746809005737305, "learning_rate": 7.763129227562568e-06, "lm_loss": 5.6947, "loss": 1.5699, "step": 729, "text_contrastive_loss": 0.8701, "train_positive_log_prob": -84.9315, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.0287 }, { "contrastive_loss": 0.4177, "epoch": 1.6478555304740405, "grad_norm": 11.539710998535156, "learning_rate": 7.757079447888529e-06, "lm_loss": 5.6066, "loss": 1.3854, "step": 730, "text_contrastive_loss": 0.8141, "train_positive_log_prob": -83.3069, "train_positive_token_accuracy": 0.076, "train_positive_token_prob": 0.0288 }, { "contrastive_loss": 0.4228, "epoch": 1.6501128668171559, "grad_norm": 15.049748420715332, "learning_rate": 7.75102386280112e-06, "lm_loss": 5.57, "loss": 1.421, "step": 731, "text_contrastive_loss": 0.8824, "train_positive_log_prob": -82.678, "train_positive_token_accuracy": 0.0774, "train_positive_token_prob": 0.0286 }, { "contrastive_loss": 0.4932, "epoch": 1.6523702031602707, "grad_norm": 15.766206741333008, "learning_rate": 7.744962485051217e-06, "lm_loss": 5.6068, "loss": 1.4178, "step": 732, "text_contrastive_loss": 0.728, "train_positive_log_prob": -82.5477, "train_positive_token_accuracy": 0.0785, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.5588, "epoch": 1.654627539503386, "grad_norm": 14.77518367767334, "learning_rate": 7.738895327401891e-06, "lm_loss": 5.6551, "loss": 1.5478, "step": 733, "text_contrastive_loss": 0.8469, "train_positive_log_prob": -84.3434, "train_positive_token_accuracy": 0.0765, "train_positive_token_prob": 0.0288 }, { "contrastive_loss": 0.4651, "epoch": 1.6568848758465011, "grad_norm": 16.423664093017578, "learning_rate": 7.732822402628385e-06, "lm_loss": 5.5931, "loss": 1.4284, "step": 734, "text_contrastive_loss": 0.8079, "train_positive_log_prob": -81.4274, "train_positive_token_accuracy": 0.0856, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.5366, "epoch": 1.6591422121896162, "grad_norm": 14.504514694213867, "learning_rate": 7.726743723518087e-06, "lm_loss": 5.5241, "loss": 1.4753, "step": 735, "text_contrastive_loss": 0.7726, "train_positive_log_prob": -82.1937, "train_positive_token_accuracy": 0.0856, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.5463, "epoch": 1.6613995485327315, "grad_norm": 16.042978286743164, "learning_rate": 7.720659302870496e-06, "lm_loss": 5.6109, "loss": 1.561, "step": 736, "text_contrastive_loss": 0.9071, "train_positive_log_prob": -82.9788, "train_positive_token_accuracy": 0.0851, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.5426, "epoch": 1.6636568848758464, "grad_norm": 15.57763957977295, "learning_rate": 7.714569153497204e-06, "lm_loss": 5.7038, "loss": 1.5502, "step": 737, "text_contrastive_loss": 0.8744, "train_positive_log_prob": -83.8156, "train_positive_token_accuracy": 0.076, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.3659, "epoch": 1.6659142212189617, "grad_norm": 13.961685180664062, "learning_rate": 7.708473288221868e-06, "lm_loss": 5.7913, "loss": 1.3805, "step": 738, "text_contrastive_loss": 0.8711, "train_positive_log_prob": -85.8103, "train_positive_token_accuracy": 0.0714, "train_positive_token_prob": 0.0277 }, { "contrastive_loss": 0.4062, "epoch": 1.6681715575620768, "grad_norm": 14.212257385253906, "learning_rate": 7.702371719880178e-06, "lm_loss": 5.7684, "loss": 1.3621, "step": 739, "text_contrastive_loss": 0.7582, "train_positive_log_prob": -84.7057, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.5704, "epoch": 1.670428893905192, "grad_norm": 17.218656539916992, "learning_rate": 7.696264461319831e-06, "lm_loss": 5.5532, "loss": 1.5963, "step": 740, "text_contrastive_loss": 0.9412, "train_positive_log_prob": -80.7605, "train_positive_token_accuracy": 0.0741, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.4466, "epoch": 1.672686230248307, "grad_norm": 14.9937744140625, "learning_rate": 7.69015152540051e-06, "lm_loss": 5.5905, "loss": 1.4574, "step": 741, "text_contrastive_loss": 0.9035, "train_positive_log_prob": -82.3747, "train_positive_token_accuracy": 0.0835, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.4056, "epoch": 1.674943566591422, "grad_norm": 14.841171264648438, "learning_rate": 7.684032924993845e-06, "lm_loss": 5.6774, "loss": 1.3664, "step": 742, "text_contrastive_loss": 0.7862, "train_positive_log_prob": -83.3485, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.3857, "epoch": 1.6772009029345374, "grad_norm": 13.606510162353516, "learning_rate": 7.677908672983404e-06, "lm_loss": 5.6338, "loss": 1.3532, "step": 743, "text_contrastive_loss": 0.8083, "train_positive_log_prob": -83.7588, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.4237, "epoch": 1.6794582392776523, "grad_norm": 14.244503021240234, "learning_rate": 7.671778782264647e-06, "lm_loss": 5.6147, "loss": 1.3599, "step": 744, "text_contrastive_loss": 0.7493, "train_positive_log_prob": -81.7582, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.535, "epoch": 1.6817155756207676, "grad_norm": 14.576723098754883, "learning_rate": 7.66564326574491e-06, "lm_loss": 5.7495, "loss": 1.5361, "step": 745, "text_contrastive_loss": 0.8524, "train_positive_log_prob": -84.6237, "train_positive_token_accuracy": 0.074, "train_positive_token_prob": 0.0286 }, { "contrastive_loss": 0.4851, "epoch": 1.6839729119638827, "grad_norm": 14.613896369934082, "learning_rate": 7.65950213634337e-06, "lm_loss": 5.713, "loss": 1.5287, "step": 746, "text_contrastive_loss": 0.9446, "train_positive_log_prob": -84.5161, "train_positive_token_accuracy": 0.076, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.3686, "epoch": 1.6862302483069977, "grad_norm": 15.065150260925293, "learning_rate": 7.653355406991034e-06, "lm_loss": 5.5927, "loss": 1.3653, "step": 747, "text_contrastive_loss": 0.8748, "train_positive_log_prob": -83.303, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.4851, "epoch": 1.6884875846501128, "grad_norm": 17.209030151367188, "learning_rate": 7.64720309063069e-06, "lm_loss": 5.6565, "loss": 1.4543, "step": 748, "text_contrastive_loss": 0.8071, "train_positive_log_prob": -83.6971, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.4535, "epoch": 1.690744920993228, "grad_norm": 12.747918128967285, "learning_rate": 7.641045200216896e-06, "lm_loss": 5.5735, "loss": 1.4081, "step": 749, "text_contrastive_loss": 0.7944, "train_positive_log_prob": -83.2573, "train_positive_token_accuracy": 0.0782, "train_positive_token_prob": 0.0282 }, { "contrastive_loss": 0.4117, "epoch": 1.6930022573363432, "grad_norm": 13.951990127563477, "learning_rate": 7.634881748715941e-06, "lm_loss": 5.8124, "loss": 1.3534, "step": 750, "text_contrastive_loss": 0.7211, "train_positive_log_prob": -85.0944, "train_positive_token_accuracy": 0.072, "train_positive_token_prob": 0.0283 }, { "contrastive_loss": 0.5052, "epoch": 1.695259593679458, "grad_norm": 16.40280532836914, "learning_rate": 7.628712749105831e-06, "lm_loss": 5.6827, "loss": 1.5382, "step": 751, "text_contrastive_loss": 0.9294, "train_positive_log_prob": -83.285, "train_positive_token_accuracy": 0.0712, "train_positive_token_prob": 0.0276 }, { "contrastive_loss": 0.3899, "epoch": 1.6975169300225734, "grad_norm": 13.680509567260742, "learning_rate": 7.622538214376248e-06, "lm_loss": 5.613, "loss": 1.2772, "step": 752, "text_contrastive_loss": 0.6521, "train_positive_log_prob": -83.7103, "train_positive_token_accuracy": 0.0767, "train_positive_token_prob": 0.0281 }, { "contrastive_loss": 0.4275, "epoch": 1.6997742663656885, "grad_norm": 14.89928150177002, "learning_rate": 7.616358157528535e-06, "lm_loss": 5.5666, "loss": 1.3869, "step": 753, "text_contrastive_loss": 0.8054, "train_positive_log_prob": -81.299, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.4492, "epoch": 1.7020316027088036, "grad_norm": 16.91277503967285, "learning_rate": 7.610172591575656e-06, "lm_loss": 5.7597, "loss": 1.392, "step": 754, "text_contrastive_loss": 0.7337, "train_positive_log_prob": -84.726, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0274 }, { "contrastive_loss": 0.4825, "epoch": 1.7042889390519187, "grad_norm": 15.468907356262207, "learning_rate": 7.60398152954218e-06, "lm_loss": 5.6553, "loss": 1.3819, "step": 755, "text_contrastive_loss": 0.6679, "train_positive_log_prob": -83.44, "train_positive_token_accuracy": 0.0694, "train_positive_token_prob": 0.0277 }, { "contrastive_loss": 0.4433, "epoch": 1.7065462753950338, "grad_norm": 14.060961723327637, "learning_rate": 7.597784984464248e-06, "lm_loss": 5.6453, "loss": 1.4166, "step": 756, "text_contrastive_loss": 0.8175, "train_positive_log_prob": -84.5482, "train_positive_token_accuracy": 0.0775, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.5329, "epoch": 1.708803611738149, "grad_norm": 15.737454414367676, "learning_rate": 7.5915829693895435e-06, "lm_loss": 5.6437, "loss": 1.5176, "step": 757, "text_contrastive_loss": 0.8408, "train_positive_log_prob": -82.3182, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0277 }, { "contrastive_loss": 0.5537, "epoch": 1.711060948081264, "grad_norm": 15.397076606750488, "learning_rate": 7.585375497377271e-06, "lm_loss": 5.6778, "loss": 1.6266, "step": 758, "text_contrastive_loss": 1.0103, "train_positive_log_prob": -83.88, "train_positive_token_accuracy": 0.0738, "train_positive_token_prob": 0.0273 }, { "contrastive_loss": 0.455, "epoch": 1.7133182844243793, "grad_norm": 14.482380867004395, "learning_rate": 7.579162581498125e-06, "lm_loss": 5.6564, "loss": 1.461, "step": 759, "text_contrastive_loss": 0.8807, "train_positive_log_prob": -83.6313, "train_positive_token_accuracy": 0.0721, "train_positive_token_prob": 0.0285 }, { "contrastive_loss": 0.5666, "epoch": 1.7155756207674944, "grad_norm": 16.678985595703125, "learning_rate": 7.572944234834261e-06, "lm_loss": 5.5595, "loss": 1.5437, "step": 760, "text_contrastive_loss": 0.8424, "train_positive_log_prob": -81.9402, "train_positive_token_accuracy": 0.0742, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.5585, "epoch": 1.7178329571106095, "grad_norm": 14.428262710571289, "learning_rate": 7.5667204704792706e-06, "lm_loss": 5.5821, "loss": 1.5035, "step": 761, "text_contrastive_loss": 0.7737, "train_positive_log_prob": -83.0824, "train_positive_token_accuracy": 0.0814, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.5673, "epoch": 1.7200902934537246, "grad_norm": 15.505300521850586, "learning_rate": 7.5604913015381535e-06, "lm_loss": 5.585, "loss": 1.6184, "step": 762, "text_contrastive_loss": 0.9851, "train_positive_log_prob": -82.4666, "train_positive_token_accuracy": 0.0714, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.4963, "epoch": 1.7223476297968396, "grad_norm": 14.312376976013184, "learning_rate": 7.554256741127291e-06, "lm_loss": 5.6173, "loss": 1.4956, "step": 763, "text_contrastive_loss": 0.8751, "train_positive_log_prob": -83.3581, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.4345, "epoch": 1.724604966139955, "grad_norm": 14.42605209350586, "learning_rate": 7.548016802374412e-06, "lm_loss": 5.5431, "loss": 1.4118, "step": 764, "text_contrastive_loss": 0.8461, "train_positive_log_prob": -79.7033, "train_positive_token_accuracy": 0.0742, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.5725, "epoch": 1.7268623024830698, "grad_norm": 15.596787452697754, "learning_rate": 7.541771498418575e-06, "lm_loss": 5.714, "loss": 1.5954, "step": 765, "text_contrastive_loss": 0.9031, "train_positive_log_prob": -84.3706, "train_positive_token_accuracy": 0.0749, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.4699, "epoch": 1.7291196388261851, "grad_norm": 14.061925888061523, "learning_rate": 7.535520842410136e-06, "lm_loss": 5.5871, "loss": 1.4654, "step": 766, "text_contrastive_loss": 0.8736, "train_positive_log_prob": -81.562, "train_positive_token_accuracy": 0.0736, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.4901, "epoch": 1.7313769751693002, "grad_norm": 17.323240280151367, "learning_rate": 7.529264847510715e-06, "lm_loss": 5.6402, "loss": 1.4201, "step": 767, "text_contrastive_loss": 0.7319, "train_positive_log_prob": -84.026, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.5083, "epoch": 1.7336343115124153, "grad_norm": 14.857467651367188, "learning_rate": 7.52300352689318e-06, "lm_loss": 5.6372, "loss": 1.5528, "step": 768, "text_contrastive_loss": 0.9616, "train_positive_log_prob": -84.6456, "train_positive_token_accuracy": 0.0744, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.497, "epoch": 1.7358916478555306, "grad_norm": 16.907846450805664, "learning_rate": 7.516736893741611e-06, "lm_loss": 5.5738, "loss": 1.4939, "step": 769, "text_contrastive_loss": 0.8791, "train_positive_log_prob": -83.9804, "train_positive_token_accuracy": 0.0767, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3947, "epoch": 1.7381489841986455, "grad_norm": 12.01798152923584, "learning_rate": 7.510464961251271e-06, "lm_loss": 5.6794, "loss": 1.2747, "step": 770, "text_contrastive_loss": 0.6241, "train_positive_log_prob": -84.013, "train_positive_token_accuracy": 0.0737, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.4547, "epoch": 1.7404063205417608, "grad_norm": 14.21453857421875, "learning_rate": 7.5041877426285856e-06, "lm_loss": 5.5642, "loss": 1.4135, "step": 771, "text_contrastive_loss": 0.8047, "train_positive_log_prob": -83.0057, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.5835, "epoch": 1.742663656884876, "grad_norm": 16.025463104248047, "learning_rate": 7.49790525109111e-06, "lm_loss": 5.5922, "loss": 1.6391, "step": 772, "text_contrastive_loss": 0.9926, "train_positive_log_prob": -83.6232, "train_positive_token_accuracy": 0.081, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3763, "epoch": 1.744920993227991, "grad_norm": 12.589165687561035, "learning_rate": 7.491617499867502e-06, "lm_loss": 5.4859, "loss": 1.2875, "step": 773, "text_contrastive_loss": 0.7251, "train_positive_log_prob": -81.8992, "train_positive_token_accuracy": 0.0827, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3712, "epoch": 1.747178329571106, "grad_norm": 12.508708953857422, "learning_rate": 7.485324502197494e-06, "lm_loss": 5.6359, "loss": 1.2815, "step": 774, "text_contrastive_loss": 0.6935, "train_positive_log_prob": -84.3179, "train_positive_token_accuracy": 0.0856, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3641, "epoch": 1.7494356659142212, "grad_norm": 13.209696769714355, "learning_rate": 7.479026271331864e-06, "lm_loss": 5.5879, "loss": 1.2816, "step": 775, "text_contrastive_loss": 0.7173, "train_positive_log_prob": -81.8326, "train_positive_token_accuracy": 0.0843, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.546, "epoch": 1.7516930022573365, "grad_norm": 15.404215812683105, "learning_rate": 7.472722820532414e-06, "lm_loss": 5.5922, "loss": 1.5356, "step": 776, "text_contrastive_loss": 0.8608, "train_positive_log_prob": -82.8356, "train_positive_token_accuracy": 0.0823, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.6744, "epoch": 1.7539503386004514, "grad_norm": 18.27569007873535, "learning_rate": 7.466414163071934e-06, "lm_loss": 5.5766, "loss": 1.6637, "step": 777, "text_contrastive_loss": 0.8633, "train_positive_log_prob": -82.8614, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.5493, "epoch": 1.7562076749435667, "grad_norm": 18.299306869506836, "learning_rate": 7.460100312234176e-06, "lm_loss": 5.6565, "loss": 1.5948, "step": 778, "text_contrastive_loss": 0.9596, "train_positive_log_prob": -84.4055, "train_positive_token_accuracy": 0.073, "train_positive_token_prob": 0.0282 }, { "contrastive_loss": 0.4558, "epoch": 1.7584650112866818, "grad_norm": 14.698132514953613, "learning_rate": 7.453781281313831e-06, "lm_loss": 5.5634, "loss": 1.3756, "step": 779, "text_contrastive_loss": 0.7269, "train_positive_log_prob": -81.147, "train_positive_token_accuracy": 0.0852, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.5006, "epoch": 1.7607223476297968, "grad_norm": 14.811718940734863, "learning_rate": 7.447457083616494e-06, "lm_loss": 5.5326, "loss": 1.4775, "step": 780, "text_contrastive_loss": 0.8473, "train_positive_log_prob": -82.9922, "train_positive_token_accuracy": 0.0831, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.4054, "epoch": 1.762979683972912, "grad_norm": 15.256193161010742, "learning_rate": 7.441127732458642e-06, "lm_loss": 5.5708, "loss": 1.379, "step": 781, "text_contrastive_loss": 0.8329, "train_positive_log_prob": -83.2141, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.4579, "epoch": 1.765237020316027, "grad_norm": 15.612141609191895, "learning_rate": 7.434793241167601e-06, "lm_loss": 5.4878, "loss": 1.5079, "step": 782, "text_contrastive_loss": 1.0025, "train_positive_log_prob": -80.8577, "train_positive_token_accuracy": 0.0885, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.5026, "epoch": 1.7674943566591423, "grad_norm": 16.013240814208984, "learning_rate": 7.428453623081522e-06, "lm_loss": 5.5057, "loss": 1.5084, "step": 783, "text_contrastive_loss": 0.9105, "train_positive_log_prob": -79.1509, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.5301, "epoch": 1.7697516930022572, "grad_norm": 14.345297813415527, "learning_rate": 7.422108891549349e-06, "lm_loss": 5.5626, "loss": 1.5358, "step": 784, "text_contrastive_loss": 0.8988, "train_positive_log_prob": -83.0716, "train_positive_token_accuracy": 0.0842, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.4109, "epoch": 1.7720090293453725, "grad_norm": 14.167484283447266, "learning_rate": 7.415759059930799e-06, "lm_loss": 5.5699, "loss": 1.323, "step": 785, "text_contrastive_loss": 0.7102, "train_positive_log_prob": -80.807, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.4247, "epoch": 1.7742663656884876, "grad_norm": 13.521806716918945, "learning_rate": 7.409404141596319e-06, "lm_loss": 5.6025, "loss": 1.3949, "step": 786, "text_contrastive_loss": 0.8197, "train_positive_log_prob": -81.7631, "train_positive_token_accuracy": 0.0736, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4207, "epoch": 1.7765237020316027, "grad_norm": 15.362735748291016, "learning_rate": 7.403044149927074e-06, "lm_loss": 5.4708, "loss": 1.3121, "step": 787, "text_contrastive_loss": 0.6886, "train_positive_log_prob": -78.3631, "train_positive_token_accuracy": 0.0725, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.4382, "epoch": 1.7787810383747178, "grad_norm": 13.427018165588379, "learning_rate": 7.396679098314908e-06, "lm_loss": 5.5537, "loss": 1.3769, "step": 788, "text_contrastive_loss": 0.7667, "train_positive_log_prob": -82.6966, "train_positive_token_accuracy": 0.0805, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.5195, "epoch": 1.7810383747178329, "grad_norm": 14.191274642944336, "learning_rate": 7.390309000162321e-06, "lm_loss": 5.6429, "loss": 1.5185, "step": 789, "text_contrastive_loss": 0.8693, "train_positive_log_prob": -84.3314, "train_positive_token_accuracy": 0.0729, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.4409, "epoch": 1.7832957110609482, "grad_norm": 15.111398696899414, "learning_rate": 7.383933868882438e-06, "lm_loss": 5.6692, "loss": 1.4199, "step": 790, "text_contrastive_loss": 0.8241, "train_positive_log_prob": -84.2383, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.405, "epoch": 1.785553047404063, "grad_norm": 12.903446197509766, "learning_rate": 7.377553717898983e-06, "lm_loss": 5.5162, "loss": 1.317, "step": 791, "text_contrastive_loss": 0.7208, "train_positive_log_prob": -80.6727, "train_positive_token_accuracy": 0.0847, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3485, "epoch": 1.7878103837471784, "grad_norm": 12.686439514160156, "learning_rate": 7.37116856064625e-06, "lm_loss": 5.676, "loss": 1.313, "step": 792, "text_contrastive_loss": 0.7937, "train_positive_log_prob": -83.7908, "train_positive_token_accuracy": 0.0807, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4713, "epoch": 1.7900677200902935, "grad_norm": 14.899544715881348, "learning_rate": 7.364778410569071e-06, "lm_loss": 5.6578, "loss": 1.4345, "step": 793, "text_contrastive_loss": 0.7949, "train_positive_log_prob": -85.3269, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.4433, "epoch": 1.7923250564334086, "grad_norm": 15.099254608154297, "learning_rate": 7.358383281122797e-06, "lm_loss": 5.6283, "loss": 1.4054, "step": 794, "text_contrastive_loss": 0.7986, "train_positive_log_prob": -81.9379, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.5166, "epoch": 1.7945823927765236, "grad_norm": 12.828969955444336, "learning_rate": 7.351983185773259e-06, "lm_loss": 5.5089, "loss": 1.5173, "step": 795, "text_contrastive_loss": 0.8996, "train_positive_log_prob": -81.3185, "train_positive_token_accuracy": 0.0831, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.5595, "epoch": 1.7968397291196387, "grad_norm": 17.080780029296875, "learning_rate": 7.345578137996745e-06, "lm_loss": 5.5889, "loss": 1.5754, "step": 796, "text_contrastive_loss": 0.9141, "train_positive_log_prob": -82.4799, "train_positive_token_accuracy": 0.0834, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.5049, "epoch": 1.799097065462754, "grad_norm": 15.56982135772705, "learning_rate": 7.339168151279974e-06, "lm_loss": 5.6143, "loss": 1.5121, "step": 797, "text_contrastive_loss": 0.8917, "train_positive_log_prob": -84.216, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.4489, "epoch": 1.801354401805869, "grad_norm": 15.048324584960938, "learning_rate": 7.332753239120061e-06, "lm_loss": 5.6385, "loss": 1.4327, "step": 798, "text_contrastive_loss": 0.8399, "train_positive_log_prob": -85.109, "train_positive_token_accuracy": 0.0805, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.473, "epoch": 1.8036117381489842, "grad_norm": 14.806596755981445, "learning_rate": 7.326333415024494e-06, "lm_loss": 5.6085, "loss": 1.4863, "step": 799, "text_contrastive_loss": 0.9048, "train_positive_log_prob": -83.4921, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3592, "epoch": 1.8058690744920993, "grad_norm": 12.606931686401367, "learning_rate": 7.319908692511103e-06, "lm_loss": 5.6913, "loss": 1.3472, "step": 800, "text_contrastive_loss": 0.8378, "train_positive_log_prob": -87.3034, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.4869, "epoch": 1.8081264108352144, "grad_norm": 14.769548416137695, "learning_rate": 7.313479085108033e-06, "lm_loss": 5.5339, "loss": 1.552, "step": 801, "text_contrastive_loss": 1.0234, "train_positive_log_prob": -81.4544, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.5564, "epoch": 1.8103837471783297, "grad_norm": 14.806395530700684, "learning_rate": 7.307044606353715e-06, "lm_loss": 5.5761, "loss": 1.5849, "step": 802, "text_contrastive_loss": 0.9418, "train_positive_log_prob": -81.4017, "train_positive_token_accuracy": 0.0805, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.598, "epoch": 1.8126410835214446, "grad_norm": 16.165496826171875, "learning_rate": 7.300605269796839e-06, "lm_loss": 5.6073, "loss": 1.626, "step": 803, "text_contrastive_loss": 0.9346, "train_positive_log_prob": -83.693, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.4908, "epoch": 1.81489841986456, "grad_norm": 18.01096534729004, "learning_rate": 7.2941610889963164e-06, "lm_loss": 5.5842, "loss": 1.5407, "step": 804, "text_contrastive_loss": 0.9828, "train_positive_log_prob": -83.4605, "train_positive_token_accuracy": 0.0748, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.5103, "epoch": 1.8171557562076748, "grad_norm": 15.611786842346191, "learning_rate": 7.2877120775212685e-06, "lm_loss": 5.5133, "loss": 1.4826, "step": 805, "text_contrastive_loss": 0.8421, "train_positive_log_prob": -80.532, "train_positive_token_accuracy": 0.0818, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.4798, "epoch": 1.81941309255079, "grad_norm": 13.229434967041016, "learning_rate": 7.2812582489509844e-06, "lm_loss": 5.5485, "loss": 1.4481, "step": 806, "text_contrastive_loss": 0.8268, "train_positive_log_prob": -81.2095, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.468, "epoch": 1.8216704288939052, "grad_norm": 15.610215187072754, "learning_rate": 7.2747996168748915e-06, "lm_loss": 5.7483, "loss": 1.4664, "step": 807, "text_contrastive_loss": 0.8473, "train_positive_log_prob": -87.0547, "train_positive_token_accuracy": 0.0748, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.3884, "epoch": 1.8239277652370203, "grad_norm": 13.929288864135742, "learning_rate": 7.26833619489254e-06, "lm_loss": 5.569, "loss": 1.2913, "step": 808, "text_contrastive_loss": 0.6921, "train_positive_log_prob": -82.9014, "train_positive_token_accuracy": 0.09, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.5016, "epoch": 1.8261851015801356, "grad_norm": 18.220733642578125, "learning_rate": 7.261867996613559e-06, "lm_loss": 5.6519, "loss": 1.6004, "step": 809, "text_contrastive_loss": 1.0672, "train_positive_log_prob": -85.3075, "train_positive_token_accuracy": 0.0735, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.3633, "epoch": 1.8284424379232505, "grad_norm": 12.820158004760742, "learning_rate": 7.255395035657639e-06, "lm_loss": 5.6493, "loss": 1.3667, "step": 810, "text_contrastive_loss": 0.8769, "train_positive_log_prob": -82.6875, "train_positive_token_accuracy": 0.0811, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.3875, "epoch": 1.8306997742663658, "grad_norm": 14.121953964233398, "learning_rate": 7.2489173256544975e-06, "lm_loss": 5.5581, "loss": 1.3184, "step": 811, "text_contrastive_loss": 0.7502, "train_positive_log_prob": -83.6329, "train_positive_token_accuracy": 0.0776, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4855, "epoch": 1.8329571106094809, "grad_norm": 14.91685676574707, "learning_rate": 7.242434880243851e-06, "lm_loss": 5.5543, "loss": 1.4778, "step": 812, "text_contrastive_loss": 0.8737, "train_positive_log_prob": -84.0102, "train_positive_token_accuracy": 0.0826, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.4148, "epoch": 1.835214446952596, "grad_norm": 13.67586898803711, "learning_rate": 7.235947713075389e-06, "lm_loss": 5.6382, "loss": 1.3529, "step": 813, "text_contrastive_loss": 0.7487, "train_positive_log_prob": -83.2335, "train_positive_token_accuracy": 0.0717, "train_positive_token_prob": 0.0287 }, { "contrastive_loss": 0.4714, "epoch": 1.837471783295711, "grad_norm": 15.072975158691406, "learning_rate": 7.229455837808741e-06, "lm_loss": 5.7478, "loss": 1.4768, "step": 814, "text_contrastive_loss": 0.8612, "train_positive_log_prob": -85.4531, "train_positive_token_accuracy": 0.0744, "train_positive_token_prob": 0.0286 }, { "contrastive_loss": 0.5182, "epoch": 1.8397291196388261, "grad_norm": 15.159923553466797, "learning_rate": 7.222959268113452e-06, "lm_loss": 5.5198, "loss": 1.5056, "step": 815, "text_contrastive_loss": 0.871, "train_positive_log_prob": -81.5024, "train_positive_token_accuracy": 0.0823, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.476, "epoch": 1.8419864559819414, "grad_norm": 13.744406700134277, "learning_rate": 7.216458017668951e-06, "lm_loss": 5.6182, "loss": 1.4193, "step": 816, "text_contrastive_loss": 0.763, "train_positive_log_prob": -83.7656, "train_positive_token_accuracy": 0.0765, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4769, "epoch": 1.8442437923250563, "grad_norm": 16.792842864990234, "learning_rate": 7.2099521001645225e-06, "lm_loss": 5.6844, "loss": 1.4495, "step": 817, "text_contrastive_loss": 0.8084, "train_positive_log_prob": -84.5348, "train_positive_token_accuracy": 0.0767, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.5317, "epoch": 1.8465011286681716, "grad_norm": 15.967273712158203, "learning_rate": 7.20344152929928e-06, "lm_loss": 5.5861, "loss": 1.5331, "step": 818, "text_contrastive_loss": 0.8855, "train_positive_log_prob": -82.8577, "train_positive_token_accuracy": 0.0809, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.3967, "epoch": 1.8487584650112867, "grad_norm": 13.10386848449707, "learning_rate": 7.19692631878213e-06, "lm_loss": 5.7143, "loss": 1.3304, "step": 819, "text_contrastive_loss": 0.7244, "train_positive_log_prob": -87.948, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.4624, "epoch": 1.8510158013544018, "grad_norm": 14.461969375610352, "learning_rate": 7.190406482331757e-06, "lm_loss": 5.5348, "loss": 1.4178, "step": 820, "text_contrastive_loss": 0.8039, "train_positive_log_prob": -80.4716, "train_positive_token_accuracy": 0.0731, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.5617, "epoch": 1.853273137697517, "grad_norm": 18.000465393066406, "learning_rate": 7.183882033676579e-06, "lm_loss": 5.5423, "loss": 1.5223, "step": 821, "text_contrastive_loss": 0.8127, "train_positive_log_prob": -83.162, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.5078, "epoch": 1.855530474040632, "grad_norm": 15.518335342407227, "learning_rate": 7.177352986554729e-06, "lm_loss": 5.5988, "loss": 1.5518, "step": 822, "text_contrastive_loss": 0.968, "train_positive_log_prob": -84.3963, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.0288 }, { "contrastive_loss": 0.4374, "epoch": 1.8577878103837473, "grad_norm": 15.986565589904785, "learning_rate": 7.1708193547140205e-06, "lm_loss": 5.7263, "loss": 1.364, "step": 823, "text_contrastive_loss": 0.708, "train_positive_log_prob": -85.1994, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.5156, "epoch": 1.8600451467268622, "grad_norm": 17.1726131439209, "learning_rate": 7.164281151911923e-06, "lm_loss": 5.58, "loss": 1.4341, "step": 824, "text_contrastive_loss": 0.721, "train_positive_log_prob": -84.4152, "train_positive_token_accuracy": 0.0726, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.5864, "epoch": 1.8623024830699775, "grad_norm": 15.21026611328125, "learning_rate": 7.157738391915531e-06, "lm_loss": 5.5732, "loss": 1.631, "step": 825, "text_contrastive_loss": 0.9744, "train_positive_log_prob": -81.9442, "train_positive_token_accuracy": 0.0887, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3965, "epoch": 1.8645598194130926, "grad_norm": 13.081595420837402, "learning_rate": 7.151191088501531e-06, "lm_loss": 5.6041, "loss": 1.3544, "step": 826, "text_contrastive_loss": 0.795, "train_positive_log_prob": -84.1719, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0288 }, { "contrastive_loss": 0.5501, "epoch": 1.8668171557562077, "grad_norm": 15.819600105285645, "learning_rate": 7.14463925545618e-06, "lm_loss": 5.6947, "loss": 1.5524, "step": 827, "text_contrastive_loss": 0.8656, "train_positive_log_prob": -84.9321, "train_positive_token_accuracy": 0.076, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.4349, "epoch": 1.8690744920993227, "grad_norm": 15.383347511291504, "learning_rate": 7.138082906575271e-06, "lm_loss": 5.5701, "loss": 1.4005, "step": 828, "text_contrastive_loss": 0.8173, "train_positive_log_prob": -84.0264, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.4265, "epoch": 1.8713318284424378, "grad_norm": 14.575815200805664, "learning_rate": 7.131522055664109e-06, "lm_loss": 5.6305, "loss": 1.4322, "step": 829, "text_contrastive_loss": 0.8852, "train_positive_log_prob": -82.6329, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.3829, "epoch": 1.8735891647855532, "grad_norm": 13.55797004699707, "learning_rate": 7.124956716537471e-06, "lm_loss": 5.5966, "loss": 1.3409, "step": 830, "text_contrastive_loss": 0.7966, "train_positive_log_prob": -83.8393, "train_positive_token_accuracy": 0.0866, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4603, "epoch": 1.875846501128668, "grad_norm": 13.942564964294434, "learning_rate": 7.118386903019594e-06, "lm_loss": 5.6932, "loss": 1.4178, "step": 831, "text_contrastive_loss": 0.7764, "train_positive_log_prob": -81.8914, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4562, "epoch": 1.8781038374717833, "grad_norm": 13.51505184173584, "learning_rate": 7.111812628944132e-06, "lm_loss": 5.6058, "loss": 1.4571, "step": 832, "text_contrastive_loss": 0.8806, "train_positive_log_prob": -82.7086, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.0282 }, { "contrastive_loss": 0.5499, "epoch": 1.8803611738148984, "grad_norm": 16.279476165771484, "learning_rate": 7.105233908154128e-06, "lm_loss": 5.5627, "loss": 1.5165, "step": 833, "text_contrastive_loss": 0.8206, "train_positive_log_prob": -82.4866, "train_positive_token_accuracy": 0.0785, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.5027, "epoch": 1.8826185101580135, "grad_norm": 16.14896583557129, "learning_rate": 7.098650754501994e-06, "lm_loss": 5.637, "loss": 1.5065, "step": 834, "text_contrastive_loss": 0.8802, "train_positive_log_prob": -83.8451, "train_positive_token_accuracy": 0.0814, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4518, "epoch": 1.8848758465011288, "grad_norm": 15.16618824005127, "learning_rate": 7.0920631818494745e-06, "lm_loss": 5.6372, "loss": 1.5074, "step": 835, "text_contrastive_loss": 0.9837, "train_positive_log_prob": -83.4141, "train_positive_token_accuracy": 0.0808, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.4711, "epoch": 1.8871331828442437, "grad_norm": 15.736971855163574, "learning_rate": 7.085471204067616e-06, "lm_loss": 5.544, "loss": 1.4553, "step": 836, "text_contrastive_loss": 0.8596, "train_positive_log_prob": -82.9027, "train_positive_token_accuracy": 0.0765, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.3976, "epoch": 1.889390519187359, "grad_norm": 14.282797813415527, "learning_rate": 7.078874835036742e-06, "lm_loss": 5.605, "loss": 1.3383, "step": 837, "text_contrastive_loss": 0.7603, "train_positive_log_prob": -83.8771, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0288 }, { "contrastive_loss": 0.5501, "epoch": 1.8916478555304739, "grad_norm": 16.621746063232422, "learning_rate": 7.072274088646425e-06, "lm_loss": 5.6455, "loss": 1.4939, "step": 838, "text_contrastive_loss": 0.7583, "train_positive_log_prob": -83.645, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.488, "epoch": 1.8939051918735892, "grad_norm": 16.08866310119629, "learning_rate": 7.065668978795449e-06, "lm_loss": 5.6173, "loss": 1.5136, "step": 839, "text_contrastive_loss": 0.9277, "train_positive_log_prob": -82.9, "train_positive_token_accuracy": 0.0791, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3812, "epoch": 1.8961625282167043, "grad_norm": 13.742374420166016, "learning_rate": 7.059059519391794e-06, "lm_loss": 5.5417, "loss": 1.2894, "step": 840, "text_contrastive_loss": 0.708, "train_positive_log_prob": -84.0539, "train_positive_token_accuracy": 0.0865, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.4737, "epoch": 1.8984198645598194, "grad_norm": 16.210573196411133, "learning_rate": 7.05244572435259e-06, "lm_loss": 5.5965, "loss": 1.4377, "step": 841, "text_contrastive_loss": 0.8088, "train_positive_log_prob": -82.3539, "train_positive_token_accuracy": 0.0719, "train_positive_token_prob": 0.0283 }, { "contrastive_loss": 0.4684, "epoch": 1.9006772009029347, "grad_norm": 15.303285598754883, "learning_rate": 7.045827607604103e-06, "lm_loss": 5.6008, "loss": 1.3846, "step": 842, "text_contrastive_loss": 0.7122, "train_positive_log_prob": -82.5049, "train_positive_token_accuracy": 0.0825, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.4653, "epoch": 1.9029345372460496, "grad_norm": 14.521797180175781, "learning_rate": 7.039205183081694e-06, "lm_loss": 5.607, "loss": 1.4238, "step": 843, "text_contrastive_loss": 0.7956, "train_positive_log_prob": -83.471, "train_positive_token_accuracy": 0.0821, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.4137, "epoch": 1.9051918735891649, "grad_norm": 13.851727485656738, "learning_rate": 7.0325784647298e-06, "lm_loss": 5.7506, "loss": 1.4192, "step": 844, "text_contrastive_loss": 0.861, "train_positive_log_prob": -86.1233, "train_positive_token_accuracy": 0.0822, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.5484, "epoch": 1.90744920993228, "grad_norm": 14.06857967376709, "learning_rate": 7.0259474665018915e-06, "lm_loss": 5.5755, "loss": 1.4758, "step": 845, "text_contrastive_loss": 0.7397, "train_positive_log_prob": -84.3952, "train_positive_token_accuracy": 0.0911, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.5151, "epoch": 1.909706546275395, "grad_norm": 14.030274391174316, "learning_rate": 7.019312202360457e-06, "lm_loss": 5.4148, "loss": 1.4335, "step": 846, "text_contrastive_loss": 0.7538, "train_positive_log_prob": -79.4206, "train_positive_token_accuracy": 0.0913, "train_positive_token_prob": 0.0328 }, { "contrastive_loss": 0.4719, "epoch": 1.9119638826185101, "grad_norm": 17.120309829711914, "learning_rate": 7.012672686276969e-06, "lm_loss": 5.519, "loss": 1.5222, "step": 847, "text_contrastive_loss": 0.9969, "train_positive_log_prob": -80.3922, "train_positive_token_accuracy": 0.0823, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.4682, "epoch": 1.9142212189616252, "grad_norm": 14.32099437713623, "learning_rate": 7.006028932231847e-06, "lm_loss": 5.5321, "loss": 1.4433, "step": 848, "text_contrastive_loss": 0.8439, "train_positive_log_prob": -81.9454, "train_positive_token_accuracy": 0.0857, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.5206, "epoch": 1.9164785553047405, "grad_norm": 16.19694709777832, "learning_rate": 6.999380954214438e-06, "lm_loss": 5.4697, "loss": 1.4496, "step": 849, "text_contrastive_loss": 0.7641, "train_positive_log_prob": -78.7331, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.4355, "epoch": 1.9187358916478554, "grad_norm": 15.280251502990723, "learning_rate": 6.992728766222982e-06, "lm_loss": 5.5791, "loss": 1.3745, "step": 850, "text_contrastive_loss": 0.7621, "train_positive_log_prob": -83.1853, "train_positive_token_accuracy": 0.0825, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4448, "epoch": 1.9209932279909707, "grad_norm": 15.748678207397461, "learning_rate": 6.9860723822645825e-06, "lm_loss": 5.6034, "loss": 1.404, "step": 851, "text_contrastive_loss": 0.7978, "train_positive_log_prob": -82.9328, "train_positive_token_accuracy": 0.0765, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.4533, "epoch": 1.9232505643340858, "grad_norm": 13.725008010864258, "learning_rate": 6.979411816355183e-06, "lm_loss": 5.5198, "loss": 1.4633, "step": 852, "text_contrastive_loss": 0.9161, "train_positive_log_prob": -81.696, "train_positive_token_accuracy": 0.0823, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.5777, "epoch": 1.925507900677201, "grad_norm": 16.754261016845703, "learning_rate": 6.972747082519526e-06, "lm_loss": 5.6656, "loss": 1.6488, "step": 853, "text_contrastive_loss": 1.009, "train_positive_log_prob": -85.5092, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.4732, "epoch": 1.927765237020316, "grad_norm": 13.837919235229492, "learning_rate": 6.966078194791133e-06, "lm_loss": 5.4962, "loss": 1.3674, "step": 854, "text_contrastive_loss": 0.6891, "train_positive_log_prob": -80.7456, "train_positive_token_accuracy": 0.0782, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4928, "epoch": 1.930022573363431, "grad_norm": 14.32505989074707, "learning_rate": 6.959405167212278e-06, "lm_loss": 5.6566, "loss": 1.5324, "step": 855, "text_contrastive_loss": 0.9478, "train_positive_log_prob": -81.7352, "train_positive_token_accuracy": 0.0781, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4668, "epoch": 1.9322799097065464, "grad_norm": 14.705467224121094, "learning_rate": 6.952728013833941e-06, "lm_loss": 5.6002, "loss": 1.4817, "step": 856, "text_contrastive_loss": 0.9097, "train_positive_log_prob": -84.2861, "train_positive_token_accuracy": 0.0789, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.5499, "epoch": 1.9345372460496613, "grad_norm": 15.175885200500488, "learning_rate": 6.946046748715796e-06, "lm_loss": 5.5416, "loss": 1.5401, "step": 857, "text_contrastive_loss": 0.8722, "train_positive_log_prob": -81.0438, "train_positive_token_accuracy": 0.0807, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.5026, "epoch": 1.9367945823927766, "grad_norm": 16.210250854492188, "learning_rate": 6.9393613859261755e-06, "lm_loss": 5.6146, "loss": 1.4351, "step": 858, "text_contrastive_loss": 0.7421, "train_positive_log_prob": -82.7325, "train_positive_token_accuracy": 0.0824, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4726, "epoch": 1.9390519187358917, "grad_norm": 14.95361328125, "learning_rate": 6.932671939542037e-06, "lm_loss": 5.5204, "loss": 1.4901, "step": 859, "text_contrastive_loss": 0.931, "train_positive_log_prob": -83.196, "train_positive_token_accuracy": 0.084, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.4611, "epoch": 1.9413092550790068, "grad_norm": 13.421514511108398, "learning_rate": 6.925978423648941e-06, "lm_loss": 5.4818, "loss": 1.4622, "step": 860, "text_contrastive_loss": 0.9058, "train_positive_log_prob": -80.0633, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.4955, "epoch": 1.9435665914221218, "grad_norm": 14.257747650146484, "learning_rate": 6.919280852341011e-06, "lm_loss": 5.4519, "loss": 1.4749, "step": 861, "text_contrastive_loss": 0.8685, "train_positive_log_prob": -79.9278, "train_positive_token_accuracy": 0.081, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.4035, "epoch": 1.945823927765237, "grad_norm": 12.491300582885742, "learning_rate": 6.912579239720913e-06, "lm_loss": 5.598, "loss": 1.3188, "step": 862, "text_contrastive_loss": 0.711, "train_positive_log_prob": -81.0553, "train_positive_token_accuracy": 0.0807, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.4694, "epoch": 1.9480812641083523, "grad_norm": 17.115652084350586, "learning_rate": 6.9058735998998224e-06, "lm_loss": 5.5599, "loss": 1.4206, "step": 863, "text_contrastive_loss": 0.7905, "train_positive_log_prob": -82.4229, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.5671, "epoch": 1.9503386004514671, "grad_norm": 13.943278312683105, "learning_rate": 6.899163946997396e-06, "lm_loss": 5.5436, "loss": 1.5007, "step": 864, "text_contrastive_loss": 0.7585, "train_positive_log_prob": -81.8299, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.5044, "epoch": 1.9525959367945824, "grad_norm": 15.068373680114746, "learning_rate": 6.892450295141737e-06, "lm_loss": 5.5476, "loss": 1.4091, "step": 865, "text_contrastive_loss": 0.6998, "train_positive_log_prob": -82.3679, "train_positive_token_accuracy": 0.0831, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.4936, "epoch": 1.9548532731376975, "grad_norm": 13.565226554870605, "learning_rate": 6.885732658469374e-06, "lm_loss": 5.6471, "loss": 1.5469, "step": 866, "text_contrastive_loss": 0.9771, "train_positive_log_prob": -83.9096, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.421, "epoch": 1.9571106094808126, "grad_norm": 13.99299430847168, "learning_rate": 6.8790110511252195e-06, "lm_loss": 5.6158, "loss": 1.3815, "step": 867, "text_contrastive_loss": 0.7979, "train_positive_log_prob": -83.9282, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.0291 }, { "contrastive_loss": 0.5897, "epoch": 1.959367945823928, "grad_norm": 15.685070037841797, "learning_rate": 6.872285487262555e-06, "lm_loss": 5.5884, "loss": 1.6263, "step": 868, "text_contrastive_loss": 0.9556, "train_positive_log_prob": -82.8237, "train_positive_token_accuracy": 0.0783, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.545, "epoch": 1.9616252821670428, "grad_norm": 15.854179382324219, "learning_rate": 6.865555981042983e-06, "lm_loss": 5.6359, "loss": 1.5263, "step": 869, "text_contrastive_loss": 0.8353, "train_positive_log_prob": -82.9802, "train_positive_token_accuracy": 0.0761, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.4383, "epoch": 1.963882618510158, "grad_norm": 16.044401168823242, "learning_rate": 6.858822546636417e-06, "lm_loss": 5.6004, "loss": 1.4463, "step": 870, "text_contrastive_loss": 0.896, "train_positive_log_prob": -84.2292, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.4094, "epoch": 1.966139954853273, "grad_norm": 13.492218971252441, "learning_rate": 6.852085198221035e-06, "lm_loss": 5.5522, "loss": 1.4249, "step": 871, "text_contrastive_loss": 0.9206, "train_positive_log_prob": -81.5495, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.5196, "epoch": 1.9683972911963883, "grad_norm": 15.734671592712402, "learning_rate": 6.845343949983258e-06, "lm_loss": 5.5699, "loss": 1.5554, "step": 872, "text_contrastive_loss": 0.9576, "train_positive_log_prob": -81.1526, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0281 }, { "contrastive_loss": 0.4282, "epoch": 1.9706546275395034, "grad_norm": 14.115796089172363, "learning_rate": 6.838598816117725e-06, "lm_loss": 5.6147, "loss": 1.4456, "step": 873, "text_contrastive_loss": 0.9118, "train_positive_log_prob": -84.8847, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.461, "epoch": 1.9729119638826185, "grad_norm": 14.111851692199707, "learning_rate": 6.831849810827247e-06, "lm_loss": 5.4677, "loss": 1.4165, "step": 874, "text_contrastive_loss": 0.8173, "train_positive_log_prob": -81.0533, "train_positive_token_accuracy": 0.0812, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.5937, "epoch": 1.9751693002257338, "grad_norm": 16.07346534729004, "learning_rate": 6.825096948322791e-06, "lm_loss": 5.4724, "loss": 1.6095, "step": 875, "text_contrastive_loss": 0.9372, "train_positive_log_prob": -82.13, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.4975, "epoch": 1.9774266365688487, "grad_norm": 14.203269004821777, "learning_rate": 6.818340242823449e-06, "lm_loss": 5.5741, "loss": 1.433, "step": 876, "text_contrastive_loss": 0.756, "train_positive_log_prob": -82.7429, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.4657, "epoch": 1.979683972911964, "grad_norm": 13.515305519104004, "learning_rate": 6.8115797085564e-06, "lm_loss": 5.5507, "loss": 1.3591, "step": 877, "text_contrastive_loss": 0.6766, "train_positive_log_prob": -81.7874, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.4917, "epoch": 1.981941309255079, "grad_norm": 16.76006507873535, "learning_rate": 6.804815359756887e-06, "lm_loss": 5.6322, "loss": 1.472, "step": 878, "text_contrastive_loss": 0.8342, "train_positive_log_prob": -86.0254, "train_positive_token_accuracy": 0.0793, "train_positive_token_prob": 0.0285 }, { "contrastive_loss": 0.4057, "epoch": 1.9841986455981941, "grad_norm": 14.97724437713623, "learning_rate": 6.798047210668185e-06, "lm_loss": 5.638, "loss": 1.3498, "step": 879, "text_contrastive_loss": 0.7604, "train_positive_log_prob": -84.4639, "train_positive_token_accuracy": 0.0746, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.4114, "epoch": 1.9864559819413092, "grad_norm": 14.190703392028809, "learning_rate": 6.7912752755415716e-06, "lm_loss": 5.6413, "loss": 1.452, "step": 880, "text_contrastive_loss": 0.9531, "train_positive_log_prob": -83.3415, "train_positive_token_accuracy": 0.082, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.4907, "epoch": 1.9887133182844243, "grad_norm": 14.56006145477295, "learning_rate": 6.7844995686362955e-06, "lm_loss": 5.5309, "loss": 1.5096, "step": 881, "text_contrastive_loss": 0.9315, "train_positive_log_prob": -82.1201, "train_positive_token_accuracy": 0.0868, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.3811, "epoch": 1.9909706546275396, "grad_norm": 12.781140327453613, "learning_rate": 6.777720104219548e-06, "lm_loss": 5.4105, "loss": 1.3768, "step": 882, "text_contrastive_loss": 0.9093, "train_positive_log_prob": -81.5598, "train_positive_token_accuracy": 0.0886, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.55, "epoch": 1.9932279909706545, "grad_norm": 15.659453392028809, "learning_rate": 6.770936896566434e-06, "lm_loss": 5.5339, "loss": 1.5144, "step": 883, "text_contrastive_loss": 0.822, "train_positive_log_prob": -79.573, "train_positive_token_accuracy": 0.0774, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.3921, "epoch": 1.9954853273137698, "grad_norm": 13.856972694396973, "learning_rate": 6.7641499599599355e-06, "lm_loss": 5.5311, "loss": 1.3099, "step": 884, "text_contrastive_loss": 0.7294, "train_positive_log_prob": -82.2734, "train_positive_token_accuracy": 0.0838, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.5687, "epoch": 1.997742663656885, "grad_norm": 15.394890785217285, "learning_rate": 6.757359308690889e-06, "lm_loss": 5.6018, "loss": 1.6017, "step": 885, "text_contrastive_loss": 0.9457, "train_positive_log_prob": -81.442, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.2688, "epoch": 2.0, "grad_norm": 14.641634941101074, "learning_rate": 6.750564957057958e-06, "lm_loss": 5.4777, "loss": 1.0655, "step": 886, "text_contrastive_loss": 0.4979, "train_positive_log_prob": -81.6036, "train_positive_token_accuracy": 0.0776, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.4136, "epoch": 2.0022573363431153, "grad_norm": 13.20592975616455, "learning_rate": 6.743766919367588e-06, "lm_loss": 5.6038, "loss": 1.3932, "step": 887, "text_contrastive_loss": 0.8384, "train_positive_log_prob": -84.9565, "train_positive_token_accuracy": 0.0804, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.4362, "epoch": 2.00451467268623, "grad_norm": 12.936826705932617, "learning_rate": 6.736965209933992e-06, "lm_loss": 5.4725, "loss": 1.3581, "step": 888, "text_contrastive_loss": 0.7494, "train_positive_log_prob": -80.7032, "train_positive_token_accuracy": 0.0774, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.4231, "epoch": 2.0067720090293455, "grad_norm": 13.164968490600586, "learning_rate": 6.730159843079113e-06, "lm_loss": 5.5138, "loss": 1.4154, "step": 889, "text_contrastive_loss": 0.8817, "train_positive_log_prob": -82.879, "train_positive_token_accuracy": 0.0852, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.4867, "epoch": 2.0090293453724604, "grad_norm": 15.41411018371582, "learning_rate": 6.723350833132596e-06, "lm_loss": 5.4926, "loss": 1.4675, "step": 890, "text_contrastive_loss": 0.863, "train_positive_log_prob": -80.0519, "train_positive_token_accuracy": 0.0868, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.365, "epoch": 2.0112866817155757, "grad_norm": 12.457436561584473, "learning_rate": 6.716538194431754e-06, "lm_loss": 5.5439, "loss": 1.2308, "step": 891, "text_contrastive_loss": 0.6227, "train_positive_log_prob": -83.6553, "train_positive_token_accuracy": 0.0896, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.455, "epoch": 2.0135440180586905, "grad_norm": 14.338443756103516, "learning_rate": 6.7097219413215474e-06, "lm_loss": 5.6032, "loss": 1.4843, "step": 892, "text_contrastive_loss": 0.9379, "train_positive_log_prob": -83.8695, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.458, "epoch": 2.015801354401806, "grad_norm": 14.148242950439453, "learning_rate": 6.702902088154539e-06, "lm_loss": 5.6708, "loss": 1.4636, "step": 893, "text_contrastive_loss": 0.8769, "train_positive_log_prob": -83.5063, "train_positive_token_accuracy": 0.0809, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.4041, "epoch": 2.018058690744921, "grad_norm": 13.274083137512207, "learning_rate": 6.696078649290878e-06, "lm_loss": 5.5271, "loss": 1.32, "step": 894, "text_contrastive_loss": 0.7263, "train_positive_log_prob": -81.4611, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.3685, "epoch": 2.020316027088036, "grad_norm": 14.36821460723877, "learning_rate": 6.689251639098261e-06, "lm_loss": 5.5055, "loss": 1.2879, "step": 895, "text_contrastive_loss": 0.7377, "train_positive_log_prob": -80.4559, "train_positive_token_accuracy": 0.0901, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.5273, "epoch": 2.0225733634311513, "grad_norm": 15.859416961669922, "learning_rate": 6.682421071951907e-06, "lm_loss": 5.6741, "loss": 1.5354, "step": 896, "text_contrastive_loss": 0.8815, "train_positive_log_prob": -85.2214, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.3596, "epoch": 2.024830699774266, "grad_norm": 10.912479400634766, "learning_rate": 6.67558696223452e-06, "lm_loss": 5.5394, "loss": 1.2973, "step": 897, "text_contrastive_loss": 0.7675, "train_positive_log_prob": -81.6643, "train_positive_token_accuracy": 0.071, "train_positive_token_prob": 0.0287 }, { "contrastive_loss": 0.4024, "epoch": 2.0270880361173815, "grad_norm": 14.293588638305664, "learning_rate": 6.668749324336268e-06, "lm_loss": 5.5751, "loss": 1.3196, "step": 898, "text_contrastive_loss": 0.7193, "train_positive_log_prob": -84.0258, "train_positive_token_accuracy": 0.0809, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.4079, "epoch": 2.0293453724604964, "grad_norm": 14.883182525634766, "learning_rate": 6.661908172654746e-06, "lm_loss": 5.4958, "loss": 1.3871, "step": 899, "text_contrastive_loss": 0.8592, "train_positive_log_prob": -81.7061, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.4327, "epoch": 2.0316027088036117, "grad_norm": 13.25998592376709, "learning_rate": 6.65506352159495e-06, "lm_loss": 5.5033, "loss": 1.4092, "step": 900, "text_contrastive_loss": 0.8522, "train_positive_log_prob": -81.9003, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.4036, "epoch": 2.033860045146727, "grad_norm": 12.483712196350098, "learning_rate": 6.6482153855692395e-06, "lm_loss": 5.587, "loss": 1.3186, "step": 901, "text_contrastive_loss": 0.7127, "train_positive_log_prob": -82.5823, "train_positive_token_accuracy": 0.0758, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.4604, "epoch": 2.036117381489842, "grad_norm": 13.481544494628906, "learning_rate": 6.64136377899732e-06, "lm_loss": 5.4519, "loss": 1.3855, "step": 902, "text_contrastive_loss": 0.7599, "train_positive_log_prob": -80.2875, "train_positive_token_accuracy": 0.091, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.5341, "epoch": 2.038374717832957, "grad_norm": 15.048299789428711, "learning_rate": 6.6345087163061935e-06, "lm_loss": 5.5481, "loss": 1.5891, "step": 903, "text_contrastive_loss": 1.0005, "train_positive_log_prob": -81.3128, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.4192, "epoch": 2.040632054176072, "grad_norm": 12.896744728088379, "learning_rate": 6.627650211930152e-06, "lm_loss": 5.6139, "loss": 1.4559, "step": 904, "text_contrastive_loss": 0.9506, "train_positive_log_prob": -83.2017, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.4721, "epoch": 2.0428893905191874, "grad_norm": 13.316905975341797, "learning_rate": 6.620788280310722e-06, "lm_loss": 5.5356, "loss": 1.4551, "step": 905, "text_contrastive_loss": 0.859, "train_positive_log_prob": -82.946, "train_positive_token_accuracy": 0.0743, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3342, "epoch": 2.0451467268623027, "grad_norm": 12.637482643127441, "learning_rate": 6.613922935896659e-06, "lm_loss": 5.5852, "loss": 1.2455, "step": 906, "text_contrastive_loss": 0.7056, "train_positive_log_prob": -82.7525, "train_positive_token_accuracy": 0.0775, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.4832, "epoch": 2.0474040632054176, "grad_norm": 15.180583000183105, "learning_rate": 6.607054193143894e-06, "lm_loss": 5.5181, "loss": 1.493, "step": 907, "text_contrastive_loss": 0.916, "train_positive_log_prob": -81.5231, "train_positive_token_accuracy": 0.0851, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.3542, "epoch": 2.049661399548533, "grad_norm": 12.676249504089355, "learning_rate": 6.600182066515519e-06, "lm_loss": 5.5761, "loss": 1.2865, "step": 908, "text_contrastive_loss": 0.7494, "train_positive_log_prob": -81.6597, "train_positive_token_accuracy": 0.0814, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3492, "epoch": 2.0519187358916477, "grad_norm": 11.73995590209961, "learning_rate": 6.593306570481751e-06, "lm_loss": 5.5869, "loss": 1.2174, "step": 909, "text_contrastive_loss": 0.619, "train_positive_log_prob": -83.1531, "train_positive_token_accuracy": 0.0829, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.4948, "epoch": 2.054176072234763, "grad_norm": 14.678378105163574, "learning_rate": 6.586427719519901e-06, "lm_loss": 5.4833, "loss": 1.5127, "step": 910, "text_contrastive_loss": 0.9392, "train_positive_log_prob": -80.2189, "train_positive_token_accuracy": 0.0732, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.4893, "epoch": 2.056433408577878, "grad_norm": 14.416768074035645, "learning_rate": 6.579545528114344e-06, "lm_loss": 5.5788, "loss": 1.4894, "step": 911, "text_contrastive_loss": 0.8845, "train_positive_log_prob": -81.7893, "train_positive_token_accuracy": 0.0738, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4469, "epoch": 2.0586907449209932, "grad_norm": 14.04563045501709, "learning_rate": 6.572660010756489e-06, "lm_loss": 5.4231, "loss": 1.3099, "step": 912, "text_contrastive_loss": 0.6414, "train_positive_log_prob": -79.2252, "train_positive_token_accuracy": 0.0872, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.3964, "epoch": 2.0609480812641086, "grad_norm": 12.90860366821289, "learning_rate": 6.565771181944747e-06, "lm_loss": 5.3843, "loss": 1.2803, "step": 913, "text_contrastive_loss": 0.691, "train_positive_log_prob": -77.2742, "train_positive_token_accuracy": 0.0852, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.4296, "epoch": 2.0632054176072234, "grad_norm": 14.181915283203125, "learning_rate": 6.558879056184505e-06, "lm_loss": 5.6611, "loss": 1.3225, "step": 914, "text_contrastive_loss": 0.6536, "train_positive_log_prob": -84.2789, "train_positive_token_accuracy": 0.0722, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4067, "epoch": 2.0654627539503387, "grad_norm": 13.280404090881348, "learning_rate": 6.551983647988089e-06, "lm_loss": 5.5431, "loss": 1.3324, "step": 915, "text_contrastive_loss": 0.7427, "train_positive_log_prob": -80.6588, "train_positive_token_accuracy": 0.0846, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.5256, "epoch": 2.0677200902934536, "grad_norm": 15.21223258972168, "learning_rate": 6.545084971874738e-06, "lm_loss": 5.5956, "loss": 1.6109, "step": 916, "text_contrastive_loss": 1.0513, "train_positive_log_prob": -81.8115, "train_positive_token_accuracy": 0.0847, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3383, "epoch": 2.069977426636569, "grad_norm": 12.661205291748047, "learning_rate": 6.5381830423705714e-06, "lm_loss": 5.5814, "loss": 1.2323, "step": 917, "text_contrastive_loss": 0.6718, "train_positive_log_prob": -82.2398, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.4661, "epoch": 2.072234762979684, "grad_norm": 13.613990783691406, "learning_rate": 6.531277874008562e-06, "lm_loss": 5.4936, "loss": 1.5213, "step": 918, "text_contrastive_loss": 1.0117, "train_positive_log_prob": -81.3131, "train_positive_token_accuracy": 0.0866, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.3477, "epoch": 2.074492099322799, "grad_norm": 14.345525741577148, "learning_rate": 6.5243694813284975e-06, "lm_loss": 5.7273, "loss": 1.2323, "step": 919, "text_contrastive_loss": 0.6239, "train_positive_log_prob": -85.0802, "train_positive_token_accuracy": 0.0749, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.3837, "epoch": 2.0767494356659144, "grad_norm": 12.754055976867676, "learning_rate": 6.517457878876958e-06, "lm_loss": 5.619, "loss": 1.3939, "step": 920, "text_contrastive_loss": 0.8966, "train_positive_log_prob": -82.7717, "train_positive_token_accuracy": 0.0721, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.3951, "epoch": 2.0790067720090293, "grad_norm": 13.097175598144531, "learning_rate": 6.510543081207281e-06, "lm_loss": 5.5249, "loss": 1.3679, "step": 921, "text_contrastive_loss": 0.8406, "train_positive_log_prob": -82.0078, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.5029, "epoch": 2.0812641083521446, "grad_norm": 16.092721939086914, "learning_rate": 6.503625102879534e-06, "lm_loss": 5.5724, "loss": 1.417, "step": 922, "text_contrastive_loss": 0.7137, "train_positive_log_prob": -83.2183, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3339, "epoch": 2.0835214446952595, "grad_norm": 13.593026161193848, "learning_rate": 6.496703958460479e-06, "lm_loss": 5.502, "loss": 1.2641, "step": 923, "text_contrastive_loss": 0.76, "train_positive_log_prob": -81.2405, "train_positive_token_accuracy": 0.078, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.4719, "epoch": 2.0857787810383748, "grad_norm": 14.608567237854004, "learning_rate": 6.489779662523545e-06, "lm_loss": 5.6129, "loss": 1.4514, "step": 924, "text_contrastive_loss": 0.8363, "train_positive_log_prob": -84.1724, "train_positive_token_accuracy": 0.0758, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.4405, "epoch": 2.0880361173814896, "grad_norm": 13.26819133758545, "learning_rate": 6.4828522296488014e-06, "lm_loss": 5.5443, "loss": 1.5133, "step": 925, "text_contrastive_loss": 1.0368, "train_positive_log_prob": -82.1472, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4492, "epoch": 2.090293453724605, "grad_norm": 14.2427339553833, "learning_rate": 6.475921674422917e-06, "lm_loss": 5.473, "loss": 1.4559, "step": 926, "text_contrastive_loss": 0.9186, "train_positive_log_prob": -80.4308, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.2978, "epoch": 2.0925507900677203, "grad_norm": 10.867948532104492, "learning_rate": 6.4689880114391375e-06, "lm_loss": 5.5041, "loss": 1.1965, "step": 927, "text_contrastive_loss": 0.6966, "train_positive_log_prob": -81.1004, "train_positive_token_accuracy": 0.0756, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.4376, "epoch": 2.094808126410835, "grad_norm": 15.287556648254395, "learning_rate": 6.462051255297255e-06, "lm_loss": 5.6055, "loss": 1.4279, "step": 928, "text_contrastive_loss": 0.8596, "train_positive_log_prob": -84.4968, "train_positive_token_accuracy": 0.0785, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.3481, "epoch": 2.0970654627539504, "grad_norm": 11.80434799194336, "learning_rate": 6.455111420603568e-06, "lm_loss": 5.5582, "loss": 1.3113, "step": 929, "text_contrastive_loss": 0.8148, "train_positive_log_prob": -81.5301, "train_positive_token_accuracy": 0.0807, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.3704, "epoch": 2.0993227990970653, "grad_norm": 14.718912124633789, "learning_rate": 6.448168521970865e-06, "lm_loss": 5.5771, "loss": 1.2943, "step": 930, "text_contrastive_loss": 0.7324, "train_positive_log_prob": -81.2363, "train_positive_token_accuracy": 0.074, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.4146, "epoch": 2.1015801354401806, "grad_norm": 13.320003509521484, "learning_rate": 6.441222574018378e-06, "lm_loss": 5.5359, "loss": 1.3511, "step": 931, "text_contrastive_loss": 0.7659, "train_positive_log_prob": -81.8475, "train_positive_token_accuracy": 0.0729, "train_positive_token_prob": 0.0286 }, { "contrastive_loss": 0.4271, "epoch": 2.1038374717832955, "grad_norm": 15.628487586975098, "learning_rate": 6.434273591371771e-06, "lm_loss": 5.6005, "loss": 1.4878, "step": 932, "text_contrastive_loss": 1.0014, "train_positive_log_prob": -85.2814, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3987, "epoch": 2.106094808126411, "grad_norm": 13.400457382202148, "learning_rate": 6.427321588663085e-06, "lm_loss": 5.5665, "loss": 1.339, "step": 933, "text_contrastive_loss": 0.7674, "train_positive_log_prob": -83.3696, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.4431, "epoch": 2.108352144469526, "grad_norm": 13.66365909576416, "learning_rate": 6.42036658053073e-06, "lm_loss": 5.6296, "loss": 1.3941, "step": 934, "text_contrastive_loss": 0.7761, "train_positive_log_prob": -84.5974, "train_positive_token_accuracy": 0.0756, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.4123, "epoch": 2.110609480812641, "grad_norm": 13.463959693908691, "learning_rate": 6.41340858161944e-06, "lm_loss": 5.4605, "loss": 1.3361, "step": 935, "text_contrastive_loss": 0.7553, "train_positive_log_prob": -80.5011, "train_positive_token_accuracy": 0.0808, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.4612, "epoch": 2.1128668171557563, "grad_norm": 14.150492668151855, "learning_rate": 6.406447606580248e-06, "lm_loss": 5.58, "loss": 1.4585, "step": 936, "text_contrastive_loss": 0.8786, "train_positive_log_prob": -83.7241, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3639, "epoch": 2.115124153498871, "grad_norm": 13.390018463134766, "learning_rate": 6.399483670070451e-06, "lm_loss": 5.5468, "loss": 1.2795, "step": 937, "text_contrastive_loss": 0.7219, "train_positive_log_prob": -83.3512, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.3514, "epoch": 2.1173814898419865, "grad_norm": 13.621367454528809, "learning_rate": 6.392516786753586e-06, "lm_loss": 5.6154, "loss": 1.2941, "step": 938, "text_contrastive_loss": 0.7623, "train_positive_log_prob": -83.423, "train_positive_token_accuracy": 0.0678, "train_positive_token_prob": 0.0279 }, { "contrastive_loss": 0.4295, "epoch": 2.119638826185102, "grad_norm": 14.347768783569336, "learning_rate": 6.385546971299389e-06, "lm_loss": 5.491, "loss": 1.3672, "step": 939, "text_contrastive_loss": 0.7772, "train_positive_log_prob": -79.6387, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3662, "epoch": 2.1218961625282167, "grad_norm": 11.981817245483398, "learning_rate": 6.378574238383776e-06, "lm_loss": 5.3599, "loss": 1.2613, "step": 940, "text_contrastive_loss": 0.7182, "train_positive_log_prob": -79.4741, "train_positive_token_accuracy": 0.0827, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.3346, "epoch": 2.124153498871332, "grad_norm": 12.558072090148926, "learning_rate": 6.3715986026888046e-06, "lm_loss": 5.6151, "loss": 1.2974, "step": 941, "text_contrastive_loss": 0.8026, "train_positive_log_prob": -83.5464, "train_positive_token_accuracy": 0.0835, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.4367, "epoch": 2.126410835214447, "grad_norm": 13.51561450958252, "learning_rate": 6.3646200789026426e-06, "lm_loss": 5.5248, "loss": 1.422, "step": 942, "text_contrastive_loss": 0.8657, "train_positive_log_prob": -81.407, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.4196, "epoch": 2.128668171557562, "grad_norm": 12.983024597167969, "learning_rate": 6.35763868171954e-06, "lm_loss": 5.5869, "loss": 1.401, "step": 943, "text_contrastive_loss": 0.8454, "train_positive_log_prob": -79.9807, "train_positive_token_accuracy": 0.0775, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3939, "epoch": 2.130925507900677, "grad_norm": 12.527701377868652, "learning_rate": 6.350654425839799e-06, "lm_loss": 5.5153, "loss": 1.398, "step": 944, "text_contrastive_loss": 0.9052, "train_positive_log_prob": -81.3503, "train_positive_token_accuracy": 0.089, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.4015, "epoch": 2.1331828442437923, "grad_norm": 14.444681167602539, "learning_rate": 6.343667325969736e-06, "lm_loss": 5.5525, "loss": 1.416, "step": 945, "text_contrastive_loss": 0.9185, "train_positive_log_prob": -82.01, "train_positive_token_accuracy": 0.0727, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.439, "epoch": 2.1354401805869077, "grad_norm": 12.963115692138672, "learning_rate": 6.336677396821663e-06, "lm_loss": 5.4959, "loss": 1.4531, "step": 946, "text_contrastive_loss": 0.9291, "train_positive_log_prob": -80.2713, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3516, "epoch": 2.1376975169300225, "grad_norm": 12.958985328674316, "learning_rate": 6.3296846531138445e-06, "lm_loss": 5.6031, "loss": 1.3727, "step": 947, "text_contrastive_loss": 0.9215, "train_positive_log_prob": -82.3616, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.4498, "epoch": 2.139954853273138, "grad_norm": 12.821478843688965, "learning_rate": 6.322689109570472e-06, "lm_loss": 5.6741, "loss": 1.4163, "step": 948, "text_contrastive_loss": 0.7982, "train_positive_log_prob": -85.5767, "train_positive_token_accuracy": 0.0743, "train_positive_token_prob": 0.0287 }, { "contrastive_loss": 0.473, "epoch": 2.1422121896162527, "grad_norm": 13.26208782196045, "learning_rate": 6.315690780921634e-06, "lm_loss": 5.5058, "loss": 1.4371, "step": 949, "text_contrastive_loss": 0.8271, "train_positive_log_prob": -81.7886, "train_positive_token_accuracy": 0.0755, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.3098, "epoch": 2.144469525959368, "grad_norm": 12.116291046142578, "learning_rate": 6.3086896819032814e-06, "lm_loss": 5.6079, "loss": 1.2509, "step": 950, "text_contrastive_loss": 0.7606, "train_positive_log_prob": -84.4703, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4076, "epoch": 2.146726862302483, "grad_norm": 14.053050994873047, "learning_rate": 6.301685827257202e-06, "lm_loss": 5.5962, "loss": 1.4516, "step": 951, "text_contrastive_loss": 0.9688, "train_positive_log_prob": -82.8739, "train_positive_token_accuracy": 0.0805, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.4269, "epoch": 2.148984198645598, "grad_norm": 13.233821868896484, "learning_rate": 6.294679231730983e-06, "lm_loss": 5.5696, "loss": 1.4272, "step": 952, "text_contrastive_loss": 0.8866, "train_positive_log_prob": -83.3044, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.4512, "epoch": 2.1512415349887135, "grad_norm": 15.380033493041992, "learning_rate": 6.2876699100779815e-06, "lm_loss": 5.5953, "loss": 1.4521, "step": 953, "text_contrastive_loss": 0.8827, "train_positive_log_prob": -83.3358, "train_positive_token_accuracy": 0.0816, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.3405, "epoch": 2.1534988713318284, "grad_norm": 11.722415924072266, "learning_rate": 6.2806578770573e-06, "lm_loss": 5.4673, "loss": 1.2318, "step": 954, "text_contrastive_loss": 0.6891, "train_positive_log_prob": -81.4551, "train_positive_token_accuracy": 0.0742, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.3513, "epoch": 2.1557562076749437, "grad_norm": 12.42605972290039, "learning_rate": 6.273643147433743e-06, "lm_loss": 5.4634, "loss": 1.2589, "step": 955, "text_contrastive_loss": 0.7226, "train_positive_log_prob": -78.9663, "train_positive_token_accuracy": 0.0854, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3209, "epoch": 2.1580135440180586, "grad_norm": 11.624284744262695, "learning_rate": 6.266625735977802e-06, "lm_loss": 5.5567, "loss": 1.1859, "step": 956, "text_contrastive_loss": 0.6185, "train_positive_log_prob": -83.1381, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.5073, "epoch": 2.160270880361174, "grad_norm": 15.735133171081543, "learning_rate": 6.259605657465607e-06, "lm_loss": 5.5571, "loss": 1.491, "step": 957, "text_contrastive_loss": 0.856, "train_positive_log_prob": -81.992, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.3826, "epoch": 2.1625282167042887, "grad_norm": 12.599930763244629, "learning_rate": 6.252582926678908e-06, "lm_loss": 5.5687, "loss": 1.3872, "step": 958, "text_contrastive_loss": 0.8956, "train_positive_log_prob": -84.5738, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.3622, "epoch": 2.164785553047404, "grad_norm": 13.990739822387695, "learning_rate": 6.24555755840504e-06, "lm_loss": 5.6309, "loss": 1.2896, "step": 959, "text_contrastive_loss": 0.7287, "train_positive_log_prob": -83.6268, "train_positive_token_accuracy": 0.0755, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.3748, "epoch": 2.1670428893905194, "grad_norm": 12.508328437805176, "learning_rate": 6.238529567436892e-06, "lm_loss": 5.5369, "loss": 1.2588, "step": 960, "text_contrastive_loss": 0.6607, "train_positive_log_prob": -82.728, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.4102, "epoch": 2.1693002257336342, "grad_norm": 14.44834041595459, "learning_rate": 6.231498968572872e-06, "lm_loss": 5.5447, "loss": 1.3961, "step": 961, "text_contrastive_loss": 0.8627, "train_positive_log_prob": -84.1846, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4039, "epoch": 2.1715575620767495, "grad_norm": 12.841310501098633, "learning_rate": 6.224465776616883e-06, "lm_loss": 5.5862, "loss": 1.312, "step": 962, "text_contrastive_loss": 0.699, "train_positive_log_prob": -83.576, "train_positive_token_accuracy": 0.0747, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.413, "epoch": 2.1738148984198644, "grad_norm": 12.299847602844238, "learning_rate": 6.217430006378285e-06, "lm_loss": 5.6728, "loss": 1.3981, "step": 963, "text_contrastive_loss": 0.8357, "train_positive_log_prob": -83.8058, "train_positive_token_accuracy": 0.0793, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.418, "epoch": 2.1760722347629797, "grad_norm": 14.192880630493164, "learning_rate": 6.210391672671869e-06, "lm_loss": 5.5752, "loss": 1.2848, "step": 964, "text_contrastive_loss": 0.6185, "train_positive_log_prob": -80.6874, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.3756, "epoch": 2.1783295711060946, "grad_norm": 13.454263687133789, "learning_rate": 6.203350790317825e-06, "lm_loss": 5.5448, "loss": 1.361, "step": 965, "text_contrastive_loss": 0.8617, "train_positive_log_prob": -80.205, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.4535, "epoch": 2.18058690744921, "grad_norm": 13.363504409790039, "learning_rate": 6.196307374141707e-06, "lm_loss": 5.5796, "loss": 1.4404, "step": 966, "text_contrastive_loss": 0.858, "train_positive_log_prob": -84.37, "train_positive_token_accuracy": 0.0741, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4532, "epoch": 2.1828442437923252, "grad_norm": 15.592851638793945, "learning_rate": 6.189261438974403e-06, "lm_loss": 5.6168, "loss": 1.4715, "step": 967, "text_contrastive_loss": 0.9132, "train_positive_log_prob": -82.6105, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.4228, "epoch": 2.18510158013544, "grad_norm": 13.822402000427246, "learning_rate": 6.1822129996521105e-06, "lm_loss": 5.679, "loss": 1.4811, "step": 968, "text_contrastive_loss": 0.9808, "train_positive_log_prob": -82.7222, "train_positive_token_accuracy": 0.0732, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3349, "epoch": 2.1873589164785554, "grad_norm": 12.573756217956543, "learning_rate": 6.175162071016295e-06, "lm_loss": 5.5118, "loss": 1.3064, "step": 969, "text_contrastive_loss": 0.8408, "train_positive_log_prob": -80.7816, "train_positive_token_accuracy": 0.0717, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.4184, "epoch": 2.1896162528216703, "grad_norm": 14.005714416503906, "learning_rate": 6.168108667913666e-06, "lm_loss": 5.5475, "loss": 1.4001, "step": 970, "text_contrastive_loss": 0.8538, "train_positive_log_prob": -82.1283, "train_positive_token_accuracy": 0.0736, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3898, "epoch": 2.1918735891647856, "grad_norm": 12.450246810913086, "learning_rate": 6.161052805196141e-06, "lm_loss": 5.5428, "loss": 1.3176, "step": 971, "text_contrastive_loss": 0.7471, "train_positive_log_prob": -82.1434, "train_positive_token_accuracy": 0.0775, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3987, "epoch": 2.194130925507901, "grad_norm": 13.179352760314941, "learning_rate": 6.15399449772082e-06, "lm_loss": 5.5757, "loss": 1.283, "step": 972, "text_contrastive_loss": 0.6534, "train_positive_log_prob": -83.6822, "train_positive_token_accuracy": 0.0763, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.2804, "epoch": 2.1963882618510158, "grad_norm": 11.19333267211914, "learning_rate": 6.146933760349947e-06, "lm_loss": 5.6084, "loss": 1.1683, "step": 973, "text_contrastive_loss": 0.654, "train_positive_log_prob": -84.9345, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.4641, "epoch": 2.198645598194131, "grad_norm": 13.623973846435547, "learning_rate": 6.139870607950885e-06, "lm_loss": 5.5269, "loss": 1.4512, "step": 974, "text_contrastive_loss": 0.8689, "train_positive_log_prob": -81.4148, "train_positive_token_accuracy": 0.0711, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3802, "epoch": 2.200902934537246, "grad_norm": 11.477293014526367, "learning_rate": 6.1328050553960804e-06, "lm_loss": 5.5935, "loss": 1.2467, "step": 975, "text_contrastive_loss": 0.6142, "train_positive_log_prob": -82.6155, "train_positive_token_accuracy": 0.0775, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.4762, "epoch": 2.2031602708803613, "grad_norm": 14.226114273071289, "learning_rate": 6.1257371175630375e-06, "lm_loss": 5.4978, "loss": 1.4596, "step": 976, "text_contrastive_loss": 0.8673, "train_positive_log_prob": -81.0309, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.4986, "epoch": 2.205417607223476, "grad_norm": 14.475671768188477, "learning_rate": 6.118666809334277e-06, "lm_loss": 5.582, "loss": 1.5774, "step": 977, "text_contrastive_loss": 1.0414, "train_positive_log_prob": -82.0069, "train_positive_token_accuracy": 0.0744, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4309, "epoch": 2.2076749435665914, "grad_norm": 13.799911499023438, "learning_rate": 6.111594145597319e-06, "lm_loss": 5.4693, "loss": 1.3871, "step": 978, "text_contrastive_loss": 0.8186, "train_positive_log_prob": -80.9287, "train_positive_token_accuracy": 0.0808, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.4412, "epoch": 2.2099322799097068, "grad_norm": 13.247078895568848, "learning_rate": 6.104519141244631e-06, "lm_loss": 5.4917, "loss": 1.395, "step": 979, "text_contrastive_loss": 0.8093, "train_positive_log_prob": -80.4919, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.283, "epoch": 2.2121896162528216, "grad_norm": 11.904372215270996, "learning_rate": 6.0974418111736235e-06, "lm_loss": 5.6329, "loss": 1.1767, "step": 980, "text_contrastive_loss": 0.6608, "train_positive_log_prob": -84.5426, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.4525, "epoch": 2.214446952595937, "grad_norm": 14.136039733886719, "learning_rate": 6.090362170286591e-06, "lm_loss": 5.505, "loss": 1.3167, "step": 981, "text_contrastive_loss": 0.6275, "train_positive_log_prob": -81.738, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.4498, "epoch": 2.216704288939052, "grad_norm": 14.322342872619629, "learning_rate": 6.0832802334907044e-06, "lm_loss": 5.5289, "loss": 1.3687, "step": 982, "text_contrastive_loss": 0.7321, "train_positive_log_prob": -83.043, "train_positive_token_accuracy": 0.0861, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3964, "epoch": 2.218961625282167, "grad_norm": 13.998641967773438, "learning_rate": 6.076196015697963e-06, "lm_loss": 5.5985, "loss": 1.3455, "step": 983, "text_contrastive_loss": 0.7786, "train_positive_log_prob": -85.139, "train_positive_token_accuracy": 0.0822, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3999, "epoch": 2.221218961625282, "grad_norm": 16.890783309936523, "learning_rate": 6.069109531825169e-06, "lm_loss": 5.5384, "loss": 1.3413, "step": 984, "text_contrastive_loss": 0.7753, "train_positive_log_prob": -80.1608, "train_positive_token_accuracy": 0.0726, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.4184, "epoch": 2.2234762979683973, "grad_norm": 14.210457801818848, "learning_rate": 6.0620207967939e-06, "lm_loss": 5.6143, "loss": 1.4467, "step": 985, "text_contrastive_loss": 0.9337, "train_positive_log_prob": -82.2629, "train_positive_token_accuracy": 0.0698, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.4921, "epoch": 2.2257336343115126, "grad_norm": 14.350348472595215, "learning_rate": 6.054929825530469e-06, "lm_loss": 5.5366, "loss": 1.4817, "step": 986, "text_contrastive_loss": 0.8719, "train_positive_log_prob": -82.1436, "train_positive_token_accuracy": 0.0761, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.2638, "epoch": 2.2279909706546275, "grad_norm": 11.467473030090332, "learning_rate": 6.047836632965901e-06, "lm_loss": 5.6007, "loss": 1.1329, "step": 987, "text_contrastive_loss": 0.6181, "train_positive_log_prob": -82.5658, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.4258, "epoch": 2.230248306997743, "grad_norm": 12.957710266113281, "learning_rate": 6.040741234035898e-06, "lm_loss": 5.5427, "loss": 1.3607, "step": 988, "text_contrastive_loss": 0.7613, "train_positive_log_prob": -81.516, "train_positive_token_accuracy": 0.0752, "train_positive_token_prob": 0.0287 }, { "contrastive_loss": 0.4547, "epoch": 2.2325056433408577, "grad_norm": 14.008922576904297, "learning_rate": 6.0336436436808054e-06, "lm_loss": 5.5997, "loss": 1.5015, "step": 989, "text_contrastive_loss": 0.9736, "train_positive_log_prob": -82.5375, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.3921, "epoch": 2.234762979683973, "grad_norm": 13.679863929748535, "learning_rate": 6.026543876845586e-06, "lm_loss": 5.5863, "loss": 1.3665, "step": 990, "text_contrastive_loss": 0.8316, "train_positive_log_prob": -83.5533, "train_positive_token_accuracy": 0.0867, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.3997, "epoch": 2.237020316027088, "grad_norm": 14.333312034606934, "learning_rate": 6.019441948479784e-06, "lm_loss": 5.4539, "loss": 1.3825, "step": 991, "text_contrastive_loss": 0.8748, "train_positive_log_prob": -80.6994, "train_positive_token_accuracy": 0.0763, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4194, "epoch": 2.239277652370203, "grad_norm": 13.665488243103027, "learning_rate": 6.012337873537494e-06, "lm_loss": 5.5423, "loss": 1.3489, "step": 992, "text_contrastive_loss": 0.7507, "train_positive_log_prob": -83.0468, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.3387, "epoch": 2.2415349887133185, "grad_norm": 12.015118598937988, "learning_rate": 6.005231666977331e-06, "lm_loss": 5.4918, "loss": 1.2651, "step": 993, "text_contrastive_loss": 0.7545, "train_positive_log_prob": -81.261, "train_positive_token_accuracy": 0.0837, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3605, "epoch": 2.2437923250564333, "grad_norm": 12.484308242797852, "learning_rate": 5.998123343762403e-06, "lm_loss": 5.5407, "loss": 1.3577, "step": 994, "text_contrastive_loss": 0.8864, "train_positive_log_prob": -81.2947, "train_positive_token_accuracy": 0.0809, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.328, "epoch": 2.2460496613995486, "grad_norm": 12.022150039672852, "learning_rate": 5.9910129188602665e-06, "lm_loss": 5.6345, "loss": 1.321, "step": 995, "text_contrastive_loss": 0.8593, "train_positive_log_prob": -83.0857, "train_positive_token_accuracy": 0.0731, "train_positive_token_prob": 0.0284 }, { "contrastive_loss": 0.3676, "epoch": 2.2483069977426635, "grad_norm": 13.612593650817871, "learning_rate": 5.983900407242911e-06, "lm_loss": 5.5751, "loss": 1.332, "step": 996, "text_contrastive_loss": 0.8138, "train_positive_log_prob": -82.7977, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3833, "epoch": 2.250564334085779, "grad_norm": 13.808063507080078, "learning_rate": 5.976785823886713e-06, "lm_loss": 5.6367, "loss": 1.3317, "step": 997, "text_contrastive_loss": 0.7696, "train_positive_log_prob": -83.0521, "train_positive_token_accuracy": 0.071, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.3783, "epoch": 2.2528216704288937, "grad_norm": 12.80401611328125, "learning_rate": 5.96966918377242e-06, "lm_loss": 5.46, "loss": 1.3027, "step": 998, "text_contrastive_loss": 0.7569, "train_positive_log_prob": -82.0075, "train_positive_token_accuracy": 0.0848, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3602, "epoch": 2.255079006772009, "grad_norm": 13.811467170715332, "learning_rate": 5.9625505018851e-06, "lm_loss": 5.4395, "loss": 1.3155, "step": 999, "text_contrastive_loss": 0.8227, "train_positive_log_prob": -82.1437, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.3746, "epoch": 2.2573363431151243, "grad_norm": 13.968853950500488, "learning_rate": 5.955429793214129e-06, "lm_loss": 5.6201, "loss": 1.2878, "step": 1000, "text_contrastive_loss": 0.7023, "train_positive_log_prob": -82.3269, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4229, "epoch": 2.259593679458239, "grad_norm": 14.745325088500977, "learning_rate": 5.948307072753146e-06, "lm_loss": 5.5213, "loss": 1.3778, "step": 1001, "text_contrastive_loss": 0.8054, "train_positive_log_prob": -81.3077, "train_positive_token_accuracy": 0.0724, "train_positive_token_prob": 0.0288 }, { "contrastive_loss": 0.3283, "epoch": 2.2618510158013545, "grad_norm": 14.804535865783691, "learning_rate": 5.941182355500028e-06, "lm_loss": 5.4092, "loss": 1.2485, "step": 1002, "text_contrastive_loss": 0.7585, "train_positive_log_prob": -80.8454, "train_positive_token_accuracy": 0.0847, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3974, "epoch": 2.2641083521444694, "grad_norm": 13.344841957092285, "learning_rate": 5.934055656456855e-06, "lm_loss": 5.3794, "loss": 1.2822, "step": 1003, "text_contrastive_loss": 0.6938, "train_positive_log_prob": -79.8066, "train_positive_token_accuracy": 0.0887, "train_positive_token_prob": 0.0336 }, { "contrastive_loss": 0.4088, "epoch": 2.2663656884875847, "grad_norm": 12.040769577026367, "learning_rate": 5.926926990629883e-06, "lm_loss": 5.442, "loss": 1.3089, "step": 1004, "text_contrastive_loss": 0.7117, "train_positive_log_prob": -79.3547, "train_positive_token_accuracy": 0.0838, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.4572, "epoch": 2.2686230248307, "grad_norm": 13.579639434814453, "learning_rate": 5.919796373029504e-06, "lm_loss": 5.5397, "loss": 1.3674, "step": 1005, "text_contrastive_loss": 0.7125, "train_positive_log_prob": -82.7463, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3127, "epoch": 2.270880361173815, "grad_norm": 12.374353408813477, "learning_rate": 5.912663818670224e-06, "lm_loss": 5.5049, "loss": 1.2269, "step": 1006, "text_contrastive_loss": 0.7274, "train_positive_log_prob": -81.8819, "train_positive_token_accuracy": 0.0825, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3436, "epoch": 2.27313769751693, "grad_norm": 12.528451919555664, "learning_rate": 5.905529342570627e-06, "lm_loss": 5.5235, "loss": 1.2804, "step": 1007, "text_contrastive_loss": 0.7689, "train_positive_log_prob": -80.7811, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.4227, "epoch": 2.275395033860045, "grad_norm": 16.113021850585938, "learning_rate": 5.898392959753343e-06, "lm_loss": 5.4232, "loss": 1.3767, "step": 1008, "text_contrastive_loss": 0.8232, "train_positive_log_prob": -80.6083, "train_positive_token_accuracy": 0.0746, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.405, "epoch": 2.2776523702031604, "grad_norm": 13.573700904846191, "learning_rate": 5.8912546852450116e-06, "lm_loss": 5.5703, "loss": 1.3367, "step": 1009, "text_contrastive_loss": 0.7493, "train_positive_log_prob": -82.5105, "train_positive_token_accuracy": 0.0694, "train_positive_token_prob": 0.0282 }, { "contrastive_loss": 0.3962, "epoch": 2.2799097065462752, "grad_norm": 13.823863983154297, "learning_rate": 5.8841145340762665e-06, "lm_loss": 5.496, "loss": 1.3336, "step": 1010, "text_contrastive_loss": 0.7756, "train_positive_log_prob": -81.0228, "train_positive_token_accuracy": 0.0829, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3785, "epoch": 2.2821670428893905, "grad_norm": 13.912785530090332, "learning_rate": 5.876972521281683e-06, "lm_loss": 5.5152, "loss": 1.2976, "step": 1011, "text_contrastive_loss": 0.7351, "train_positive_log_prob": -81.6721, "train_positive_token_accuracy": 0.0783, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3995, "epoch": 2.2844243792325054, "grad_norm": 14.400925636291504, "learning_rate": 5.869828661899761e-06, "lm_loss": 5.5173, "loss": 1.4048, "step": 1012, "text_contrastive_loss": 0.9072, "train_positive_log_prob": -79.5032, "train_positive_token_accuracy": 0.0781, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.4157, "epoch": 2.2866817155756207, "grad_norm": 14.685125350952148, "learning_rate": 5.862682970972888e-06, "lm_loss": 5.4966, "loss": 1.3343, "step": 1013, "text_contrastive_loss": 0.7379, "train_positive_log_prob": -80.7977, "train_positive_token_accuracy": 0.0874, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4512, "epoch": 2.288939051918736, "grad_norm": 14.808658599853516, "learning_rate": 5.855535463547309e-06, "lm_loss": 5.4907, "loss": 1.3872, "step": 1014, "text_contrastive_loss": 0.7739, "train_positive_log_prob": -80.4921, "train_positive_token_accuracy": 0.0791, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.385, "epoch": 2.291196388261851, "grad_norm": 13.9261474609375, "learning_rate": 5.8483861546730915e-06, "lm_loss": 5.6069, "loss": 1.2924, "step": 1015, "text_contrastive_loss": 0.6934, "train_positive_log_prob": -82.5283, "train_positive_token_accuracy": 0.0719, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.4113, "epoch": 2.293453724604966, "grad_norm": 14.202028274536133, "learning_rate": 5.841235059404097e-06, "lm_loss": 5.418, "loss": 1.4645, "step": 1016, "text_contrastive_loss": 1.0228, "train_positive_log_prob": -80.362, "train_positive_token_accuracy": 0.0877, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.4061, "epoch": 2.295711060948081, "grad_norm": 13.882689476013184, "learning_rate": 5.834082192797948e-06, "lm_loss": 5.4877, "loss": 1.342, "step": 1017, "text_contrastive_loss": 0.7742, "train_positive_log_prob": -79.5026, "train_positive_token_accuracy": 0.0776, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3712, "epoch": 2.2979683972911964, "grad_norm": 13.90723991394043, "learning_rate": 5.826927569915999e-06, "lm_loss": 5.7271, "loss": 1.3197, "step": 1018, "text_contrastive_loss": 0.7516, "train_positive_log_prob": -83.911, "train_positive_token_accuracy": 0.0725, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.4399, "epoch": 2.3002257336343117, "grad_norm": 14.244397163391113, "learning_rate": 5.819771205823303e-06, "lm_loss": 5.6209, "loss": 1.3774, "step": 1019, "text_contrastive_loss": 0.7507, "train_positive_log_prob": -83.5902, "train_positive_token_accuracy": 0.0791, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.4219, "epoch": 2.3024830699774266, "grad_norm": 14.064270973205566, "learning_rate": 5.812613115588575e-06, "lm_loss": 5.4932, "loss": 1.3646, "step": 1020, "text_contrastive_loss": 0.7868, "train_positive_log_prob": -79.76, "train_positive_token_accuracy": 0.0729, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3629, "epoch": 2.304740406320542, "grad_norm": 13.734200477600098, "learning_rate": 5.805453314284168e-06, "lm_loss": 5.5866, "loss": 1.2901, "step": 1021, "text_contrastive_loss": 0.7371, "train_positive_log_prob": -83.4928, "train_positive_token_accuracy": 0.0808, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3651, "epoch": 2.3069977426636568, "grad_norm": 14.739347457885742, "learning_rate": 5.7982918169860395e-06, "lm_loss": 5.507, "loss": 1.3218, "step": 1022, "text_contrastive_loss": 0.8121, "train_positive_log_prob": -81.3155, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.3772, "epoch": 2.309255079006772, "grad_norm": 14.899076461791992, "learning_rate": 5.791128638773711e-06, "lm_loss": 5.5482, "loss": 1.3346, "step": 1023, "text_contrastive_loss": 0.8051, "train_positive_log_prob": -81.4921, "train_positive_token_accuracy": 0.0774, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.3936, "epoch": 2.311512415349887, "grad_norm": 12.669140815734863, "learning_rate": 5.783963794730254e-06, "lm_loss": 5.5638, "loss": 1.3333, "step": 1024, "text_contrastive_loss": 0.7665, "train_positive_log_prob": -82.1736, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.3984, "epoch": 2.3137697516930023, "grad_norm": 13.517724990844727, "learning_rate": 5.776797299942236e-06, "lm_loss": 5.6272, "loss": 1.3307, "step": 1025, "text_contrastive_loss": 0.7392, "train_positive_log_prob": -83.9292, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.2764, "epoch": 2.3160270880361176, "grad_norm": 11.722050666809082, "learning_rate": 5.7696291694997105e-06, "lm_loss": 5.6105, "loss": 1.2179, "step": 1026, "text_contrastive_loss": 0.761, "train_positive_log_prob": -84.001, "train_positive_token_accuracy": 0.0762, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.4774, "epoch": 2.3182844243792324, "grad_norm": 14.694503784179688, "learning_rate": 5.762459418496169e-06, "lm_loss": 5.4532, "loss": 1.4874, "step": 1027, "text_contrastive_loss": 0.9293, "train_positive_log_prob": -79.5419, "train_positive_token_accuracy": 0.0846, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.4467, "epoch": 2.3205417607223477, "grad_norm": 13.764412879943848, "learning_rate": 5.755288062028519e-06, "lm_loss": 5.4687, "loss": 1.398, "step": 1028, "text_contrastive_loss": 0.8088, "train_positive_log_prob": -80.6833, "train_positive_token_accuracy": 0.0736, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3892, "epoch": 2.3227990970654626, "grad_norm": 13.268473625183105, "learning_rate": 5.748115115197045e-06, "lm_loss": 5.5849, "loss": 1.335, "step": 1029, "text_contrastive_loss": 0.7746, "train_positive_log_prob": -81.4176, "train_positive_token_accuracy": 0.074, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.3528, "epoch": 2.325056433408578, "grad_norm": 13.001954078674316, "learning_rate": 5.740940593105383e-06, "lm_loss": 5.5094, "loss": 1.2074, "step": 1030, "text_contrastive_loss": 0.6074, "train_positive_log_prob": -81.6473, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.4495, "epoch": 2.327313769751693, "grad_norm": 14.6458158493042, "learning_rate": 5.733764510860482e-06, "lm_loss": 5.4524, "loss": 1.3742, "step": 1031, "text_contrastive_loss": 0.759, "train_positive_log_prob": -79.1298, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3712, "epoch": 2.329571106094808, "grad_norm": 13.251885414123535, "learning_rate": 5.726586883572584e-06, "lm_loss": 5.4817, "loss": 1.3233, "step": 1032, "text_contrastive_loss": 0.808, "train_positive_log_prob": -82.1205, "train_positive_token_accuracy": 0.073, "train_positive_token_prob": 0.0291 }, { "contrastive_loss": 0.4568, "epoch": 2.3318284424379234, "grad_norm": 14.308194160461426, "learning_rate": 5.719407726355174e-06, "lm_loss": 5.5153, "loss": 1.424, "step": 1033, "text_contrastive_loss": 0.8315, "train_positive_log_prob": -82.2517, "train_positive_token_accuracy": 0.0752, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.4328, "epoch": 2.3340857787810383, "grad_norm": 14.968981742858887, "learning_rate": 5.712227054324968e-06, "lm_loss": 5.6012, "loss": 1.4212, "step": 1034, "text_contrastive_loss": 0.8564, "train_positive_log_prob": -83.9986, "train_positive_token_accuracy": 0.0763, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.4197, "epoch": 2.3363431151241536, "grad_norm": 14.584522247314453, "learning_rate": 5.705044882601862e-06, "lm_loss": 5.5542, "loss": 1.4009, "step": 1035, "text_contrastive_loss": 0.8517, "train_positive_log_prob": -83.5942, "train_positive_token_accuracy": 0.0742, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4648, "epoch": 2.3386004514672685, "grad_norm": 14.75509262084961, "learning_rate": 5.697861226308923e-06, "lm_loss": 5.5775, "loss": 1.4913, "step": 1036, "text_contrastive_loss": 0.9375, "train_positive_log_prob": -84.9494, "train_positive_token_accuracy": 0.0746, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.4331, "epoch": 2.340857787810384, "grad_norm": 14.654216766357422, "learning_rate": 5.69067610057233e-06, "lm_loss": 5.4593, "loss": 1.3833, "step": 1037, "text_contrastive_loss": 0.8085, "train_positive_log_prob": -83.1031, "train_positive_token_accuracy": 0.0749, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3848, "epoch": 2.343115124153499, "grad_norm": 12.451683044433594, "learning_rate": 5.683489520521365e-06, "lm_loss": 5.5258, "loss": 1.3609, "step": 1038, "text_contrastive_loss": 0.8471, "train_positive_log_prob": -81.2904, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.4369, "epoch": 2.345372460496614, "grad_norm": 16.772539138793945, "learning_rate": 5.6763015012883686e-06, "lm_loss": 5.4578, "loss": 1.3977, "step": 1039, "text_contrastive_loss": 0.8301, "train_positive_log_prob": -81.2897, "train_positive_token_accuracy": 0.0719, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3251, "epoch": 2.3476297968397293, "grad_norm": 11.789501190185547, "learning_rate": 5.6691120580087126e-06, "lm_loss": 5.5235, "loss": 1.206, "step": 1040, "text_contrastive_loss": 0.6571, "train_positive_log_prob": -79.0103, "train_positive_token_accuracy": 0.0841, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3716, "epoch": 2.349887133182844, "grad_norm": 12.103446960449219, "learning_rate": 5.661921205820767e-06, "lm_loss": 5.4876, "loss": 1.279, "step": 1041, "text_contrastive_loss": 0.7173, "train_positive_log_prob": -80.6443, "train_positive_token_accuracy": 0.0829, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.5042, "epoch": 2.3521444695259595, "grad_norm": 15.723865509033203, "learning_rate": 5.654728959865872e-06, "lm_loss": 5.5846, "loss": 1.4548, "step": 1042, "text_contrastive_loss": 0.7843, "train_positive_log_prob": -84.0779, "train_positive_token_accuracy": 0.0823, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3544, "epoch": 2.3544018058690743, "grad_norm": 12.008702278137207, "learning_rate": 5.647535335288296e-06, "lm_loss": 5.5769, "loss": 1.2636, "step": 1043, "text_contrastive_loss": 0.7031, "train_positive_log_prob": -84.395, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.3317, "epoch": 2.3566591422121896, "grad_norm": 11.979096412658691, "learning_rate": 5.640340347235215e-06, "lm_loss": 5.484, "loss": 1.2973, "step": 1044, "text_contrastive_loss": 0.8344, "train_positive_log_prob": -80.8715, "train_positive_token_accuracy": 0.0808, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.5111, "epoch": 2.3589164785553045, "grad_norm": 13.818731307983398, "learning_rate": 5.6331440108566735e-06, "lm_loss": 5.6814, "loss": 1.5727, "step": 1045, "text_contrastive_loss": 0.987, "train_positive_log_prob": -84.6209, "train_positive_token_accuracy": 0.0732, "train_positive_token_prob": 0.0285 }, { "contrastive_loss": 0.421, "epoch": 2.36117381489842, "grad_norm": 14.53001880645752, "learning_rate": 5.6259463413055604e-06, "lm_loss": 5.5695, "loss": 1.4331, "step": 1046, "text_contrastive_loss": 0.9103, "train_positive_log_prob": -81.9357, "train_positive_token_accuracy": 0.0791, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.4077, "epoch": 2.363431151241535, "grad_norm": 14.39387321472168, "learning_rate": 5.6187473537375635e-06, "lm_loss": 5.6563, "loss": 1.3375, "step": 1047, "text_contrastive_loss": 0.7283, "train_positive_log_prob": -81.6435, "train_positive_token_accuracy": 0.0725, "train_positive_token_prob": 0.0287 }, { "contrastive_loss": 0.3039, "epoch": 2.36568848758465, "grad_norm": 11.790059089660645, "learning_rate": 5.611547063311152e-06, "lm_loss": 5.4627, "loss": 1.2354, "step": 1048, "text_contrastive_loss": 0.7705, "train_positive_log_prob": -80.297, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.463, "epoch": 2.3679458239277653, "grad_norm": 15.357479095458984, "learning_rate": 5.604345485187535e-06, "lm_loss": 5.5069, "loss": 1.4355, "step": 1049, "text_contrastive_loss": 0.8436, "train_positive_log_prob": -84.0033, "train_positive_token_accuracy": 0.0835, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.4173, "epoch": 2.37020316027088, "grad_norm": 12.703691482543945, "learning_rate": 5.597142634530639e-06, "lm_loss": 5.4058, "loss": 1.2865, "step": 1050, "text_contrastive_loss": 0.6572, "train_positive_log_prob": -80.3059, "train_positive_token_accuracy": 0.0884, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.4097, "epoch": 2.3724604966139955, "grad_norm": 14.017982482910156, "learning_rate": 5.589938526507059e-06, "lm_loss": 5.5373, "loss": 1.3326, "step": 1051, "text_contrastive_loss": 0.7383, "train_positive_log_prob": -81.131, "train_positive_token_accuracy": 0.069, "train_positive_token_prob": 0.0291 }, { "contrastive_loss": 0.4417, "epoch": 2.374717832957111, "grad_norm": 17.754405975341797, "learning_rate": 5.582733176286048e-06, "lm_loss": 5.469, "loss": 1.463, "step": 1052, "text_contrastive_loss": 0.9487, "train_positive_log_prob": -79.7064, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.3392, "epoch": 2.3769751693002257, "grad_norm": 13.985703468322754, "learning_rate": 5.575526599039472e-06, "lm_loss": 5.3489, "loss": 1.2603, "step": 1053, "text_contrastive_loss": 0.7725, "train_positive_log_prob": -79.1122, "train_positive_token_accuracy": 0.0816, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.2968, "epoch": 2.379232505643341, "grad_norm": 12.920539855957031, "learning_rate": 5.568318809941777e-06, "lm_loss": 5.4325, "loss": 1.2386, "step": 1054, "text_contrastive_loss": 0.7971, "train_positive_log_prob": -79.6203, "train_positive_token_accuracy": 0.0821, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3871, "epoch": 2.381489841986456, "grad_norm": 13.54922866821289, "learning_rate": 5.561109824169962e-06, "lm_loss": 5.4568, "loss": 1.2994, "step": 1055, "text_contrastive_loss": 0.7332, "train_positive_log_prob": -83.6655, "train_positive_token_accuracy": 0.0818, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3444, "epoch": 2.383747178329571, "grad_norm": 12.597684860229492, "learning_rate": 5.553899656903552e-06, "lm_loss": 5.5629, "loss": 1.2849, "step": 1056, "text_contrastive_loss": 0.7684, "train_positive_log_prob": -84.6168, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.4698, "epoch": 2.386004514672686, "grad_norm": 15.04989242553711, "learning_rate": 5.546688323324548e-06, "lm_loss": 5.5264, "loss": 1.4354, "step": 1057, "text_contrastive_loss": 0.826, "train_positive_log_prob": -82.9815, "train_positive_token_accuracy": 0.0812, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3774, "epoch": 2.3882618510158014, "grad_norm": 13.134078025817871, "learning_rate": 5.53947583861742e-06, "lm_loss": 5.4747, "loss": 1.3198, "step": 1058, "text_contrastive_loss": 0.7899, "train_positive_log_prob": -80.108, "train_positive_token_accuracy": 0.0854, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.4142, "epoch": 2.3905191873589167, "grad_norm": 13.905577659606934, "learning_rate": 5.5322622179690514e-06, "lm_loss": 5.4826, "loss": 1.3297, "step": 1059, "text_contrastive_loss": 0.7345, "train_positive_log_prob": -79.4124, "train_positive_token_accuracy": 0.0828, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3922, "epoch": 2.3927765237020315, "grad_norm": 13.758112907409668, "learning_rate": 5.525047476568722e-06, "lm_loss": 5.5356, "loss": 1.3236, "step": 1060, "text_contrastive_loss": 0.7556, "train_positive_log_prob": -82.5415, "train_positive_token_accuracy": 0.0812, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.4356, "epoch": 2.395033860045147, "grad_norm": 13.6261625289917, "learning_rate": 5.51783162960807e-06, "lm_loss": 5.5089, "loss": 1.3608, "step": 1061, "text_contrastive_loss": 0.7486, "train_positive_log_prob": -83.1293, "train_positive_token_accuracy": 0.0777, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4276, "epoch": 2.3972911963882617, "grad_norm": 15.979998588562012, "learning_rate": 5.5106146922810664e-06, "lm_loss": 5.5639, "loss": 1.4571, "step": 1062, "text_contrastive_loss": 0.9461, "train_positive_log_prob": -83.0504, "train_positive_token_accuracy": 0.0748, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.465, "epoch": 2.399548532731377, "grad_norm": 15.927392959594727, "learning_rate": 5.50339667978397e-06, "lm_loss": 5.5618, "loss": 1.498, "step": 1063, "text_contrastive_loss": 0.9536, "train_positive_log_prob": -82.4492, "train_positive_token_accuracy": 0.0748, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4131, "epoch": 2.401805869074492, "grad_norm": 14.187854766845703, "learning_rate": 5.496177607315312e-06, "lm_loss": 5.4653, "loss": 1.4196, "step": 1064, "text_contrastive_loss": 0.92, "train_positive_log_prob": -80.7589, "train_positive_token_accuracy": 0.0833, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.3869, "epoch": 2.404063205417607, "grad_norm": 13.354890823364258, "learning_rate": 5.488957490075846e-06, "lm_loss": 5.4859, "loss": 1.3326, "step": 1065, "text_contrastive_loss": 0.7943, "train_positive_log_prob": -80.728, "train_positive_token_accuracy": 0.0751, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.4043, "epoch": 2.4063205417607225, "grad_norm": 13.615126609802246, "learning_rate": 5.4817363432685355e-06, "lm_loss": 5.6338, "loss": 1.3987, "step": 1066, "text_contrastive_loss": 0.8621, "train_positive_log_prob": -84.2201, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.4026, "epoch": 2.4085778781038374, "grad_norm": 12.85096263885498, "learning_rate": 5.474514182098504e-06, "lm_loss": 5.4415, "loss": 1.3841, "step": 1067, "text_contrastive_loss": 0.8747, "train_positive_log_prob": -80.7073, "train_positive_token_accuracy": 0.078, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.445, "epoch": 2.4108352144469527, "grad_norm": 13.284417152404785, "learning_rate": 5.4672910217730155e-06, "lm_loss": 5.4465, "loss": 1.3754, "step": 1068, "text_contrastive_loss": 0.7714, "train_positive_log_prob": -80.9296, "train_positive_token_accuracy": 0.0862, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.4147, "epoch": 2.4130925507900676, "grad_norm": 13.03646469116211, "learning_rate": 5.4600668775014355e-06, "lm_loss": 5.5732, "loss": 1.3354, "step": 1069, "text_contrastive_loss": 0.7267, "train_positive_log_prob": -82.8376, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.5255, "epoch": 2.415349887133183, "grad_norm": 13.955810546875, "learning_rate": 5.452841764495203e-06, "lm_loss": 5.442, "loss": 1.4446, "step": 1070, "text_contrastive_loss": 0.7496, "train_positive_log_prob": -78.8667, "train_positive_token_accuracy": 0.0801, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3943, "epoch": 2.417607223476298, "grad_norm": 14.359458923339844, "learning_rate": 5.445615697967797e-06, "lm_loss": 5.4259, "loss": 1.3027, "step": 1071, "text_contrastive_loss": 0.7316, "train_positive_log_prob": -81.8307, "train_positive_token_accuracy": 0.0862, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.531, "epoch": 2.419864559819413, "grad_norm": 14.613249778747559, "learning_rate": 5.438388693134702e-06, "lm_loss": 5.4336, "loss": 1.5319, "step": 1072, "text_contrastive_loss": 0.9151, "train_positive_log_prob": -78.8259, "train_positive_token_accuracy": 0.081, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.4024, "epoch": 2.4221218961625284, "grad_norm": 14.3463773727417, "learning_rate": 5.431160765213379e-06, "lm_loss": 5.4399, "loss": 1.3835, "step": 1073, "text_contrastive_loss": 0.8743, "train_positive_log_prob": -79.1338, "train_positive_token_accuracy": 0.0781, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3103, "epoch": 2.4243792325056432, "grad_norm": 12.166443824768066, "learning_rate": 5.423931929423235e-06, "lm_loss": 5.4745, "loss": 1.1926, "step": 1074, "text_contrastive_loss": 0.6697, "train_positive_log_prob": -80.5519, "train_positive_token_accuracy": 0.0791, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3886, "epoch": 2.4266365688487586, "grad_norm": 12.064868927001953, "learning_rate": 5.416702200985585e-06, "lm_loss": 5.5676, "loss": 1.3866, "step": 1075, "text_contrastive_loss": 0.8824, "train_positive_log_prob": -83.6239, "train_positive_token_accuracy": 0.0848, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.409, "epoch": 2.4288939051918734, "grad_norm": 12.926525115966797, "learning_rate": 5.409471595123628e-06, "lm_loss": 5.4047, "loss": 1.3996, "step": 1076, "text_contrastive_loss": 0.9003, "train_positive_log_prob": -78.577, "train_positive_token_accuracy": 0.0763, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.38, "epoch": 2.4311512415349887, "grad_norm": 13.372188568115234, "learning_rate": 5.4022401270624036e-06, "lm_loss": 5.494, "loss": 1.2331, "step": 1077, "text_contrastive_loss": 0.6076, "train_positive_log_prob": -82.2837, "train_positive_token_accuracy": 0.0866, "train_positive_token_prob": 0.033 }, { "contrastive_loss": 0.4095, "epoch": 2.4334085778781036, "grad_norm": 12.369166374206543, "learning_rate": 5.395007812028775e-06, "lm_loss": 5.5177, "loss": 1.3271, "step": 1078, "text_contrastive_loss": 0.7316, "train_positive_log_prob": -80.5846, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.4923, "epoch": 2.435665914221219, "grad_norm": 14.51758098602295, "learning_rate": 5.387774665251385e-06, "lm_loss": 5.5253, "loss": 1.4506, "step": 1079, "text_contrastive_loss": 0.8114, "train_positive_log_prob": -83.6361, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3501, "epoch": 2.4379232505643342, "grad_norm": 12.295190811157227, "learning_rate": 5.380540701960627e-06, "lm_loss": 5.4864, "loss": 1.2486, "step": 1080, "text_contrastive_loss": 0.6998, "train_positive_log_prob": -81.5226, "train_positive_token_accuracy": 0.0781, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.4023, "epoch": 2.440180586907449, "grad_norm": 13.490459442138672, "learning_rate": 5.373305937388613e-06, "lm_loss": 5.6089, "loss": 1.3018, "step": 1081, "text_contrastive_loss": 0.6773, "train_positive_log_prob": -84.2885, "train_positive_token_accuracy": 0.0798, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.4213, "epoch": 2.4424379232505644, "grad_norm": 14.755785942077637, "learning_rate": 5.3660703867691475e-06, "lm_loss": 5.4886, "loss": 1.4683, "step": 1082, "text_contrastive_loss": 0.9964, "train_positive_log_prob": -80.3554, "train_positive_token_accuracy": 0.0848, "train_positive_token_prob": 0.0325 }, { "contrastive_loss": 0.3914, "epoch": 2.4446952595936793, "grad_norm": 13.41222095489502, "learning_rate": 5.358834065337684e-06, "lm_loss": 5.6045, "loss": 1.4521, "step": 1083, "text_contrastive_loss": 1.0005, "train_positive_log_prob": -83.172, "train_positive_token_accuracy": 0.0837, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3292, "epoch": 2.4469525959367946, "grad_norm": 13.041488647460938, "learning_rate": 5.3515969883313e-06, "lm_loss": 5.5795, "loss": 1.293, "step": 1084, "text_contrastive_loss": 0.8117, "train_positive_log_prob": -83.1063, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.3403, "epoch": 2.44920993227991, "grad_norm": 12.110668182373047, "learning_rate": 5.344359170988668e-06, "lm_loss": 5.5531, "loss": 1.2482, "step": 1085, "text_contrastive_loss": 0.705, "train_positive_log_prob": -81.3447, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.446, "epoch": 2.4514672686230248, "grad_norm": 15.7059965133667, "learning_rate": 5.337120628550016e-06, "lm_loss": 5.4581, "loss": 1.4751, "step": 1086, "text_contrastive_loss": 0.9666, "train_positive_log_prob": -82.3006, "train_positive_token_accuracy": 0.0829, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.4065, "epoch": 2.45372460496614, "grad_norm": 13.175836563110352, "learning_rate": 5.329881376257098e-06, "lm_loss": 5.4921, "loss": 1.3573, "step": 1087, "text_contrastive_loss": 0.8032, "train_positive_log_prob": -81.1144, "train_positive_token_accuracy": 0.0828, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.4353, "epoch": 2.455981941309255, "grad_norm": 14.727249145507812, "learning_rate": 5.322641429353167e-06, "lm_loss": 5.4534, "loss": 1.4546, "step": 1088, "text_contrastive_loss": 0.948, "train_positive_log_prob": -82.1823, "train_positive_token_accuracy": 0.084, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.4149, "epoch": 2.4582392776523703, "grad_norm": 13.355571746826172, "learning_rate": 5.315400803082934e-06, "lm_loss": 5.4401, "loss": 1.346, "step": 1089, "text_contrastive_loss": 0.7743, "train_positive_log_prob": -80.2823, "train_positive_token_accuracy": 0.0765, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.4249, "epoch": 2.460496613995485, "grad_norm": 15.00484561920166, "learning_rate": 5.308159512692544e-06, "lm_loss": 5.5169, "loss": 1.4481, "step": 1090, "text_contrastive_loss": 0.943, "train_positive_log_prob": -81.1595, "train_positive_token_accuracy": 0.0754, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.4935, "epoch": 2.4627539503386005, "grad_norm": 14.640666007995605, "learning_rate": 5.300917573429536e-06, "lm_loss": 5.5135, "loss": 1.561, "step": 1091, "text_contrastive_loss": 1.0322, "train_positive_log_prob": -78.5885, "train_positive_token_accuracy": 0.0776, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.4456, "epoch": 2.4650112866817158, "grad_norm": 15.940723419189453, "learning_rate": 5.293675000542822e-06, "lm_loss": 5.6074, "loss": 1.4078, "step": 1092, "text_contrastive_loss": 0.8029, "train_positive_log_prob": -83.9385, "train_positive_token_accuracy": 0.0804, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.5212, "epoch": 2.4672686230248306, "grad_norm": 15.651947975158691, "learning_rate": 5.286431809282639e-06, "lm_loss": 5.4077, "loss": 1.5292, "step": 1093, "text_contrastive_loss": 0.9345, "train_positive_log_prob": -80.0202, "train_positive_token_accuracy": 0.0826, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.4062, "epoch": 2.469525959367946, "grad_norm": 13.576653480529785, "learning_rate": 5.279188014900537e-06, "lm_loss": 5.5198, "loss": 1.3024, "step": 1094, "text_contrastive_loss": 0.6884, "train_positive_log_prob": -80.0606, "train_positive_token_accuracy": 0.0751, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.4295, "epoch": 2.471783295711061, "grad_norm": 14.4829683303833, "learning_rate": 5.2719436326493255e-06, "lm_loss": 5.5134, "loss": 1.3906, "step": 1095, "text_contrastive_loss": 0.8195, "train_positive_log_prob": -82.8098, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.4137, "epoch": 2.474040632054176, "grad_norm": 13.427950859069824, "learning_rate": 5.26469867778306e-06, "lm_loss": 5.5291, "loss": 1.3379, "step": 1096, "text_contrastive_loss": 0.7426, "train_positive_log_prob": -81.4216, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3779, "epoch": 2.476297968397291, "grad_norm": 11.763487815856934, "learning_rate": 5.257453165556996e-06, "lm_loss": 5.4668, "loss": 1.2999, "step": 1097, "text_contrastive_loss": 0.7506, "train_positive_log_prob": -81.7218, "train_positive_token_accuracy": 0.0886, "train_positive_token_prob": 0.0327 }, { "contrastive_loss": 0.3719, "epoch": 2.4785553047404063, "grad_norm": 12.892736434936523, "learning_rate": 5.2502071112275675e-06, "lm_loss": 5.5286, "loss": 1.3189, "step": 1098, "text_contrastive_loss": 0.7883, "train_positive_log_prob": -81.9765, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3745, "epoch": 2.4808126410835216, "grad_norm": 13.535112380981445, "learning_rate": 5.242960530052344e-06, "lm_loss": 5.4514, "loss": 1.3905, "step": 1099, "text_contrastive_loss": 0.9418, "train_positive_log_prob": -79.1948, "train_positive_token_accuracy": 0.0828, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3667, "epoch": 2.4830699774266365, "grad_norm": 12.673724174499512, "learning_rate": 5.235713437290012e-06, "lm_loss": 5.4863, "loss": 1.4022, "step": 1100, "text_contrastive_loss": 0.9738, "train_positive_log_prob": -81.9747, "train_positive_token_accuracy": 0.0774, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.3996, "epoch": 2.485327313769752, "grad_norm": 13.809643745422363, "learning_rate": 5.228465848200327e-06, "lm_loss": 5.6012, "loss": 1.3115, "step": 1101, "text_contrastive_loss": 0.7034, "train_positive_log_prob": -79.9467, "train_positive_token_accuracy": 0.0761, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.507, "epoch": 2.4875846501128667, "grad_norm": 15.798452377319336, "learning_rate": 5.221217778044096e-06, "lm_loss": 5.4808, "loss": 1.4458, "step": 1102, "text_contrastive_loss": 0.7814, "train_positive_log_prob": -80.5246, "train_positive_token_accuracy": 0.0763, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3997, "epoch": 2.489841986455982, "grad_norm": 14.478659629821777, "learning_rate": 5.2139692420831325e-06, "lm_loss": 5.5925, "loss": 1.3412, "step": 1103, "text_contrastive_loss": 0.7645, "train_positive_log_prob": -83.6685, "train_positive_token_accuracy": 0.0854, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.4095, "epoch": 2.4920993227990973, "grad_norm": 12.84615421295166, "learning_rate": 5.206720255580241e-06, "lm_loss": 5.4692, "loss": 1.3362, "step": 1104, "text_contrastive_loss": 0.7595, "train_positive_log_prob": -80.3504, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.4717, "epoch": 2.494356659142212, "grad_norm": 16.172317504882812, "learning_rate": 5.199470833799164e-06, "lm_loss": 5.573, "loss": 1.4538, "step": 1105, "text_contrastive_loss": 0.8496, "train_positive_log_prob": -84.2076, "train_positive_token_accuracy": 0.082, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.3017, "epoch": 2.4966139954853275, "grad_norm": 11.299418449401855, "learning_rate": 5.192220992004569e-06, "lm_loss": 5.6036, "loss": 1.2086, "step": 1106, "text_contrastive_loss": 0.6931, "train_positive_log_prob": -83.639, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.5347, "epoch": 2.4988713318284423, "grad_norm": 16.39532470703125, "learning_rate": 5.184970745461998e-06, "lm_loss": 5.6291, "loss": 1.507, "step": 1107, "text_contrastive_loss": 0.8189, "train_positive_log_prob": -84.0616, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.3921, "epoch": 2.5011286681715577, "grad_norm": 12.706552505493164, "learning_rate": 5.177720109437857e-06, "lm_loss": 5.5586, "loss": 1.327, "step": 1108, "text_contrastive_loss": 0.758, "train_positive_log_prob": -83.0968, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.3224, "epoch": 2.5033860045146725, "grad_norm": 12.656816482543945, "learning_rate": 5.170469099199363e-06, "lm_loss": 5.4715, "loss": 1.2552, "step": 1109, "text_contrastive_loss": 0.7714, "train_positive_log_prob": -78.5682, "train_positive_token_accuracy": 0.0881, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3911, "epoch": 2.505643340857788, "grad_norm": 14.10784912109375, "learning_rate": 5.1632177300145255e-06, "lm_loss": 5.4638, "loss": 1.3482, "step": 1110, "text_contrastive_loss": 0.8215, "train_positive_log_prob": -81.2746, "train_positive_token_accuracy": 0.0827, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.5143, "epoch": 2.5079006772009027, "grad_norm": 14.490861892700195, "learning_rate": 5.155966017152108e-06, "lm_loss": 5.4204, "loss": 1.4946, "step": 1111, "text_contrastive_loss": 0.8765, "train_positive_log_prob": -81.7895, "train_positive_token_accuracy": 0.0834, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.4723, "epoch": 2.510158013544018, "grad_norm": 15.501252174377441, "learning_rate": 5.148713975881598e-06, "lm_loss": 5.4654, "loss": 1.4282, "step": 1112, "text_contrastive_loss": 0.8187, "train_positive_log_prob": -80.7631, "train_positive_token_accuracy": 0.0735, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.406, "epoch": 2.5124153498871333, "grad_norm": 13.753080368041992, "learning_rate": 5.141461621473175e-06, "lm_loss": 5.6012, "loss": 1.3412, "step": 1113, "text_contrastive_loss": 0.7502, "train_positive_log_prob": -83.4042, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.4504, "epoch": 2.514672686230248, "grad_norm": 15.42505168914795, "learning_rate": 5.1342089691976794e-06, "lm_loss": 5.5882, "loss": 1.45, "step": 1114, "text_contrastive_loss": 0.8816, "train_positive_log_prob": -81.5714, "train_positive_token_accuracy": 0.0789, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3244, "epoch": 2.5169300225733635, "grad_norm": 11.779836654663086, "learning_rate": 5.126956034326573e-06, "lm_loss": 5.4373, "loss": 1.2967, "step": 1115, "text_contrastive_loss": 0.8572, "train_positive_log_prob": -79.8218, "train_positive_token_accuracy": 0.0838, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3145, "epoch": 2.5191873589164784, "grad_norm": 12.900495529174805, "learning_rate": 5.119702832131922e-06, "lm_loss": 5.5437, "loss": 1.3138, "step": 1116, "text_contrastive_loss": 0.8898, "train_positive_log_prob": -82.4608, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3986, "epoch": 2.5214446952595937, "grad_norm": 12.25336742401123, "learning_rate": 5.112449377886345e-06, "lm_loss": 5.4775, "loss": 1.372, "step": 1117, "text_contrastive_loss": 0.8514, "train_positive_log_prob": -81.8183, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3882, "epoch": 2.523702031602709, "grad_norm": 13.7731294631958, "learning_rate": 5.105195686863e-06, "lm_loss": 5.6046, "loss": 1.3206, "step": 1118, "text_contrastive_loss": 0.744, "train_positive_log_prob": -82.9545, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3847, "epoch": 2.525959367945824, "grad_norm": 13.900425910949707, "learning_rate": 5.097941774335538e-06, "lm_loss": 5.4642, "loss": 1.2978, "step": 1119, "text_contrastive_loss": 0.7334, "train_positive_log_prob": -80.8777, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.452, "epoch": 2.528216704288939, "grad_norm": 13.458977699279785, "learning_rate": 5.090687655578078e-06, "lm_loss": 5.5858, "loss": 1.4618, "step": 1120, "text_contrastive_loss": 0.9024, "train_positive_log_prob": -84.0085, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.4537, "epoch": 2.530474040632054, "grad_norm": 14.56409740447998, "learning_rate": 5.083433345865175e-06, "lm_loss": 5.4753, "loss": 1.4121, "step": 1121, "text_contrastive_loss": 0.8219, "train_positive_log_prob": -79.843, "train_positive_token_accuracy": 0.0879, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.4135, "epoch": 2.5327313769751694, "grad_norm": 12.935258865356445, "learning_rate": 5.076178860471787e-06, "lm_loss": 5.491, "loss": 1.3246, "step": 1122, "text_contrastive_loss": 0.724, "train_positive_log_prob": -81.0987, "train_positive_token_accuracy": 0.0838, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.4647, "epoch": 2.5349887133182847, "grad_norm": 13.051006317138672, "learning_rate": 5.068924214673234e-06, "lm_loss": 5.4212, "loss": 1.4127, "step": 1123, "text_contrastive_loss": 0.8117, "train_positive_log_prob": -78.798, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.4065, "epoch": 2.5372460496613995, "grad_norm": 13.802006721496582, "learning_rate": 5.061669423745185e-06, "lm_loss": 5.4055, "loss": 1.3731, "step": 1124, "text_contrastive_loss": 0.8523, "train_positive_log_prob": -79.1395, "train_positive_token_accuracy": 0.0934, "train_positive_token_prob": 0.0328 }, { "contrastive_loss": 0.3912, "epoch": 2.5395033860045144, "grad_norm": 17.284080505371094, "learning_rate": 5.054414502963605e-06, "lm_loss": 5.5214, "loss": 1.3468, "step": 1125, "text_contrastive_loss": 0.8068, "train_positive_log_prob": -82.5795, "train_positive_token_accuracy": 0.0838, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3803, "epoch": 2.5417607223476297, "grad_norm": 12.933502197265625, "learning_rate": 5.0471594676047385e-06, "lm_loss": 5.5175, "loss": 1.3346, "step": 1126, "text_contrastive_loss": 0.805, "train_positive_log_prob": -81.4263, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.3538, "epoch": 2.544018058690745, "grad_norm": 13.167275428771973, "learning_rate": 5.039904332945069e-06, "lm_loss": 5.4207, "loss": 1.2401, "step": 1127, "text_contrastive_loss": 0.6885, "train_positive_log_prob": -78.5817, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.4452, "epoch": 2.54627539503386, "grad_norm": 13.284455299377441, "learning_rate": 5.03264911426129e-06, "lm_loss": 5.3674, "loss": 1.3615, "step": 1128, "text_contrastive_loss": 0.7592, "train_positive_log_prob": -78.8357, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.0328 }, { "contrastive_loss": 0.4752, "epoch": 2.5485327313769752, "grad_norm": 13.207563400268555, "learning_rate": 5.025393826830267e-06, "lm_loss": 5.5985, "loss": 1.4632, "step": 1129, "text_contrastive_loss": 0.8563, "train_positive_log_prob": -82.5769, "train_positive_token_accuracy": 0.0726, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.4554, "epoch": 2.55079006772009, "grad_norm": 12.570433616638184, "learning_rate": 5.0181384859290215e-06, "lm_loss": 5.5167, "loss": 1.406, "step": 1130, "text_contrastive_loss": 0.7979, "train_positive_log_prob": -81.6048, "train_positive_token_accuracy": 0.0842, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.4251, "epoch": 2.5530474040632054, "grad_norm": 14.778961181640625, "learning_rate": 5.010883106834676e-06, "lm_loss": 5.431, "loss": 1.4212, "step": 1131, "text_contrastive_loss": 0.906, "train_positive_log_prob": -81.4475, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.4831, "epoch": 2.5553047404063207, "grad_norm": 16.45122528076172, "learning_rate": 5.003627704824438e-06, "lm_loss": 5.486, "loss": 1.4543, "step": 1132, "text_contrastive_loss": 0.8453, "train_positive_log_prob": -79.9769, "train_positive_token_accuracy": 0.0879, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.4322, "epoch": 2.5575620767494356, "grad_norm": 13.629638671875, "learning_rate": 4.996372295175563e-06, "lm_loss": 5.5377, "loss": 1.3377, "step": 1133, "text_contrastive_loss": 0.7035, "train_positive_log_prob": -82.1578, "train_positive_token_accuracy": 0.0753, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.3606, "epoch": 2.559819413092551, "grad_norm": 12.509625434875488, "learning_rate": 4.989116893165325e-06, "lm_loss": 5.4185, "loss": 1.2522, "step": 1134, "text_contrastive_loss": 0.6995, "train_positive_log_prob": -80.2624, "train_positive_token_accuracy": 0.0789, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.5114, "epoch": 2.5620767494356658, "grad_norm": 14.97032642364502, "learning_rate": 4.981861514070979e-06, "lm_loss": 5.4655, "loss": 1.5477, "step": 1135, "text_contrastive_loss": 0.9796, "train_positive_log_prob": -80.784, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.4696, "epoch": 2.564334085778781, "grad_norm": 14.087468147277832, "learning_rate": 4.974606173169733e-06, "lm_loss": 5.4949, "loss": 1.4966, "step": 1136, "text_contrastive_loss": 0.9551, "train_positive_log_prob": -81.6525, "train_positive_token_accuracy": 0.0846, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.4408, "epoch": 2.5665914221218964, "grad_norm": 14.633216857910156, "learning_rate": 4.9673508857387115e-06, "lm_loss": 5.4085, "loss": 1.367, "step": 1137, "text_contrastive_loss": 0.7706, "train_positive_log_prob": -80.0688, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.4022, "epoch": 2.5688487584650113, "grad_norm": 13.321537017822266, "learning_rate": 4.9600956670549324e-06, "lm_loss": 5.5587, "loss": 1.357, "step": 1138, "text_contrastive_loss": 0.7979, "train_positive_log_prob": -83.3167, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.4094, "epoch": 2.5711060948081266, "grad_norm": 12.423505783081055, "learning_rate": 4.952840532395262e-06, "lm_loss": 5.5307, "loss": 1.3634, "step": 1139, "text_contrastive_loss": 0.802, "train_positive_log_prob": -84.0355, "train_positive_token_accuracy": 0.0825, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4286, "epoch": 2.5733634311512414, "grad_norm": 11.84165096282959, "learning_rate": 4.945585497036396e-06, "lm_loss": 5.498, "loss": 1.3595, "step": 1140, "text_contrastive_loss": 0.7622, "train_positive_log_prob": -79.1176, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4206, "epoch": 2.5756207674943568, "grad_norm": 15.220135688781738, "learning_rate": 4.938330576254817e-06, "lm_loss": 5.4074, "loss": 1.3147, "step": 1141, "text_contrastive_loss": 0.7069, "train_positive_log_prob": -81.8945, "train_positive_token_accuracy": 0.0838, "train_positive_token_prob": 0.0325 }, { "contrastive_loss": 0.4548, "epoch": 2.5778781038374716, "grad_norm": 14.214799880981445, "learning_rate": 4.931075785326767e-06, "lm_loss": 5.4122, "loss": 1.397, "step": 1142, "text_contrastive_loss": 0.8019, "train_positive_log_prob": -81.3377, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.4385, "epoch": 2.580135440180587, "grad_norm": 14.037745475769043, "learning_rate": 4.9238211395282156e-06, "lm_loss": 5.3858, "loss": 1.3714, "step": 1143, "text_contrastive_loss": 0.7886, "train_positive_log_prob": -77.5022, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3381, "epoch": 2.582392776523702, "grad_norm": 12.56302547454834, "learning_rate": 4.9165666541348265e-06, "lm_loss": 5.5134, "loss": 1.2769, "step": 1144, "text_contrastive_loss": 0.775, "train_positive_log_prob": -82.2902, "train_positive_token_accuracy": 0.0762, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.5162, "epoch": 2.584650112866817, "grad_norm": 14.678620338439941, "learning_rate": 4.909312344421923e-06, "lm_loss": 5.5333, "loss": 1.5027, "step": 1145, "text_contrastive_loss": 0.8663, "train_positive_log_prob": -82.6164, "train_positive_token_accuracy": 0.0798, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4099, "epoch": 2.5869074492099324, "grad_norm": 12.928022384643555, "learning_rate": 4.902058225664465e-06, "lm_loss": 5.4, "loss": 1.3616, "step": 1146, "text_contrastive_loss": 0.8232, "train_positive_log_prob": -77.6009, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.384, "epoch": 2.5891647855530473, "grad_norm": 15.01064682006836, "learning_rate": 4.8948043131370025e-06, "lm_loss": 5.579, "loss": 1.3733, "step": 1147, "text_contrastive_loss": 0.8629, "train_positive_log_prob": -81.9003, "train_positive_token_accuracy": 0.0718, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.3578, "epoch": 2.5914221218961626, "grad_norm": 12.526495933532715, "learning_rate": 4.887550622113657e-06, "lm_loss": 5.4814, "loss": 1.2652, "step": 1148, "text_contrastive_loss": 0.7185, "train_positive_log_prob": -79.1582, "train_positive_token_accuracy": 0.0855, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3802, "epoch": 2.5936794582392775, "grad_norm": 13.175474166870117, "learning_rate": 4.88029716786808e-06, "lm_loss": 5.5614, "loss": 1.306, "step": 1149, "text_contrastive_loss": 0.7393, "train_positive_log_prob": -83.9876, "train_positive_token_accuracy": 0.0846, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4673, "epoch": 2.595936794582393, "grad_norm": 14.35908317565918, "learning_rate": 4.873043965673427e-06, "lm_loss": 5.4945, "loss": 1.4968, "step": 1150, "text_contrastive_loss": 0.9602, "train_positive_log_prob": -82.3151, "train_positive_token_accuracy": 0.0834, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.5568, "epoch": 2.598194130925508, "grad_norm": 16.738842010498047, "learning_rate": 4.8657910308023205e-06, "lm_loss": 5.543, "loss": 1.5993, "step": 1151, "text_contrastive_loss": 0.9765, "train_positive_log_prob": -81.545, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.2944, "epoch": 2.600451467268623, "grad_norm": 11.679557800292969, "learning_rate": 4.858538378526825e-06, "lm_loss": 5.3878, "loss": 1.1371, "step": 1152, "text_contrastive_loss": 0.608, "train_positive_log_prob": -78.2674, "train_positive_token_accuracy": 0.0828, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.4211, "epoch": 2.6027088036117383, "grad_norm": 13.320328712463379, "learning_rate": 4.851286024118402e-06, "lm_loss": 5.4369, "loss": 1.3524, "step": 1153, "text_contrastive_loss": 0.7754, "train_positive_log_prob": -79.2371, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.4434, "epoch": 2.604966139954853, "grad_norm": 13.534639358520508, "learning_rate": 4.844033982847893e-06, "lm_loss": 5.5048, "loss": 1.4477, "step": 1154, "text_contrastive_loss": 0.9077, "train_positive_log_prob": -79.8649, "train_positive_token_accuracy": 0.0872, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3938, "epoch": 2.6072234762979685, "grad_norm": 13.858593940734863, "learning_rate": 4.836782269985475e-06, "lm_loss": 5.4346, "loss": 1.3776, "step": 1155, "text_contrastive_loss": 0.8806, "train_positive_log_prob": -77.2752, "train_positive_token_accuracy": 0.0835, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3625, "epoch": 2.609480812641084, "grad_norm": 13.214495658874512, "learning_rate": 4.829530900800638e-06, "lm_loss": 5.5532, "loss": 1.3706, "step": 1156, "text_contrastive_loss": 0.9055, "train_positive_log_prob": -81.7372, "train_positive_token_accuracy": 0.0791, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.413, "epoch": 2.6117381489841986, "grad_norm": 12.766947746276855, "learning_rate": 4.8222798905621445e-06, "lm_loss": 5.5154, "loss": 1.3809, "step": 1157, "text_contrastive_loss": 0.8327, "train_positive_log_prob": -80.6022, "train_positive_token_accuracy": 0.0909, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3759, "epoch": 2.6139954853273135, "grad_norm": 12.303617477416992, "learning_rate": 4.815029254538003e-06, "lm_loss": 5.4039, "loss": 1.2761, "step": 1158, "text_contrastive_loss": 0.7196, "train_positive_log_prob": -79.8339, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.5641, "epoch": 2.616252821670429, "grad_norm": 19.04671287536621, "learning_rate": 4.807779007995434e-06, "lm_loss": 5.4291, "loss": 1.5322, "step": 1159, "text_contrastive_loss": 0.8503, "train_positive_log_prob": -78.6826, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.281, "epoch": 2.618510158013544, "grad_norm": 12.744864463806152, "learning_rate": 4.800529166200837e-06, "lm_loss": 5.4667, "loss": 1.2207, "step": 1160, "text_contrastive_loss": 0.786, "train_positive_log_prob": -80.6122, "train_positive_token_accuracy": 0.0885, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.3549, "epoch": 2.620767494356659, "grad_norm": 12.05768871307373, "learning_rate": 4.7932797444197604e-06, "lm_loss": 5.5462, "loss": 1.3312, "step": 1161, "text_contrastive_loss": 0.8434, "train_positive_log_prob": -86.4028, "train_positive_token_accuracy": 0.0762, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.4921, "epoch": 2.6230248306997743, "grad_norm": 14.103909492492676, "learning_rate": 4.786030757916868e-06, "lm_loss": 5.6144, "loss": 1.5285, "step": 1162, "text_contrastive_loss": 0.95, "train_positive_log_prob": -83.7298, "train_positive_token_accuracy": 0.0833, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.4334, "epoch": 2.625282167042889, "grad_norm": 14.57669448852539, "learning_rate": 4.778782221955907e-06, "lm_loss": 5.4605, "loss": 1.3621, "step": 1163, "text_contrastive_loss": 0.7652, "train_positive_log_prob": -79.8643, "train_positive_token_accuracy": 0.0871, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3722, "epoch": 2.6275395033860045, "grad_norm": 12.507501602172852, "learning_rate": 4.771534151799676e-06, "lm_loss": 5.4844, "loss": 1.3388, "step": 1164, "text_contrastive_loss": 0.8363, "train_positive_log_prob": -82.9281, "train_positive_token_accuracy": 0.0814, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.4389, "epoch": 2.62979683972912, "grad_norm": 12.930022239685059, "learning_rate": 4.76428656270999e-06, "lm_loss": 5.6183, "loss": 1.3833, "step": 1165, "text_contrastive_loss": 0.7651, "train_positive_log_prob": -83.1572, "train_positive_token_accuracy": 0.0876, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3787, "epoch": 2.6320541760722347, "grad_norm": 13.370924949645996, "learning_rate": 4.757039469947658e-06, "lm_loss": 5.4669, "loss": 1.3421, "step": 1166, "text_contrastive_loss": 0.8335, "train_positive_log_prob": -82.8615, "train_positive_token_accuracy": 0.0822, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3548, "epoch": 2.63431151241535, "grad_norm": 12.020811080932617, "learning_rate": 4.7497928887724325e-06, "lm_loss": 5.5947, "loss": 1.3023, "step": 1167, "text_contrastive_loss": 0.7759, "train_positive_log_prob": -82.8732, "train_positive_token_accuracy": 0.0744, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3843, "epoch": 2.636568848758465, "grad_norm": 13.562383651733398, "learning_rate": 4.7425468344430035e-06, "lm_loss": 5.414, "loss": 1.308, "step": 1168, "text_contrastive_loss": 0.7646, "train_positive_log_prob": -78.5706, "train_positive_token_accuracy": 0.0869, "train_positive_token_prob": 0.0328 }, { "contrastive_loss": 0.3854, "epoch": 2.63882618510158, "grad_norm": 13.353324890136719, "learning_rate": 4.73530132221694e-06, "lm_loss": 5.4934, "loss": 1.2806, "step": 1169, "text_contrastive_loss": 0.6917, "train_positive_log_prob": -82.8743, "train_positive_token_accuracy": 0.073, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.4047, "epoch": 2.6410835214446955, "grad_norm": 12.325431823730469, "learning_rate": 4.7280563673506745e-06, "lm_loss": 5.5337, "loss": 1.3067, "step": 1170, "text_contrastive_loss": 0.6973, "train_positive_log_prob": -83.8513, "train_positive_token_accuracy": 0.0835, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3929, "epoch": 2.6433408577878104, "grad_norm": 13.446803092956543, "learning_rate": 4.720811985099464e-06, "lm_loss": 5.5289, "loss": 1.3735, "step": 1171, "text_contrastive_loss": 0.8554, "train_positive_log_prob": -81.7638, "train_positive_token_accuracy": 0.0763, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.4169, "epoch": 2.6455981941309257, "grad_norm": 14.658951759338379, "learning_rate": 4.713568190717362e-06, "lm_loss": 5.5548, "loss": 1.3923, "step": 1172, "text_contrastive_loss": 0.8398, "train_positive_log_prob": -82.1077, "train_positive_token_accuracy": 0.074, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3331, "epoch": 2.6478555304740405, "grad_norm": 12.803595542907715, "learning_rate": 4.70632499945718e-06, "lm_loss": 5.4746, "loss": 1.2921, "step": 1173, "text_contrastive_loss": 0.823, "train_positive_log_prob": -81.1941, "train_positive_token_accuracy": 0.0822, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.3947, "epoch": 2.650112866817156, "grad_norm": 12.50619888305664, "learning_rate": 4.699082426570465e-06, "lm_loss": 5.4903, "loss": 1.4064, "step": 1174, "text_contrastive_loss": 0.9254, "train_positive_log_prob": -80.32, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.44, "epoch": 2.6523702031602707, "grad_norm": 14.369673728942871, "learning_rate": 4.6918404873074574e-06, "lm_loss": 5.4898, "loss": 1.4245, "step": 1175, "text_contrastive_loss": 0.8712, "train_positive_log_prob": -82.52, "train_positive_token_accuracy": 0.0777, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3754, "epoch": 2.654627539503386, "grad_norm": 13.80208683013916, "learning_rate": 4.684599196917067e-06, "lm_loss": 5.5597, "loss": 1.2702, "step": 1176, "text_contrastive_loss": 0.6776, "train_positive_log_prob": -82.3752, "train_positive_token_accuracy": 0.0767, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.3706, "epoch": 2.656884875846501, "grad_norm": 12.911955833435059, "learning_rate": 4.677358570646834e-06, "lm_loss": 5.5187, "loss": 1.3116, "step": 1177, "text_contrastive_loss": 0.7783, "train_positive_log_prob": -81.4929, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.4332, "epoch": 2.659142212189616, "grad_norm": 15.9859619140625, "learning_rate": 4.670118623742904e-06, "lm_loss": 5.5002, "loss": 1.3827, "step": 1178, "text_contrastive_loss": 0.7989, "train_positive_log_prob": -81.5136, "train_positive_token_accuracy": 0.0751, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.4035, "epoch": 2.6613995485327315, "grad_norm": 13.92669677734375, "learning_rate": 4.662879371449987e-06, "lm_loss": 5.4052, "loss": 1.3365, "step": 1179, "text_contrastive_loss": 0.785, "train_positive_log_prob": -80.3391, "train_positive_token_accuracy": 0.0894, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3522, "epoch": 2.6636568848758464, "grad_norm": 12.085784912109375, "learning_rate": 4.655640829011335e-06, "lm_loss": 5.4177, "loss": 1.2824, "step": 1180, "text_contrastive_loss": 0.7769, "train_positive_log_prob": -79.3168, "train_positive_token_accuracy": 0.0827, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3313, "epoch": 2.6659142212189617, "grad_norm": 14.719451904296875, "learning_rate": 4.6484030116687014e-06, "lm_loss": 5.4578, "loss": 1.2345, "step": 1181, "text_contrastive_loss": 0.7148, "train_positive_log_prob": -82.1677, "train_positive_token_accuracy": 0.0885, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.412, "epoch": 2.6681715575620766, "grad_norm": 13.43020248413086, "learning_rate": 4.64116593466232e-06, "lm_loss": 5.4645, "loss": 1.323, "step": 1182, "text_contrastive_loss": 0.7291, "train_positive_log_prob": -80.4075, "train_positive_token_accuracy": 0.0828, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.2984, "epoch": 2.670428893905192, "grad_norm": 10.837862968444824, "learning_rate": 4.633929613230855e-06, "lm_loss": 5.4854, "loss": 1.2469, "step": 1183, "text_contrastive_loss": 0.7997, "train_positive_log_prob": -81.377, "train_positive_token_accuracy": 0.0835, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3425, "epoch": 2.672686230248307, "grad_norm": 13.186177253723145, "learning_rate": 4.626694062611387e-06, "lm_loss": 5.4388, "loss": 1.2543, "step": 1184, "text_contrastive_loss": 0.7358, "train_positive_log_prob": -82.0034, "train_positive_token_accuracy": 0.0785, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3589, "epoch": 2.674943566591422, "grad_norm": 14.815667152404785, "learning_rate": 4.619459298039373e-06, "lm_loss": 5.4746, "loss": 1.2642, "step": 1185, "text_contrastive_loss": 0.7156, "train_positive_log_prob": -81.8436, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.3478, "epoch": 2.6772009029345374, "grad_norm": 12.600274085998535, "learning_rate": 4.612225334748616e-06, "lm_loss": 5.4998, "loss": 1.221, "step": 1186, "text_contrastive_loss": 0.6465, "train_positive_log_prob": -84.3436, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.3724, "epoch": 2.6794582392776523, "grad_norm": 15.114175796508789, "learning_rate": 4.6049921879712254e-06, "lm_loss": 5.4519, "loss": 1.2851, "step": 1187, "text_contrastive_loss": 0.735, "train_positive_log_prob": -81.2476, "train_positive_token_accuracy": 0.0747, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4161, "epoch": 2.6817155756207676, "grad_norm": 13.305334091186523, "learning_rate": 4.597759872937597e-06, "lm_loss": 5.4548, "loss": 1.3299, "step": 1188, "text_contrastive_loss": 0.7367, "train_positive_log_prob": -79.6501, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.4597, "epoch": 2.683972911963883, "grad_norm": 14.347017288208008, "learning_rate": 4.590528404876374e-06, "lm_loss": 5.4162, "loss": 1.434, "step": 1189, "text_contrastive_loss": 0.8654, "train_positive_log_prob": -80.7159, "train_positive_token_accuracy": 0.0805, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.4394, "epoch": 2.6862302483069977, "grad_norm": 13.696542739868164, "learning_rate": 4.5832977990144165e-06, "lm_loss": 5.5338, "loss": 1.4007, "step": 1190, "text_contrastive_loss": 0.8157, "train_positive_log_prob": -80.281, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3796, "epoch": 2.6884875846501126, "grad_norm": 13.836771965026855, "learning_rate": 4.5760680705767665e-06, "lm_loss": 5.4323, "loss": 1.2444, "step": 1191, "text_contrastive_loss": 0.643, "train_positive_log_prob": -80.3787, "train_positive_token_accuracy": 0.0763, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3582, "epoch": 2.690744920993228, "grad_norm": 12.787728309631348, "learning_rate": 4.5688392347866226e-06, "lm_loss": 5.4675, "loss": 1.2694, "step": 1192, "text_contrastive_loss": 0.7288, "train_positive_log_prob": -81.4767, "train_positive_token_accuracy": 0.0823, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3826, "epoch": 2.6930022573363432, "grad_norm": 15.042651176452637, "learning_rate": 4.561611306865299e-06, "lm_loss": 5.52, "loss": 1.3041, "step": 1193, "text_contrastive_loss": 0.739, "train_positive_log_prob": -82.8433, "train_positive_token_accuracy": 0.0836, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3628, "epoch": 2.695259593679458, "grad_norm": 14.1420259475708, "learning_rate": 4.554384302032204e-06, "lm_loss": 5.4638, "loss": 1.2849, "step": 1194, "text_contrastive_loss": 0.7515, "train_positive_log_prob": -80.3323, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.4446, "epoch": 2.6975169300225734, "grad_norm": 13.776505470275879, "learning_rate": 4.547158235504797e-06, "lm_loss": 5.5475, "loss": 1.4387, "step": 1195, "text_contrastive_loss": 0.8788, "train_positive_log_prob": -82.4971, "train_positive_token_accuracy": 0.0817, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4056, "epoch": 2.6997742663656883, "grad_norm": 12.939220428466797, "learning_rate": 4.539933122498566e-06, "lm_loss": 5.4481, "loss": 1.4169, "step": 1196, "text_contrastive_loss": 0.9331, "train_positive_log_prob": -81.5348, "train_positive_token_accuracy": 0.0872, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.2591, "epoch": 2.7020316027088036, "grad_norm": 13.445302963256836, "learning_rate": 4.532708978226987e-06, "lm_loss": 5.4251, "loss": 1.1482, "step": 1197, "text_contrastive_loss": 0.6932, "train_positive_log_prob": -79.4235, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.3294, "epoch": 2.704288939051919, "grad_norm": 13.859259605407715, "learning_rate": 4.525485817901499e-06, "lm_loss": 5.5595, "loss": 1.322, "step": 1198, "text_contrastive_loss": 0.8732, "train_positive_log_prob": -84.2796, "train_positive_token_accuracy": 0.083, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.4215, "epoch": 2.706546275395034, "grad_norm": 13.112929344177246, "learning_rate": 4.518263656731468e-06, "lm_loss": 5.4648, "loss": 1.4274, "step": 1199, "text_contrastive_loss": 0.9189, "train_positive_log_prob": -81.3507, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.4598, "epoch": 2.708803611738149, "grad_norm": 14.781098365783691, "learning_rate": 4.511042509924157e-06, "lm_loss": 5.4864, "loss": 1.4536, "step": 1200, "text_contrastive_loss": 0.8903, "train_positive_log_prob": -81.4819, "train_positive_token_accuracy": 0.0826, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.466, "epoch": 2.711060948081264, "grad_norm": 13.943631172180176, "learning_rate": 4.5038223926846905e-06, "lm_loss": 5.5542, "loss": 1.4778, "step": 1201, "text_contrastive_loss": 0.9129, "train_positive_log_prob": -82.2697, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4928, "epoch": 2.7133182844243793, "grad_norm": 16.02657699584961, "learning_rate": 4.49660332021603e-06, "lm_loss": 5.4441, "loss": 1.5473, "step": 1202, "text_contrastive_loss": 1.0202, "train_positive_log_prob": -78.9585, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.4403, "epoch": 2.7155756207674946, "grad_norm": 13.975430488586426, "learning_rate": 4.489385307718934e-06, "lm_loss": 5.5006, "loss": 1.3872, "step": 1203, "text_contrastive_loss": 0.7938, "train_positive_log_prob": -79.5866, "train_positive_token_accuracy": 0.0753, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3518, "epoch": 2.7178329571106095, "grad_norm": 12.17011833190918, "learning_rate": 4.482168370391931e-06, "lm_loss": 5.5338, "loss": 1.2789, "step": 1204, "text_contrastive_loss": 0.7475, "train_positive_log_prob": -82.0157, "train_positive_token_accuracy": 0.0767, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.3903, "epoch": 2.7200902934537243, "grad_norm": 12.984262466430664, "learning_rate": 4.47495252343128e-06, "lm_loss": 5.4867, "loss": 1.3849, "step": 1205, "text_contrastive_loss": 0.8918, "train_positive_log_prob": -83.6725, "train_positive_token_accuracy": 0.074, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.3598, "epoch": 2.7223476297968396, "grad_norm": 12.920084953308105, "learning_rate": 4.467737782030951e-06, "lm_loss": 5.5757, "loss": 1.2959, "step": 1206, "text_contrastive_loss": 0.757, "train_positive_log_prob": -84.0959, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3264, "epoch": 2.724604966139955, "grad_norm": 11.541576385498047, "learning_rate": 4.460524161382582e-06, "lm_loss": 5.4051, "loss": 1.2119, "step": 1207, "text_contrastive_loss": 0.69, "train_positive_log_prob": -80.4468, "train_positive_token_accuracy": 0.0816, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3539, "epoch": 2.72686230248307, "grad_norm": 11.823294639587402, "learning_rate": 4.453311676675453e-06, "lm_loss": 5.4722, "loss": 1.2801, "step": 1208, "text_contrastive_loss": 0.7579, "train_positive_log_prob": -82.7221, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4779, "epoch": 2.729119638826185, "grad_norm": 14.281606674194336, "learning_rate": 4.44610034309645e-06, "lm_loss": 5.4242, "loss": 1.4065, "step": 1209, "text_contrastive_loss": 0.7723, "train_positive_log_prob": -81.3001, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.302, "epoch": 2.7313769751693, "grad_norm": 12.948177337646484, "learning_rate": 4.438890175830039e-06, "lm_loss": 5.6239, "loss": 1.2313, "step": 1210, "text_contrastive_loss": 0.7339, "train_positive_log_prob": -83.7198, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.4556, "epoch": 2.7336343115124153, "grad_norm": 14.67457389831543, "learning_rate": 4.431681190058224e-06, "lm_loss": 5.4522, "loss": 1.4931, "step": 1211, "text_contrastive_loss": 0.9847, "train_positive_log_prob": -80.8, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3598, "epoch": 2.7358916478555306, "grad_norm": 12.027202606201172, "learning_rate": 4.42447340096053e-06, "lm_loss": 5.4147, "loss": 1.2419, "step": 1212, "text_contrastive_loss": 0.6813, "train_positive_log_prob": -81.1546, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.4033, "epoch": 2.7381489841986455, "grad_norm": 14.120058059692383, "learning_rate": 4.417266823713953e-06, "lm_loss": 5.4045, "loss": 1.2572, "step": 1213, "text_contrastive_loss": 0.6269, "train_positive_log_prob": -79.2515, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.2904, "epoch": 2.740406320541761, "grad_norm": 12.804457664489746, "learning_rate": 4.410061473492943e-06, "lm_loss": 5.4582, "loss": 1.1259, "step": 1214, "text_contrastive_loss": 0.5793, "train_positive_log_prob": -81.4885, "train_positive_token_accuracy": 0.0816, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3212, "epoch": 2.7426636568848757, "grad_norm": 13.253199577331543, "learning_rate": 4.402857365469364e-06, "lm_loss": 5.4853, "loss": 1.222, "step": 1215, "text_contrastive_loss": 0.7047, "train_positive_log_prob": -81.0128, "train_positive_token_accuracy": 0.0823, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.323, "epoch": 2.744920993227991, "grad_norm": 11.764541625976562, "learning_rate": 4.3956545148124665e-06, "lm_loss": 5.4371, "loss": 1.2995, "step": 1216, "text_contrastive_loss": 0.8655, "train_positive_log_prob": -78.6893, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.4459, "epoch": 2.7471783295711063, "grad_norm": 15.989474296569824, "learning_rate": 4.38845293668885e-06, "lm_loss": 5.5859, "loss": 1.4857, "step": 1217, "text_contrastive_loss": 0.9625, "train_positive_log_prob": -82.6797, "train_positive_token_accuracy": 0.076, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.4463, "epoch": 2.749435665914221, "grad_norm": 13.160560607910156, "learning_rate": 4.381252646262437e-06, "lm_loss": 5.495, "loss": 1.4399, "step": 1218, "text_contrastive_loss": 0.8884, "train_positive_log_prob": -81.8069, "train_positive_token_accuracy": 0.0777, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.458, "epoch": 2.7516930022573365, "grad_norm": 13.962233543395996, "learning_rate": 4.37405365869444e-06, "lm_loss": 5.536, "loss": 1.4393, "step": 1219, "text_contrastive_loss": 0.8555, "train_positive_log_prob": -82.6745, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3958, "epoch": 2.7539503386004514, "grad_norm": 12.87502384185791, "learning_rate": 4.366855989143326e-06, "lm_loss": 5.4709, "loss": 1.3712, "step": 1220, "text_contrastive_loss": 0.8567, "train_positive_log_prob": -79.346, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3836, "epoch": 2.7562076749435667, "grad_norm": 13.692797660827637, "learning_rate": 4.359659652764786e-06, "lm_loss": 5.503, "loss": 1.2807, "step": 1221, "text_contrastive_loss": 0.6935, "train_positive_log_prob": -83.8341, "train_positive_token_accuracy": 0.0826, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.4108, "epoch": 2.758465011286682, "grad_norm": 13.382584571838379, "learning_rate": 4.352464664711706e-06, "lm_loss": 5.4594, "loss": 1.3804, "step": 1222, "text_contrastive_loss": 0.8473, "train_positive_log_prob": -80.6109, "train_positive_token_accuracy": 0.0839, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.4057, "epoch": 2.760722347629797, "grad_norm": 13.29274845123291, "learning_rate": 4.345271040134129e-06, "lm_loss": 5.4844, "loss": 1.3814, "step": 1223, "text_contrastive_loss": 0.8546, "train_positive_log_prob": -80.6597, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.4529, "epoch": 2.7629796839729117, "grad_norm": 15.336840629577637, "learning_rate": 4.338078794179234e-06, "lm_loss": 5.5096, "loss": 1.3576, "step": 1224, "text_contrastive_loss": 0.7076, "train_positive_log_prob": -81.0948, "train_positive_token_accuracy": 0.0809, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.5451, "epoch": 2.765237020316027, "grad_norm": 16.94712257385254, "learning_rate": 4.330887941991288e-06, "lm_loss": 5.5441, "loss": 1.481, "step": 1225, "text_contrastive_loss": 0.7628, "train_positive_log_prob": -83.5034, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.417, "epoch": 2.7674943566591423, "grad_norm": 12.747289657592773, "learning_rate": 4.323698498711634e-06, "lm_loss": 5.5802, "loss": 1.3896, "step": 1226, "text_contrastive_loss": 0.8292, "train_positive_log_prob": -78.9744, "train_positive_token_accuracy": 0.0687, "train_positive_token_prob": 0.028 }, { "contrastive_loss": 0.3553, "epoch": 2.769751693002257, "grad_norm": 12.221817016601562, "learning_rate": 4.316510479478636e-06, "lm_loss": 5.3802, "loss": 1.241, "step": 1227, "text_contrastive_loss": 0.6953, "train_positive_log_prob": -78.3718, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.43, "epoch": 2.7720090293453725, "grad_norm": 14.645344734191895, "learning_rate": 4.309323899427671e-06, "lm_loss": 5.373, "loss": 1.3379, "step": 1228, "text_contrastive_loss": 0.7412, "train_positive_log_prob": -78.6221, "train_positive_token_accuracy": 0.0841, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3726, "epoch": 2.7742663656884874, "grad_norm": 13.062455177307129, "learning_rate": 4.302138773691079e-06, "lm_loss": 5.4994, "loss": 1.2907, "step": 1229, "text_contrastive_loss": 0.7365, "train_positive_log_prob": -80.9441, "train_positive_token_accuracy": 0.0865, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.387, "epoch": 2.7765237020316027, "grad_norm": 14.095149040222168, "learning_rate": 4.294955117398139e-06, "lm_loss": 5.4908, "loss": 1.3432, "step": 1230, "text_contrastive_loss": 0.8143, "train_positive_log_prob": -78.8683, "train_positive_token_accuracy": 0.0713, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.3265, "epoch": 2.778781038374718, "grad_norm": 11.440180778503418, "learning_rate": 4.287772945675035e-06, "lm_loss": 5.5812, "loss": 1.255, "step": 1231, "text_contrastive_loss": 0.7408, "train_positive_log_prob": -83.3722, "train_positive_token_accuracy": 0.073, "train_positive_token_prob": 0.0285 }, { "contrastive_loss": 0.3287, "epoch": 2.781038374717833, "grad_norm": 14.378003120422363, "learning_rate": 4.280592273644829e-06, "lm_loss": 5.4748, "loss": 1.2209, "step": 1232, "text_contrastive_loss": 0.6894, "train_positive_log_prob": -81.1622, "train_positive_token_accuracy": 0.0854, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3838, "epoch": 2.783295711060948, "grad_norm": 12.829390525817871, "learning_rate": 4.273413116427419e-06, "lm_loss": 5.5321, "loss": 1.2876, "step": 1233, "text_contrastive_loss": 0.7011, "train_positive_log_prob": -81.3172, "train_positive_token_accuracy": 0.0755, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.3836, "epoch": 2.785553047404063, "grad_norm": 14.463422775268555, "learning_rate": 4.26623548913952e-06, "lm_loss": 5.4773, "loss": 1.3214, "step": 1234, "text_contrastive_loss": 0.7801, "train_positive_log_prob": -80.2541, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4316, "epoch": 2.7878103837471784, "grad_norm": 13.590999603271484, "learning_rate": 4.259059406894619e-06, "lm_loss": 5.469, "loss": 1.394, "step": 1235, "text_contrastive_loss": 0.8311, "train_positive_log_prob": -82.0576, "train_positive_token_accuracy": 0.0752, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.471, "epoch": 2.7900677200902937, "grad_norm": 16.407123565673828, "learning_rate": 4.251884884802956e-06, "lm_loss": 5.3534, "loss": 1.3968, "step": 1236, "text_contrastive_loss": 0.781, "train_positive_log_prob": -77.7943, "train_positive_token_accuracy": 0.0836, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3975, "epoch": 2.7923250564334086, "grad_norm": 15.670740127563477, "learning_rate": 4.2447119379714805e-06, "lm_loss": 5.5612, "loss": 1.3721, "step": 1237, "text_contrastive_loss": 0.837, "train_positive_log_prob": -84.1368, "train_positive_token_accuracy": 0.0855, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.3886, "epoch": 2.7945823927765234, "grad_norm": 13.319536209106445, "learning_rate": 4.237540581503831e-06, "lm_loss": 5.4816, "loss": 1.3145, "step": 1238, "text_contrastive_loss": 0.7554, "train_positive_log_prob": -81.7738, "train_positive_token_accuracy": 0.0762, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3296, "epoch": 2.7968397291196387, "grad_norm": 12.3250093460083, "learning_rate": 4.23037083050029e-06, "lm_loss": 5.5549, "loss": 1.2445, "step": 1239, "text_contrastive_loss": 0.719, "train_positive_log_prob": -80.176, "train_positive_token_accuracy": 0.0733, "train_positive_token_prob": 0.0286 }, { "contrastive_loss": 0.3645, "epoch": 2.799097065462754, "grad_norm": 14.026390075683594, "learning_rate": 4.223202700057765e-06, "lm_loss": 5.495, "loss": 1.3381, "step": 1240, "text_contrastive_loss": 0.8483, "train_positive_log_prob": -82.6181, "train_positive_token_accuracy": 0.0725, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.3603, "epoch": 2.801354401805869, "grad_norm": 13.461612701416016, "learning_rate": 4.216036205269748e-06, "lm_loss": 5.4894, "loss": 1.3684, "step": 1241, "text_contrastive_loss": 0.9183, "train_positive_log_prob": -80.9055, "train_positive_token_accuracy": 0.0808, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3864, "epoch": 2.8036117381489842, "grad_norm": 14.413870811462402, "learning_rate": 4.20887136122629e-06, "lm_loss": 5.4594, "loss": 1.3118, "step": 1242, "text_contrastive_loss": 0.7589, "train_positive_log_prob": -82.2732, "train_positive_token_accuracy": 0.0811, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.528, "epoch": 2.805869074492099, "grad_norm": 14.763357162475586, "learning_rate": 4.201708183013963e-06, "lm_loss": 5.4852, "loss": 1.5696, "step": 1243, "text_contrastive_loss": 0.9862, "train_positive_log_prob": -80.1133, "train_positive_token_accuracy": 0.069, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.3172, "epoch": 2.8081264108352144, "grad_norm": 12.26828384399414, "learning_rate": 4.1945466857158336e-06, "lm_loss": 5.4729, "loss": 1.2154, "step": 1244, "text_contrastive_loss": 0.7019, "train_positive_log_prob": -81.5168, "train_positive_token_accuracy": 0.0831, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.4179, "epoch": 2.8103837471783297, "grad_norm": 14.41314697265625, "learning_rate": 4.187386884411426e-06, "lm_loss": 5.5451, "loss": 1.3693, "step": 1245, "text_contrastive_loss": 0.7936, "train_positive_log_prob": -82.1774, "train_positive_token_accuracy": 0.0754, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3806, "epoch": 2.8126410835214446, "grad_norm": 13.032148361206055, "learning_rate": 4.1802287941767e-06, "lm_loss": 5.4604, "loss": 1.393, "step": 1246, "text_contrastive_loss": 0.9328, "train_positive_log_prob": -83.8024, "train_positive_token_accuracy": 0.0687, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.3946, "epoch": 2.81489841986456, "grad_norm": 12.606998443603516, "learning_rate": 4.173072430084002e-06, "lm_loss": 5.5478, "loss": 1.4201, "step": 1247, "text_contrastive_loss": 0.9413, "train_positive_log_prob": -81.8941, "train_positive_token_accuracy": 0.0765, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.3443, "epoch": 2.8171557562076748, "grad_norm": 13.290593147277832, "learning_rate": 4.165917807202055e-06, "lm_loss": 5.4816, "loss": 1.1863, "step": 1248, "text_contrastive_loss": 0.5876, "train_positive_log_prob": -82.2438, "train_positive_token_accuracy": 0.082, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3559, "epoch": 2.81941309255079, "grad_norm": 15.067920684814453, "learning_rate": 4.1587649405959065e-06, "lm_loss": 5.4577, "loss": 1.2862, "step": 1249, "text_contrastive_loss": 0.769, "train_positive_log_prob": -81.1565, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3956, "epoch": 2.8216704288939054, "grad_norm": 13.614821434020996, "learning_rate": 4.151613845326912e-06, "lm_loss": 5.5196, "loss": 1.3751, "step": 1250, "text_contrastive_loss": 0.855, "train_positive_log_prob": -81.5007, "train_positive_token_accuracy": 0.0775, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.4083, "epoch": 2.8239277652370203, "grad_norm": 13.48894214630127, "learning_rate": 4.144464536452693e-06, "lm_loss": 5.5392, "loss": 1.3301, "step": 1251, "text_contrastive_loss": 0.7358, "train_positive_log_prob": -81.4915, "train_positive_token_accuracy": 0.0842, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3992, "epoch": 2.8261851015801356, "grad_norm": 14.535311698913574, "learning_rate": 4.137317029027111e-06, "lm_loss": 5.3489, "loss": 1.3523, "step": 1252, "text_contrastive_loss": 0.8362, "train_positive_log_prob": -78.2249, "train_positive_token_accuracy": 0.0871, "train_positive_token_prob": 0.0336 }, { "contrastive_loss": 0.4001, "epoch": 2.8284424379232505, "grad_norm": 14.125943183898926, "learning_rate": 4.1301713381002394e-06, "lm_loss": 5.3939, "loss": 1.3438, "step": 1253, "text_contrastive_loss": 0.8087, "train_positive_log_prob": -78.3838, "train_positive_token_accuracy": 0.0801, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.4122, "epoch": 2.8306997742663658, "grad_norm": 13.226386070251465, "learning_rate": 4.123027478718318e-06, "lm_loss": 5.4704, "loss": 1.2586, "step": 1254, "text_contrastive_loss": 0.5988, "train_positive_log_prob": -81.179, "train_positive_token_accuracy": 0.0827, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.4602, "epoch": 2.832957110609481, "grad_norm": 15.592979431152344, "learning_rate": 4.115885465923734e-06, "lm_loss": 5.4381, "loss": 1.4722, "step": 1255, "text_contrastive_loss": 0.9363, "train_positive_log_prob": -79.7466, "train_positive_token_accuracy": 0.0783, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.4278, "epoch": 2.835214446952596, "grad_norm": 12.926896095275879, "learning_rate": 4.108745314754989e-06, "lm_loss": 5.4251, "loss": 1.3541, "step": 1256, "text_contrastive_loss": 0.7677, "train_positive_log_prob": -79.3661, "train_positive_token_accuracy": 0.0836, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.3709, "epoch": 2.837471783295711, "grad_norm": 13.7692289352417, "learning_rate": 4.101607040246659e-06, "lm_loss": 5.4437, "loss": 1.3455, "step": 1257, "text_contrastive_loss": 0.8605, "train_positive_log_prob": -82.8454, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3342, "epoch": 2.839729119638826, "grad_norm": 12.793830871582031, "learning_rate": 4.094470657429374e-06, "lm_loss": 5.534, "loss": 1.3307, "step": 1258, "text_contrastive_loss": 0.8862, "train_positive_log_prob": -81.7586, "train_positive_token_accuracy": 0.0738, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.3849, "epoch": 2.8419864559819414, "grad_norm": 13.881046295166016, "learning_rate": 4.087336181329777e-06, "lm_loss": 5.522, "loss": 1.361, "step": 1259, "text_contrastive_loss": 0.8478, "train_positive_log_prob": -81.2762, "train_positive_token_accuracy": 0.0754, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3566, "epoch": 2.8442437923250563, "grad_norm": 14.135019302368164, "learning_rate": 4.080203626970498e-06, "lm_loss": 5.496, "loss": 1.3519, "step": 1260, "text_contrastive_loss": 0.8914, "train_positive_log_prob": -79.7113, "train_positive_token_accuracy": 0.0811, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3855, "epoch": 2.8465011286681716, "grad_norm": 12.44174575805664, "learning_rate": 4.0730730093701185e-06, "lm_loss": 5.4494, "loss": 1.3268, "step": 1261, "text_contrastive_loss": 0.7927, "train_positive_log_prob": -81.718, "train_positive_token_accuracy": 0.0836, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.385, "epoch": 2.8487584650112865, "grad_norm": 13.774590492248535, "learning_rate": 4.065944343543146e-06, "lm_loss": 5.5729, "loss": 1.324, "step": 1262, "text_contrastive_loss": 0.7635, "train_positive_log_prob": -84.2902, "train_positive_token_accuracy": 0.0728, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.4697, "epoch": 2.851015801354402, "grad_norm": 14.798707962036133, "learning_rate": 4.058817644499973e-06, "lm_loss": 5.5272, "loss": 1.4702, "step": 1263, "text_contrastive_loss": 0.8955, "train_positive_log_prob": -81.6971, "train_positive_token_accuracy": 0.0801, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3595, "epoch": 2.853273137697517, "grad_norm": 13.562416076660156, "learning_rate": 4.051692927246857e-06, "lm_loss": 5.4418, "loss": 1.2281, "step": 1264, "text_contrastive_loss": 0.6488, "train_positive_log_prob": -80.7348, "train_positive_token_accuracy": 0.0732, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3096, "epoch": 2.855530474040632, "grad_norm": 17.661754608154297, "learning_rate": 4.044570206785874e-06, "lm_loss": 5.4619, "loss": 1.2246, "step": 1265, "text_contrastive_loss": 0.7378, "train_positive_log_prob": -81.3721, "train_positive_token_accuracy": 0.0847, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3508, "epoch": 2.8577878103837473, "grad_norm": 11.998366355895996, "learning_rate": 4.037449498114903e-06, "lm_loss": 5.5167, "loss": 1.2914, "step": 1266, "text_contrastive_loss": 0.7778, "train_positive_log_prob": -80.4452, "train_positive_token_accuracy": 0.0841, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.4118, "epoch": 2.860045146726862, "grad_norm": 13.16608715057373, "learning_rate": 4.0303308162275835e-06, "lm_loss": 5.5101, "loss": 1.2936, "step": 1267, "text_contrastive_loss": 0.6617, "train_positive_log_prob": -83.7504, "train_positive_token_accuracy": 0.0835, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3707, "epoch": 2.8623024830699775, "grad_norm": 13.012649536132812, "learning_rate": 4.0232141761132894e-06, "lm_loss": 5.3819, "loss": 1.3016, "step": 1268, "text_contrastive_loss": 0.7854, "train_positive_log_prob": -78.8567, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.448, "epoch": 2.864559819413093, "grad_norm": 13.595703125, "learning_rate": 4.016099592757091e-06, "lm_loss": 5.4091, "loss": 1.356, "step": 1269, "text_contrastive_loss": 0.7342, "train_positive_log_prob": -80.2895, "train_positive_token_accuracy": 0.0861, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.403, "epoch": 2.8668171557562077, "grad_norm": 14.504570960998535, "learning_rate": 4.008987081139734e-06, "lm_loss": 5.5506, "loss": 1.2738, "step": 1270, "text_contrastive_loss": 0.6315, "train_positive_log_prob": -84.0404, "train_positive_token_accuracy": 0.0751, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.4179, "epoch": 2.8690744920993225, "grad_norm": 13.538684844970703, "learning_rate": 4.0018766562375984e-06, "lm_loss": 5.4326, "loss": 1.3779, "step": 1271, "text_contrastive_loss": 0.8334, "train_positive_log_prob": -79.6862, "train_positive_token_accuracy": 0.0732, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3203, "epoch": 2.871331828442438, "grad_norm": 12.093225479125977, "learning_rate": 3.994768333022669e-06, "lm_loss": 5.4881, "loss": 1.2175, "step": 1272, "text_contrastive_loss": 0.6968, "train_positive_log_prob": -82.7274, "train_positive_token_accuracy": 0.0816, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.4798, "epoch": 2.873589164785553, "grad_norm": 13.538874626159668, "learning_rate": 3.987662126462507e-06, "lm_loss": 5.5, "loss": 1.4636, "step": 1273, "text_contrastive_loss": 0.8675, "train_positive_log_prob": -82.9512, "train_positive_token_accuracy": 0.0899, "train_positive_token_prob": 0.0329 }, { "contrastive_loss": 0.3468, "epoch": 2.875846501128668, "grad_norm": 13.044873237609863, "learning_rate": 3.980558051520218e-06, "lm_loss": 5.4294, "loss": 1.2759, "step": 1274, "text_contrastive_loss": 0.7723, "train_positive_log_prob": -80.0195, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.4188, "epoch": 2.8781038374717833, "grad_norm": 13.54685115814209, "learning_rate": 3.973456123154415e-06, "lm_loss": 5.5064, "loss": 1.3055, "step": 1275, "text_contrastive_loss": 0.6722, "train_positive_log_prob": -80.8951, "train_positive_token_accuracy": 0.0739, "train_positive_token_prob": 0.0291 }, { "contrastive_loss": 0.3674, "epoch": 2.880361173814898, "grad_norm": 12.428534507751465, "learning_rate": 3.966356356319196e-06, "lm_loss": 5.4792, "loss": 1.3216, "step": 1276, "text_contrastive_loss": 0.8125, "train_positive_log_prob": -80.7279, "train_positive_token_accuracy": 0.0901, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3046, "epoch": 2.8826185101580135, "grad_norm": 12.955340385437012, "learning_rate": 3.959258765964104e-06, "lm_loss": 5.424, "loss": 1.2631, "step": 1277, "text_contrastive_loss": 0.8323, "train_positive_log_prob": -80.3458, "train_positive_token_accuracy": 0.0767, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3902, "epoch": 2.884875846501129, "grad_norm": 13.668217658996582, "learning_rate": 3.9521633670341005e-06, "lm_loss": 5.3886, "loss": 1.3276, "step": 1278, "text_contrastive_loss": 0.7971, "train_positive_log_prob": -78.66, "train_positive_token_accuracy": 0.0749, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.425, "epoch": 2.8871331828442437, "grad_norm": 13.321612358093262, "learning_rate": 3.9450701744695325e-06, "lm_loss": 5.4796, "loss": 1.3674, "step": 1279, "text_contrastive_loss": 0.7889, "train_positive_log_prob": -80.8224, "train_positive_token_accuracy": 0.0694, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.3522, "epoch": 2.889390519187359, "grad_norm": 13.01663589477539, "learning_rate": 3.937979203206103e-06, "lm_loss": 5.4883, "loss": 1.282, "step": 1280, "text_contrastive_loss": 0.7619, "train_positive_log_prob": -83.9133, "train_positive_token_accuracy": 0.0818, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3696, "epoch": 2.891647855530474, "grad_norm": 13.588577270507812, "learning_rate": 3.930890468174833e-06, "lm_loss": 5.4038, "loss": 1.3386, "step": 1281, "text_contrastive_loss": 0.8573, "train_positive_log_prob": -79.368, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3557, "epoch": 2.893905191873589, "grad_norm": 13.21587085723877, "learning_rate": 3.92380398430204e-06, "lm_loss": 5.4722, "loss": 1.2746, "step": 1282, "text_contrastive_loss": 0.7435, "train_positive_log_prob": -82.4255, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.537, "epoch": 2.8961625282167045, "grad_norm": 19.4005069732666, "learning_rate": 3.916719766509297e-06, "lm_loss": 5.4177, "loss": 1.5095, "step": 1283, "text_contrastive_loss": 0.8615, "train_positive_log_prob": -81.1866, "train_positive_token_accuracy": 0.0834, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.4378, "epoch": 2.8984198645598194, "grad_norm": 13.29963207244873, "learning_rate": 3.9096378297134115e-06, "lm_loss": 5.4622, "loss": 1.3843, "step": 1284, "text_contrastive_loss": 0.8007, "train_positive_log_prob": -82.2274, "train_positive_token_accuracy": 0.0763, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.4134, "epoch": 2.9006772009029347, "grad_norm": 14.635613441467285, "learning_rate": 3.90255818882638e-06, "lm_loss": 5.5322, "loss": 1.3805, "step": 1285, "text_contrastive_loss": 0.8276, "train_positive_log_prob": -82.7852, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3661, "epoch": 2.9029345372460496, "grad_norm": 13.061617851257324, "learning_rate": 3.89548085875537e-06, "lm_loss": 5.5022, "loss": 1.323, "step": 1286, "text_contrastive_loss": 0.8134, "train_positive_log_prob": -81.9035, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3357, "epoch": 2.905191873589165, "grad_norm": 12.666919708251953, "learning_rate": 3.888405854402684e-06, "lm_loss": 5.4253, "loss": 1.3267, "step": 1287, "text_contrastive_loss": 0.8969, "train_positive_log_prob": -80.2976, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3475, "epoch": 2.90744920993228, "grad_norm": 13.228978157043457, "learning_rate": 3.881333190665723e-06, "lm_loss": 5.3635, "loss": 1.2762, "step": 1288, "text_contrastive_loss": 0.7846, "train_positive_log_prob": -78.2085, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.4708, "epoch": 2.909706546275395, "grad_norm": 14.680399894714355, "learning_rate": 3.8742628824369624e-06, "lm_loss": 5.4786, "loss": 1.4769, "step": 1289, "text_contrastive_loss": 0.9165, "train_positive_log_prob": -80.2097, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.3534, "epoch": 2.91196388261851, "grad_norm": 13.228862762451172, "learning_rate": 3.86719494460392e-06, "lm_loss": 5.5715, "loss": 1.3555, "step": 1290, "text_contrastive_loss": 0.89, "train_positive_log_prob": -85.337, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4133, "epoch": 2.9142212189616252, "grad_norm": 13.871740341186523, "learning_rate": 3.8601293920491165e-06, "lm_loss": 5.5267, "loss": 1.4243, "step": 1291, "text_contrastive_loss": 0.9166, "train_positive_log_prob": -81.6705, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.4165, "epoch": 2.9164785553047405, "grad_norm": 15.968809127807617, "learning_rate": 3.853066239650055e-06, "lm_loss": 5.4784, "loss": 1.3839, "step": 1292, "text_contrastive_loss": 0.839, "train_positive_log_prob": -79.4615, "train_positive_token_accuracy": 0.0811, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.4197, "epoch": 2.9187358916478554, "grad_norm": 12.649006843566895, "learning_rate": 3.846005502279182e-06, "lm_loss": 5.4442, "loss": 1.3339, "step": 1293, "text_contrastive_loss": 0.7395, "train_positive_log_prob": -79.8937, "train_positive_token_accuracy": 0.0692, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.411, "epoch": 2.9209932279909707, "grad_norm": 14.790594100952148, "learning_rate": 3.83894719480386e-06, "lm_loss": 5.406, "loss": 1.4348, "step": 1294, "text_contrastive_loss": 0.9665, "train_positive_log_prob": -79.2114, "train_positive_token_accuracy": 0.0845, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3828, "epoch": 2.9232505643340856, "grad_norm": 14.072667121887207, "learning_rate": 3.8318913320863355e-06, "lm_loss": 5.4848, "loss": 1.3355, "step": 1295, "text_contrastive_loss": 0.8084, "train_positive_log_prob": -80.8594, "train_positive_token_accuracy": 0.0843, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.4175, "epoch": 2.925507900677201, "grad_norm": 13.898487091064453, "learning_rate": 3.8248379289837065e-06, "lm_loss": 5.3468, "loss": 1.3991, "step": 1296, "text_contrastive_loss": 0.8939, "train_positive_log_prob": -79.5724, "train_positive_token_accuracy": 0.0848, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.4609, "epoch": 2.927765237020316, "grad_norm": 13.965980529785156, "learning_rate": 3.81778700034789e-06, "lm_loss": 5.4237, "loss": 1.4498, "step": 1297, "text_contrastive_loss": 0.8931, "train_positive_log_prob": -81.0562, "train_positive_token_accuracy": 0.0821, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3697, "epoch": 2.930022573363431, "grad_norm": 12.408913612365723, "learning_rate": 3.810738561025599e-06, "lm_loss": 5.4696, "loss": 1.2408, "step": 1298, "text_contrastive_loss": 0.6483, "train_positive_log_prob": -81.6389, "train_positive_token_accuracy": 0.0817, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.4593, "epoch": 2.9322799097065464, "grad_norm": 15.56299877166748, "learning_rate": 3.803692625858295e-06, "lm_loss": 5.4225, "loss": 1.4455, "step": 1299, "text_contrastive_loss": 0.8881, "train_positive_log_prob": -79.0296, "train_positive_token_accuracy": 0.0729, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3879, "epoch": 2.9345372460496613, "grad_norm": 13.634164810180664, "learning_rate": 3.7966492096821773e-06, "lm_loss": 5.3584, "loss": 1.3445, "step": 1300, "text_contrastive_loss": 0.8417, "train_positive_log_prob": -79.0793, "train_positive_token_accuracy": 0.0866, "train_positive_token_prob": 0.0328 }, { "contrastive_loss": 0.3294, "epoch": 2.9367945823927766, "grad_norm": 12.517492294311523, "learning_rate": 3.7896083273281324e-06, "lm_loss": 5.5431, "loss": 1.2618, "step": 1301, "text_contrastive_loss": 0.7561, "train_positive_log_prob": -83.3422, "train_positive_token_accuracy": 0.0751, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3643, "epoch": 2.939051918735892, "grad_norm": 12.977726936340332, "learning_rate": 3.7825699936217183e-06, "lm_loss": 5.3936, "loss": 1.2703, "step": 1302, "text_contrastive_loss": 0.7331, "train_positive_log_prob": -81.6006, "train_positive_token_accuracy": 0.0824, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.472, "epoch": 2.9413092550790068, "grad_norm": 13.751511573791504, "learning_rate": 3.7755342233831188e-06, "lm_loss": 5.3611, "loss": 1.4255, "step": 1303, "text_contrastive_loss": 0.8348, "train_positive_log_prob": -76.8571, "train_positive_token_accuracy": 0.0817, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.4073, "epoch": 2.9435665914221216, "grad_norm": 12.415215492248535, "learning_rate": 3.7685010314271287e-06, "lm_loss": 5.4899, "loss": 1.4332, "step": 1304, "text_contrastive_loss": 0.9538, "train_positive_log_prob": -80.5901, "train_positive_token_accuracy": 0.0804, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3922, "epoch": 2.945823927765237, "grad_norm": 13.121039390563965, "learning_rate": 3.761470432563109e-06, "lm_loss": 5.5034, "loss": 1.3051, "step": 1305, "text_contrastive_loss": 0.7251, "train_positive_log_prob": -79.223, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.4288, "epoch": 2.9480812641083523, "grad_norm": 15.263547897338867, "learning_rate": 3.75444244159496e-06, "lm_loss": 5.5268, "loss": 1.3455, "step": 1306, "text_contrastive_loss": 0.7279, "train_positive_log_prob": -82.5366, "train_positive_token_accuracy": 0.0735, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.4629, "epoch": 2.950338600451467, "grad_norm": 14.952875137329102, "learning_rate": 3.747417073321092e-06, "lm_loss": 5.4919, "loss": 1.3997, "step": 1307, "text_contrastive_loss": 0.7752, "train_positive_log_prob": -82.9756, "train_positive_token_accuracy": 0.0871, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.3381, "epoch": 2.9525959367945824, "grad_norm": 11.371570587158203, "learning_rate": 3.740394342534394e-06, "lm_loss": 5.4794, "loss": 1.2245, "step": 1308, "text_contrastive_loss": 0.6769, "train_positive_log_prob": -80.9922, "train_positive_token_accuracy": 0.0859, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.4304, "epoch": 2.9548532731376973, "grad_norm": 13.57474422454834, "learning_rate": 3.7333742640221994e-06, "lm_loss": 5.4681, "loss": 1.3594, "step": 1309, "text_contrastive_loss": 0.7643, "train_positive_log_prob": -79.3054, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.4388, "epoch": 2.9571106094808126, "grad_norm": 13.896613121032715, "learning_rate": 3.7263568525662574e-06, "lm_loss": 5.4731, "loss": 1.4319, "step": 1310, "text_contrastive_loss": 0.8916, "train_positive_log_prob": -80.9411, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3984, "epoch": 2.959367945823928, "grad_norm": 13.689347267150879, "learning_rate": 3.7193421229427017e-06, "lm_loss": 5.4342, "loss": 1.3539, "step": 1311, "text_contrastive_loss": 0.8243, "train_positive_log_prob": -80.1957, "train_positive_token_accuracy": 0.0749, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4104, "epoch": 2.961625282167043, "grad_norm": 12.509286880493164, "learning_rate": 3.7123300899220193e-06, "lm_loss": 5.4652, "loss": 1.4173, "step": 1312, "text_contrastive_loss": 0.9208, "train_positive_log_prob": -80.463, "train_positive_token_accuracy": 0.0777, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3278, "epoch": 2.963882618510158, "grad_norm": 13.086407661437988, "learning_rate": 3.7053207682690184e-06, "lm_loss": 5.4826, "loss": 1.248, "step": 1313, "text_contrastive_loss": 0.7438, "train_positive_log_prob": -80.9728, "train_positive_token_accuracy": 0.0733, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.3843, "epoch": 2.966139954853273, "grad_norm": 13.62429428100586, "learning_rate": 3.698314172742799e-06, "lm_loss": 5.5315, "loss": 1.3238, "step": 1314, "text_contrastive_loss": 0.7727, "train_positive_log_prob": -83.8359, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3883, "epoch": 2.9683972911963883, "grad_norm": 13.54436206817627, "learning_rate": 3.691310318096719e-06, "lm_loss": 5.4706, "loss": 1.3076, "step": 1315, "text_contrastive_loss": 0.7445, "train_positive_log_prob": -81.0807, "train_positive_token_accuracy": 0.0793, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.4113, "epoch": 2.9706546275395036, "grad_norm": 14.356799125671387, "learning_rate": 3.684309219078368e-06, "lm_loss": 5.4237, "loss": 1.3562, "step": 1316, "text_contrastive_loss": 0.805, "train_positive_log_prob": -79.9827, "train_positive_token_accuracy": 0.0834, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3692, "epoch": 2.9729119638826185, "grad_norm": 12.790327072143555, "learning_rate": 3.6773108904295294e-06, "lm_loss": 5.5083, "loss": 1.3468, "step": 1317, "text_contrastive_loss": 0.8535, "train_positive_log_prob": -81.4411, "train_positive_token_accuracy": 0.0753, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4919, "epoch": 2.975169300225734, "grad_norm": 15.638786315917969, "learning_rate": 3.6703153468861585e-06, "lm_loss": 5.4922, "loss": 1.4634, "step": 1318, "text_contrastive_loss": 0.8445, "train_positive_log_prob": -80.9253, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3833, "epoch": 2.9774266365688487, "grad_norm": 13.907543182373047, "learning_rate": 3.663322603178339e-06, "lm_loss": 5.4327, "loss": 1.3383, "step": 1319, "text_contrastive_loss": 0.8236, "train_positive_log_prob": -79.5552, "train_positive_token_accuracy": 0.0816, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.4456, "epoch": 2.979683972911964, "grad_norm": 15.450085639953613, "learning_rate": 3.6563326740302664e-06, "lm_loss": 5.3582, "loss": 1.376, "step": 1320, "text_contrastive_loss": 0.789, "train_positive_log_prob": -78.1743, "train_positive_token_accuracy": 0.0822, "train_positive_token_prob": 0.0325 }, { "contrastive_loss": 0.5325, "epoch": 2.9819413092550793, "grad_norm": 15.420104026794434, "learning_rate": 3.6493455741602035e-06, "lm_loss": 5.3336, "loss": 1.4703, "step": 1321, "text_contrastive_loss": 0.8088, "train_positive_log_prob": -76.8342, "train_positive_token_accuracy": 0.07, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3127, "epoch": 2.984198645598194, "grad_norm": 12.451042175292969, "learning_rate": 3.642361318280461e-06, "lm_loss": 5.3186, "loss": 1.1322, "step": 1322, "text_contrastive_loss": 0.5753, "train_positive_log_prob": -77.7446, "train_positive_token_accuracy": 0.0826, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3505, "epoch": 2.986455981941309, "grad_norm": 14.17666244506836, "learning_rate": 3.635379921097359e-06, "lm_loss": 5.5185, "loss": 1.3039, "step": 1323, "text_contrastive_loss": 0.803, "train_positive_log_prob": -82.8006, "train_positive_token_accuracy": 0.0804, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.429, "epoch": 2.9887133182844243, "grad_norm": 14.575055122375488, "learning_rate": 3.6284013973111962e-06, "lm_loss": 5.5456, "loss": 1.4523, "step": 1324, "text_contrastive_loss": 0.9376, "train_positive_log_prob": -84.5287, "train_positive_token_accuracy": 0.0774, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3925, "epoch": 2.9909706546275396, "grad_norm": 13.344061851501465, "learning_rate": 3.621425761616224e-06, "lm_loss": 5.4794, "loss": 1.4149, "step": 1325, "text_contrastive_loss": 0.949, "train_positive_log_prob": -79.8587, "train_positive_token_accuracy": 0.0793, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.3149, "epoch": 2.9932279909706545, "grad_norm": 12.432286262512207, "learning_rate": 3.614453028700613e-06, "lm_loss": 5.3891, "loss": 1.2165, "step": 1326, "text_contrastive_loss": 0.7255, "train_positive_log_prob": -81.1121, "train_positive_token_accuracy": 0.0833, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.4244, "epoch": 2.99548532731377, "grad_norm": 15.432085037231445, "learning_rate": 3.6074832132464165e-06, "lm_loss": 5.51, "loss": 1.403, "step": 1327, "text_contrastive_loss": 0.8552, "train_positive_log_prob": -80.9323, "train_positive_token_accuracy": 0.0758, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.4511, "epoch": 2.9977426636568847, "grad_norm": 13.709569931030273, "learning_rate": 3.600516329929551e-06, "lm_loss": 5.5615, "loss": 1.5017, "step": 1328, "text_contrastive_loss": 0.9889, "train_positive_log_prob": -82.1145, "train_positive_token_accuracy": 0.0833, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.2199, "epoch": 3.0, "grad_norm": 15.74201488494873, "learning_rate": 3.5935523934197537e-06, "lm_loss": 5.72, "loss": 1.1429, "step": 1329, "text_contrastive_loss": 0.7019, "train_positive_log_prob": -85.5839, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.3658, "epoch": 3.0022573363431153, "grad_norm": 11.930546760559082, "learning_rate": 3.5865914183805606e-06, "lm_loss": 5.4776, "loss": 1.28, "step": 1330, "text_contrastive_loss": 0.7328, "train_positive_log_prob": -80.9713, "train_positive_token_accuracy": 0.0832, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3738, "epoch": 3.00451467268623, "grad_norm": 13.340158462524414, "learning_rate": 3.5796334194692704e-06, "lm_loss": 5.5184, "loss": 1.3609, "step": 1331, "text_contrastive_loss": 0.8704, "train_positive_log_prob": -80.6185, "train_positive_token_accuracy": 0.0824, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.3602, "epoch": 3.0067720090293455, "grad_norm": 13.094389915466309, "learning_rate": 3.572678411336916e-06, "lm_loss": 5.5166, "loss": 1.3185, "step": 1332, "text_contrastive_loss": 0.8133, "train_positive_log_prob": -83.9795, "train_positive_token_accuracy": 0.0811, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3618, "epoch": 3.0090293453724604, "grad_norm": 11.701364517211914, "learning_rate": 3.5657264086282317e-06, "lm_loss": 5.5034, "loss": 1.3167, "step": 1333, "text_contrastive_loss": 0.8092, "train_positive_log_prob": -80.2494, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3653, "epoch": 3.0112866817155757, "grad_norm": 12.613174438476562, "learning_rate": 3.5587774259816234e-06, "lm_loss": 5.5188, "loss": 1.2239, "step": 1334, "text_contrastive_loss": 0.6134, "train_positive_log_prob": -83.8572, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.4612, "epoch": 3.0135440180586905, "grad_norm": 13.1723051071167, "learning_rate": 3.5518314780291384e-06, "lm_loss": 5.3698, "loss": 1.4429, "step": 1335, "text_contrastive_loss": 0.8895, "train_positive_log_prob": -77.969, "train_positive_token_accuracy": 0.0853, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3863, "epoch": 3.015801354401806, "grad_norm": 14.11274528503418, "learning_rate": 3.544888579396435e-06, "lm_loss": 5.5404, "loss": 1.4009, "step": 1336, "text_contrastive_loss": 0.9211, "train_positive_log_prob": -82.5646, "train_positive_token_accuracy": 0.0818, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.4541, "epoch": 3.018058690744921, "grad_norm": 13.539448738098145, "learning_rate": 3.5379487447027483e-06, "lm_loss": 5.4008, "loss": 1.3906, "step": 1337, "text_contrastive_loss": 0.7927, "train_positive_log_prob": -79.4109, "train_positive_token_accuracy": 0.084, "train_positive_token_prob": 0.0328 }, { "contrastive_loss": 0.3204, "epoch": 3.020316027088036, "grad_norm": 13.184925079345703, "learning_rate": 3.5310119885608625e-06, "lm_loss": 5.4394, "loss": 1.201, "step": 1338, "text_contrastive_loss": 0.6732, "train_positive_log_prob": -81.7632, "train_positive_token_accuracy": 0.0845, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3226, "epoch": 3.0225733634311513, "grad_norm": 12.389911651611328, "learning_rate": 3.524078325577084e-06, "lm_loss": 5.4582, "loss": 1.2632, "step": 1339, "text_contrastive_loss": 0.7895, "train_positive_log_prob": -79.844, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.4028, "epoch": 3.024830699774266, "grad_norm": 12.883729934692383, "learning_rate": 3.517147770351199e-06, "lm_loss": 5.5024, "loss": 1.2784, "step": 1340, "text_contrastive_loss": 0.6506, "train_positive_log_prob": -83.6307, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3951, "epoch": 3.0270880361173815, "grad_norm": 13.757779121398926, "learning_rate": 3.5102203374764555e-06, "lm_loss": 5.4794, "loss": 1.3501, "step": 1341, "text_contrastive_loss": 0.8143, "train_positive_log_prob": -81.5234, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.3902, "epoch": 3.0293453724604964, "grad_norm": 13.152948379516602, "learning_rate": 3.503296041539522e-06, "lm_loss": 5.463, "loss": 1.3479, "step": 1342, "text_contrastive_loss": 0.8229, "train_positive_log_prob": -79.1949, "train_positive_token_accuracy": 0.0825, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3784, "epoch": 3.0316027088036117, "grad_norm": 14.146117210388184, "learning_rate": 3.496374897120467e-06, "lm_loss": 5.3706, "loss": 1.3808, "step": 1343, "text_contrastive_loss": 0.9308, "train_positive_log_prob": -79.1308, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.339, "epoch": 3.033860045146727, "grad_norm": 13.40596866607666, "learning_rate": 3.4894569187927204e-06, "lm_loss": 5.4452, "loss": 1.2462, "step": 1344, "text_contrastive_loss": 0.7253, "train_positive_log_prob": -81.1677, "train_positive_token_accuracy": 0.0747, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3532, "epoch": 3.036117381489842, "grad_norm": 11.866280555725098, "learning_rate": 3.4825421211230437e-06, "lm_loss": 5.592, "loss": 1.2654, "step": 1345, "text_contrastive_loss": 0.706, "train_positive_log_prob": -81.9996, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.3521, "epoch": 3.038374717832957, "grad_norm": 13.236581802368164, "learning_rate": 3.4756305186715046e-06, "lm_loss": 5.6645, "loss": 1.3594, "step": 1346, "text_contrastive_loss": 0.8817, "train_positive_log_prob": -82.9983, "train_positive_token_accuracy": 0.0762, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.3935, "epoch": 3.040632054176072, "grad_norm": 13.178022384643555, "learning_rate": 3.4687221259914394e-06, "lm_loss": 5.3523, "loss": 1.2888, "step": 1347, "text_contrastive_loss": 0.72, "train_positive_log_prob": -79.9937, "train_positive_token_accuracy": 0.087, "train_positive_token_prob": 0.0327 }, { "contrastive_loss": 0.351, "epoch": 3.0428893905191874, "grad_norm": 12.475351333618164, "learning_rate": 3.461816957629429e-06, "lm_loss": 5.4678, "loss": 1.2952, "step": 1348, "text_contrastive_loss": 0.7947, "train_positive_log_prob": -80.0998, "train_positive_token_accuracy": 0.0781, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3867, "epoch": 3.0451467268623027, "grad_norm": 13.979798316955566, "learning_rate": 3.4549150281252635e-06, "lm_loss": 5.4229, "loss": 1.2811, "step": 1349, "text_contrastive_loss": 0.7044, "train_positive_log_prob": -82.2965, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.4382, "epoch": 3.0474040632054176, "grad_norm": 14.120234489440918, "learning_rate": 3.448016352011914e-06, "lm_loss": 5.4526, "loss": 1.3429, "step": 1350, "text_contrastive_loss": 0.7189, "train_positive_log_prob": -82.156, "train_positive_token_accuracy": 0.0812, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3654, "epoch": 3.049661399548533, "grad_norm": 13.496257781982422, "learning_rate": 3.441120943815497e-06, "lm_loss": 5.519, "loss": 1.2871, "step": 1351, "text_contrastive_loss": 0.7395, "train_positive_log_prob": -81.361, "train_positive_token_accuracy": 0.0782, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3403, "epoch": 3.0519187358916477, "grad_norm": 12.333660125732422, "learning_rate": 3.4342288180552556e-06, "lm_loss": 5.473, "loss": 1.2689, "step": 1352, "text_contrastive_loss": 0.7626, "train_positive_log_prob": -82.3552, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3127, "epoch": 3.054176072234763, "grad_norm": 13.125408172607422, "learning_rate": 3.427339989243514e-06, "lm_loss": 5.5455, "loss": 1.2053, "step": 1353, "text_contrastive_loss": 0.6761, "train_positive_log_prob": -82.2894, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3042, "epoch": 3.056433408577878, "grad_norm": 12.157618522644043, "learning_rate": 3.420454471885659e-06, "lm_loss": 5.4026, "loss": 1.1991, "step": 1354, "text_contrastive_loss": 0.7093, "train_positive_log_prob": -80.3646, "train_positive_token_accuracy": 0.0798, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.3313, "epoch": 3.0586907449209932, "grad_norm": 12.843050956726074, "learning_rate": 3.4135722804801004e-06, "lm_loss": 5.5239, "loss": 1.3181, "step": 1355, "text_contrastive_loss": 0.8687, "train_positive_log_prob": -83.3554, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.4004, "epoch": 3.0609480812641086, "grad_norm": 13.376944541931152, "learning_rate": 3.4066934295182496e-06, "lm_loss": 5.486, "loss": 1.3579, "step": 1356, "text_contrastive_loss": 0.8178, "train_positive_log_prob": -80.9617, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.2915, "epoch": 3.0632054176072234, "grad_norm": 11.282021522521973, "learning_rate": 3.3998179334844823e-06, "lm_loss": 5.434, "loss": 1.1963, "step": 1357, "text_contrastive_loss": 0.7228, "train_positive_log_prob": -79.7677, "train_positive_token_accuracy": 0.0809, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3228, "epoch": 3.0654627539503387, "grad_norm": 12.650505065917969, "learning_rate": 3.3929458068561073e-06, "lm_loss": 5.4454, "loss": 1.2475, "step": 1358, "text_contrastive_loss": 0.7602, "train_positive_log_prob": -80.2805, "train_positive_token_accuracy": 0.0763, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.2636, "epoch": 3.0677200902934536, "grad_norm": 9.988433837890625, "learning_rate": 3.3860770641033417e-06, "lm_loss": 5.4381, "loss": 1.1262, "step": 1359, "text_contrastive_loss": 0.6376, "train_positive_log_prob": -80.5692, "train_positive_token_accuracy": 0.0838, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.3212, "epoch": 3.069977426636569, "grad_norm": 11.848210334777832, "learning_rate": 3.379211719689278e-06, "lm_loss": 5.594, "loss": 1.2387, "step": 1360, "text_contrastive_loss": 0.7162, "train_positive_log_prob": -82.9027, "train_positive_token_accuracy": 0.0783, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3626, "epoch": 3.072234762979684, "grad_norm": 10.95758056640625, "learning_rate": 3.37234978806985e-06, "lm_loss": 5.3868, "loss": 1.2913, "step": 1361, "text_contrastive_loss": 0.7801, "train_positive_log_prob": -79.472, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3916, "epoch": 3.074492099322799, "grad_norm": 13.328849792480469, "learning_rate": 3.365491283693807e-06, "lm_loss": 5.3945, "loss": 1.3312, "step": 1362, "text_contrastive_loss": 0.8004, "train_positive_log_prob": -79.738, "train_positive_token_accuracy": 0.07, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3169, "epoch": 3.0767494356659144, "grad_norm": 13.587141036987305, "learning_rate": 3.358636221002682e-06, "lm_loss": 5.6589, "loss": 1.2384, "step": 1363, "text_contrastive_loss": 0.7112, "train_positive_log_prob": -82.4115, "train_positive_token_accuracy": 0.0716, "train_positive_token_prob": 0.0282 }, { "contrastive_loss": 0.3051, "epoch": 3.0790067720090293, "grad_norm": 12.500545501708984, "learning_rate": 3.351784614430761e-06, "lm_loss": 5.5049, "loss": 1.1483, "step": 1364, "text_contrastive_loss": 0.5855, "train_positive_log_prob": -81.1327, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3559, "epoch": 3.0812641083521446, "grad_norm": 12.103687286376953, "learning_rate": 3.3449364784050515e-06, "lm_loss": 5.3643, "loss": 1.3086, "step": 1365, "text_contrastive_loss": 0.8326, "train_positive_log_prob": -77.4453, "train_positive_token_accuracy": 0.085, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3158, "epoch": 3.0835214446952595, "grad_norm": 11.751809120178223, "learning_rate": 3.3380918273452557e-06, "lm_loss": 5.5469, "loss": 1.2403, "step": 1366, "text_contrastive_loss": 0.7396, "train_positive_log_prob": -80.1322, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3473, "epoch": 3.0857787810383748, "grad_norm": 12.667675018310547, "learning_rate": 3.3312506756637343e-06, "lm_loss": 5.4027, "loss": 1.2416, "step": 1367, "text_contrastive_loss": 0.708, "train_positive_log_prob": -79.3535, "train_positive_token_accuracy": 0.0856, "train_positive_token_prob": 0.0336 }, { "contrastive_loss": 0.3494, "epoch": 3.0880361173814896, "grad_norm": 12.610095977783203, "learning_rate": 3.324413037765483e-06, "lm_loss": 5.4826, "loss": 1.2818, "step": 1368, "text_contrastive_loss": 0.7682, "train_positive_log_prob": -81.7262, "train_positive_token_accuracy": 0.0731, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3474, "epoch": 3.090293453724605, "grad_norm": 12.490227699279785, "learning_rate": 3.317578928048096e-06, "lm_loss": 5.4635, "loss": 1.2691, "step": 1369, "text_contrastive_loss": 0.7506, "train_positive_log_prob": -79.4187, "train_positive_token_accuracy": 0.0801, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3186, "epoch": 3.0925507900677203, "grad_norm": 11.91494083404541, "learning_rate": 3.310748360901741e-06, "lm_loss": 5.4459, "loss": 1.2483, "step": 1370, "text_contrastive_loss": 0.7703, "train_positive_log_prob": -82.114, "train_positive_token_accuracy": 0.0746, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.2689, "epoch": 3.094808126410835, "grad_norm": 11.223448753356934, "learning_rate": 3.303921350709124e-06, "lm_loss": 5.4636, "loss": 1.1267, "step": 1371, "text_contrastive_loss": 0.623, "train_positive_log_prob": -81.017, "train_positive_token_accuracy": 0.0822, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3487, "epoch": 3.0970654627539504, "grad_norm": 13.478902816772461, "learning_rate": 3.2970979118454616e-06, "lm_loss": 5.3685, "loss": 1.2357, "step": 1372, "text_contrastive_loss": 0.7001, "train_positive_log_prob": -77.8859, "train_positive_token_accuracy": 0.073, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3186, "epoch": 3.0993227990970653, "grad_norm": 12.863323211669922, "learning_rate": 3.2902780586784542e-06, "lm_loss": 5.5203, "loss": 1.2499, "step": 1373, "text_contrastive_loss": 0.7586, "train_positive_log_prob": -83.4758, "train_positive_token_accuracy": 0.0821, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.4542, "epoch": 3.1015801354401806, "grad_norm": 13.092292785644531, "learning_rate": 3.283461805568246e-06, "lm_loss": 5.558, "loss": 1.4925, "step": 1374, "text_contrastive_loss": 0.965, "train_positive_log_prob": -80.5689, "train_positive_token_accuracy": 0.0746, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3399, "epoch": 3.1038374717832955, "grad_norm": 13.868317604064941, "learning_rate": 3.276649166867406e-06, "lm_loss": 5.4346, "loss": 1.2537, "step": 1375, "text_contrastive_loss": 0.7407, "train_positive_log_prob": -81.5861, "train_positive_token_accuracy": 0.0807, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.277, "epoch": 3.106094808126411, "grad_norm": 13.915228843688965, "learning_rate": 3.2698401569208883e-06, "lm_loss": 5.3979, "loss": 1.1634, "step": 1376, "text_contrastive_loss": 0.6931, "train_positive_log_prob": -78.2482, "train_positive_token_accuracy": 0.0822, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3218, "epoch": 3.108352144469526, "grad_norm": 12.190874099731445, "learning_rate": 3.2630347900660094e-06, "lm_loss": 5.4945, "loss": 1.2482, "step": 1377, "text_contrastive_loss": 0.7537, "train_positive_log_prob": -84.8713, "train_positive_token_accuracy": 0.0736, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.393, "epoch": 3.110609480812641, "grad_norm": 16.03776741027832, "learning_rate": 3.256233080632414e-06, "lm_loss": 5.4982, "loss": 1.2598, "step": 1378, "text_contrastive_loss": 0.6339, "train_positive_log_prob": -80.8671, "train_positive_token_accuracy": 0.0752, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3224, "epoch": 3.1128668171557563, "grad_norm": 12.586856842041016, "learning_rate": 3.249435042942043e-06, "lm_loss": 5.5305, "loss": 1.2621, "step": 1379, "text_contrastive_loss": 0.7733, "train_positive_log_prob": -82.0357, "train_positive_token_accuracy": 0.0777, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3985, "epoch": 3.115124153498871, "grad_norm": 14.891951560974121, "learning_rate": 3.242640691309111e-06, "lm_loss": 5.5164, "loss": 1.2823, "step": 1380, "text_contrastive_loss": 0.6644, "train_positive_log_prob": -81.1738, "train_positive_token_accuracy": 0.0847, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.4055, "epoch": 3.1173814898419865, "grad_norm": 12.801090240478516, "learning_rate": 3.235850040040066e-06, "lm_loss": 5.5175, "loss": 1.3029, "step": 1381, "text_contrastive_loss": 0.6913, "train_positive_log_prob": -80.9535, "train_positive_token_accuracy": 0.0692, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.3894, "epoch": 3.119638826185102, "grad_norm": 12.925514221191406, "learning_rate": 3.2290631034335684e-06, "lm_loss": 5.5601, "loss": 1.3589, "step": 1382, "text_contrastive_loss": 0.827, "train_positive_log_prob": -82.1636, "train_positive_token_accuracy": 0.0809, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3831, "epoch": 3.1218961625282167, "grad_norm": 14.680846214294434, "learning_rate": 3.2222798957804524e-06, "lm_loss": 5.4835, "loss": 1.3201, "step": 1383, "text_contrastive_loss": 0.7772, "train_positive_log_prob": -80.6224, "train_positive_token_accuracy": 0.081, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.4091, "epoch": 3.124153498871332, "grad_norm": 13.382808685302734, "learning_rate": 3.215500431363706e-06, "lm_loss": 5.3571, "loss": 1.3014, "step": 1384, "text_contrastive_loss": 0.7131, "train_positive_log_prob": -81.1287, "train_positive_token_accuracy": 0.0851, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3998, "epoch": 3.126410835214447, "grad_norm": 14.403979301452637, "learning_rate": 3.20872472445843e-06, "lm_loss": 5.4311, "loss": 1.3408, "step": 1385, "text_contrastive_loss": 0.7959, "train_positive_log_prob": -81.9782, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.4187, "epoch": 3.128668171557562, "grad_norm": 13.64614200592041, "learning_rate": 3.2019527893318177e-06, "lm_loss": 5.4454, "loss": 1.3283, "step": 1386, "text_contrastive_loss": 0.73, "train_positive_log_prob": -80.7853, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3547, "epoch": 3.130925507900677, "grad_norm": 13.358538627624512, "learning_rate": 3.195184640243115e-06, "lm_loss": 5.389, "loss": 1.2771, "step": 1387, "text_contrastive_loss": 0.7669, "train_positive_log_prob": -79.9754, "train_positive_token_accuracy": 0.0811, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.4172, "epoch": 3.1331828442437923, "grad_norm": 13.655101776123047, "learning_rate": 3.1884202914436024e-06, "lm_loss": 5.4035, "loss": 1.3725, "step": 1388, "text_contrastive_loss": 0.83, "train_positive_log_prob": -78.4975, "train_positive_token_accuracy": 0.0777, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3278, "epoch": 3.1354401805869077, "grad_norm": 13.055240631103516, "learning_rate": 3.1816597571765517e-06, "lm_loss": 5.4432, "loss": 1.26, "step": 1389, "text_contrastive_loss": 0.7758, "train_positive_log_prob": -78.3704, "train_positive_token_accuracy": 0.0705, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.3732, "epoch": 3.1376975169300225, "grad_norm": 12.419684410095215, "learning_rate": 3.1749030516772084e-06, "lm_loss": 5.4468, "loss": 1.2938, "step": 1390, "text_contrastive_loss": 0.7517, "train_positive_log_prob": -78.2568, "train_positive_token_accuracy": 0.0791, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.315, "epoch": 3.139954853273138, "grad_norm": 12.234460830688477, "learning_rate": 3.168150189172754e-06, "lm_loss": 5.5778, "loss": 1.3066, "step": 1391, "text_contrastive_loss": 0.8676, "train_positive_log_prob": -82.2528, "train_positive_token_accuracy": 0.0758, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.42, "epoch": 3.1422121896162527, "grad_norm": 15.418965339660645, "learning_rate": 3.1614011838822755e-06, "lm_loss": 5.5274, "loss": 1.3568, "step": 1392, "text_contrastive_loss": 0.7681, "train_positive_log_prob": -85.1195, "train_positive_token_accuracy": 0.0852, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.35, "epoch": 3.144469525959368, "grad_norm": 12.588700294494629, "learning_rate": 3.154656050016742e-06, "lm_loss": 5.4606, "loss": 1.3457, "step": 1393, "text_contrastive_loss": 0.8991, "train_positive_log_prob": -77.8964, "train_positive_token_accuracy": 0.072, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4081, "epoch": 3.146726862302483, "grad_norm": 12.298941612243652, "learning_rate": 3.1479148017789673e-06, "lm_loss": 5.4516, "loss": 1.3663, "step": 1394, "text_contrastive_loss": 0.8262, "train_positive_log_prob": -80.6229, "train_positive_token_accuracy": 0.0827, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3261, "epoch": 3.148984198645598, "grad_norm": 12.022645950317383, "learning_rate": 3.1411774533635854e-06, "lm_loss": 5.4987, "loss": 1.2973, "step": 1395, "text_contrastive_loss": 0.8427, "train_positive_log_prob": -80.6333, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3343, "epoch": 3.1512415349887135, "grad_norm": 13.233353614807129, "learning_rate": 3.134444018957019e-06, "lm_loss": 5.4817, "loss": 1.2546, "step": 1396, "text_contrastive_loss": 0.7443, "train_positive_log_prob": -80.0181, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3314, "epoch": 3.1534988713318284, "grad_norm": 12.329866409301758, "learning_rate": 3.1277145127374475e-06, "lm_loss": 5.5327, "loss": 1.3321, "step": 1397, "text_contrastive_loss": 0.8949, "train_positive_log_prob": -81.3933, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3466, "epoch": 3.1557562076749437, "grad_norm": 12.84872055053711, "learning_rate": 3.1209889488747813e-06, "lm_loss": 5.5437, "loss": 1.2957, "step": 1398, "text_contrastive_loss": 0.7895, "train_positive_log_prob": -80.0808, "train_positive_token_accuracy": 0.0749, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.4117, "epoch": 3.1580135440180586, "grad_norm": 12.506523132324219, "learning_rate": 3.114267341530627e-06, "lm_loss": 5.3671, "loss": 1.3359, "step": 1399, "text_contrastive_loss": 0.7751, "train_positive_log_prob": -78.6646, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0327 }, { "contrastive_loss": 0.3116, "epoch": 3.160270880361174, "grad_norm": 10.065068244934082, "learning_rate": 3.1075497048582635e-06, "lm_loss": 5.3999, "loss": 1.1763, "step": 1400, "text_contrastive_loss": 0.6494, "train_positive_log_prob": -79.4544, "train_positive_token_accuracy": 0.0751, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.4121, "epoch": 3.1625282167042887, "grad_norm": 14.337026596069336, "learning_rate": 3.1008360530026053e-06, "lm_loss": 5.4837, "loss": 1.4533, "step": 1401, "text_contrastive_loss": 0.9857, "train_positive_log_prob": -80.9288, "train_positive_token_accuracy": 0.0762, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3098, "epoch": 3.164785553047404, "grad_norm": 10.524683952331543, "learning_rate": 3.0941264001001796e-06, "lm_loss": 5.3857, "loss": 1.2367, "step": 1402, "text_contrastive_loss": 0.7766, "train_positive_log_prob": -77.7547, "train_positive_token_accuracy": 0.0809, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3242, "epoch": 3.1670428893905194, "grad_norm": 11.856847763061523, "learning_rate": 3.0874207602790895e-06, "lm_loss": 5.3839, "loss": 1.2118, "step": 1403, "text_contrastive_loss": 0.6985, "train_positive_log_prob": -80.0418, "train_positive_token_accuracy": 0.0875, "train_positive_token_prob": 0.0327 }, { "contrastive_loss": 0.3542, "epoch": 3.1693002257336342, "grad_norm": 12.487991333007812, "learning_rate": 3.0807191476589926e-06, "lm_loss": 5.5494, "loss": 1.2514, "step": 1404, "text_contrastive_loss": 0.6846, "train_positive_log_prob": -79.9678, "train_positive_token_accuracy": 0.0718, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.33, "epoch": 3.1715575620767495, "grad_norm": 11.98621940612793, "learning_rate": 3.0740215763510617e-06, "lm_loss": 5.4704, "loss": 1.1744, "step": 1405, "text_contrastive_loss": 0.5947, "train_positive_log_prob": -82.208, "train_positive_token_accuracy": 0.0847, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3767, "epoch": 3.1738148984198644, "grad_norm": 12.796917915344238, "learning_rate": 3.0673280604579623e-06, "lm_loss": 5.3512, "loss": 1.2458, "step": 1406, "text_contrastive_loss": 0.6681, "train_positive_log_prob": -80.8947, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3847, "epoch": 3.1760722347629797, "grad_norm": 13.427523612976074, "learning_rate": 3.0606386140738253e-06, "lm_loss": 5.4488, "loss": 1.3555, "step": 1407, "text_contrastive_loss": 0.8518, "train_positive_log_prob": -79.1584, "train_positive_token_accuracy": 0.0836, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3398, "epoch": 3.1783295711060946, "grad_norm": 13.557880401611328, "learning_rate": 3.053953251284205e-06, "lm_loss": 5.5605, "loss": 1.2408, "step": 1408, "text_contrastive_loss": 0.6898, "train_positive_log_prob": -82.2353, "train_positive_token_accuracy": 0.0816, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3556, "epoch": 3.18058690744921, "grad_norm": 14.373702049255371, "learning_rate": 3.047271986166061e-06, "lm_loss": 5.4678, "loss": 1.2604, "step": 1409, "text_contrastive_loss": 0.716, "train_positive_log_prob": -81.8427, "train_positive_token_accuracy": 0.0718, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.345, "epoch": 3.1828442437923252, "grad_norm": 11.91468334197998, "learning_rate": 3.0405948327877233e-06, "lm_loss": 5.3607, "loss": 1.2759, "step": 1410, "text_contrastive_loss": 0.7897, "train_positive_log_prob": -80.8505, "train_positive_token_accuracy": 0.0847, "train_positive_token_prob": 0.033 }, { "contrastive_loss": 0.4006, "epoch": 3.18510158013544, "grad_norm": 13.399833679199219, "learning_rate": 3.033921805208867e-06, "lm_loss": 5.3878, "loss": 1.381, "step": 1411, "text_contrastive_loss": 0.8832, "train_positive_log_prob": -79.133, "train_positive_token_accuracy": 0.0947, "train_positive_token_prob": 0.0336 }, { "contrastive_loss": 0.3332, "epoch": 3.1873589164785554, "grad_norm": 12.890466690063477, "learning_rate": 3.027252917480476e-06, "lm_loss": 5.4102, "loss": 1.3251, "step": 1412, "text_contrastive_loss": 0.9019, "train_positive_log_prob": -81.1528, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.385, "epoch": 3.1896162528216703, "grad_norm": 12.853949546813965, "learning_rate": 3.0205881836448186e-06, "lm_loss": 5.5684, "loss": 1.3291, "step": 1413, "text_contrastive_loss": 0.7746, "train_positive_log_prob": -81.4836, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3598, "epoch": 3.1918735891647856, "grad_norm": 12.92371654510498, "learning_rate": 3.0139276177354188e-06, "lm_loss": 5.5097, "loss": 1.2993, "step": 1414, "text_contrastive_loss": 0.777, "train_positive_log_prob": -83.2085, "train_positive_token_accuracy": 0.0763, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4402, "epoch": 3.194130925507901, "grad_norm": 13.475192070007324, "learning_rate": 3.00727123377702e-06, "lm_loss": 5.5492, "loss": 1.4162, "step": 1415, "text_contrastive_loss": 0.842, "train_positive_log_prob": -81.8248, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3644, "epoch": 3.1963882618510158, "grad_norm": 12.008026123046875, "learning_rate": 3.0006190457855643e-06, "lm_loss": 5.601, "loss": 1.2532, "step": 1416, "text_contrastive_loss": 0.6573, "train_positive_log_prob": -83.509, "train_positive_token_accuracy": 0.0783, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3773, "epoch": 3.198645598194131, "grad_norm": 14.657004356384277, "learning_rate": 2.9939710677681545e-06, "lm_loss": 5.381, "loss": 1.366, "step": 1417, "text_contrastive_loss": 0.9011, "train_positive_log_prob": -78.0458, "train_positive_token_accuracy": 0.0883, "train_positive_token_prob": 0.0329 }, { "contrastive_loss": 0.3871, "epoch": 3.200902934537246, "grad_norm": 13.67013931274414, "learning_rate": 2.987327313723033e-06, "lm_loss": 5.4455, "loss": 1.3054, "step": 1418, "text_contrastive_loss": 0.7476, "train_positive_log_prob": -80.408, "train_positive_token_accuracy": 0.0836, "train_positive_token_prob": 0.0326 }, { "contrastive_loss": 0.2903, "epoch": 3.2031602708803613, "grad_norm": 11.551298141479492, "learning_rate": 2.980687797639543e-06, "lm_loss": 5.5571, "loss": 1.213, "step": 1419, "text_contrastive_loss": 0.734, "train_positive_log_prob": -82.0749, "train_positive_token_accuracy": 0.0733, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.2487, "epoch": 3.205417607223476, "grad_norm": 12.48673152923584, "learning_rate": 2.9740525334981105e-06, "lm_loss": 5.4923, "loss": 1.0882, "step": 1420, "text_contrastive_loss": 0.5806, "train_positive_log_prob": -82.8596, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3514, "epoch": 3.2076749435665914, "grad_norm": 12.990463256835938, "learning_rate": 2.967421535270203e-06, "lm_loss": 5.4214, "loss": 1.2665, "step": 1421, "text_contrastive_loss": 0.7459, "train_positive_log_prob": -80.2828, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.4896, "epoch": 3.2099322799097068, "grad_norm": 14.413372993469238, "learning_rate": 2.9607948169183077e-06, "lm_loss": 5.4621, "loss": 1.508, "step": 1422, "text_contrastive_loss": 0.9445, "train_positive_log_prob": -81.3139, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3271, "epoch": 3.2121896162528216, "grad_norm": 11.85389232635498, "learning_rate": 2.9541723923958975e-06, "lm_loss": 5.5029, "loss": 1.2189, "step": 1423, "text_contrastive_loss": 0.6829, "train_positive_log_prob": -80.7847, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.3824, "epoch": 3.214446952595937, "grad_norm": 14.24267864227295, "learning_rate": 2.94755427564741e-06, "lm_loss": 5.4803, "loss": 1.2923, "step": 1424, "text_contrastive_loss": 0.7238, "train_positive_log_prob": -79.1848, "train_positive_token_accuracy": 0.0714, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3116, "epoch": 3.216704288939052, "grad_norm": 14.309062004089355, "learning_rate": 2.9409404806082077e-06, "lm_loss": 5.4512, "loss": 1.2347, "step": 1425, "text_contrastive_loss": 0.756, "train_positive_log_prob": -80.8562, "train_positive_token_accuracy": 0.0801, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.3572, "epoch": 3.218961625282167, "grad_norm": 12.542322158813477, "learning_rate": 2.934331021204551e-06, "lm_loss": 5.2986, "loss": 1.2568, "step": 1426, "text_contrastive_loss": 0.7394, "train_positive_log_prob": -76.5596, "train_positive_token_accuracy": 0.0816, "train_positive_token_prob": 0.0326 }, { "contrastive_loss": 0.2844, "epoch": 3.221218961625282, "grad_norm": 12.496670722961426, "learning_rate": 2.9277259113535774e-06, "lm_loss": 5.3349, "loss": 1.1623, "step": 1427, "text_contrastive_loss": 0.6887, "train_positive_log_prob": -78.5639, "train_positive_token_accuracy": 0.0867, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.3231, "epoch": 3.2234762979683973, "grad_norm": 11.824939727783203, "learning_rate": 2.9211251649632587e-06, "lm_loss": 5.3371, "loss": 1.2538, "step": 1428, "text_contrastive_loss": 0.7941, "train_positive_log_prob": -77.8502, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3712, "epoch": 3.2257336343115126, "grad_norm": 11.644871711730957, "learning_rate": 2.9145287959323852e-06, "lm_loss": 5.3979, "loss": 1.2941, "step": 1429, "text_contrastive_loss": 0.7664, "train_positive_log_prob": -79.6593, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3841, "epoch": 3.2279909706546275, "grad_norm": 13.726497650146484, "learning_rate": 2.9079368181505263e-06, "lm_loss": 5.4403, "loss": 1.297, "step": 1430, "text_contrastive_loss": 0.7378, "train_positive_log_prob": -81.8983, "train_positive_token_accuracy": 0.0742, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3483, "epoch": 3.230248306997743, "grad_norm": 12.534523963928223, "learning_rate": 2.9013492454980074e-06, "lm_loss": 5.462, "loss": 1.2156, "step": 1431, "text_contrastive_loss": 0.6423, "train_positive_log_prob": -81.3106, "train_positive_token_accuracy": 0.0782, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3205, "epoch": 3.2325056433408577, "grad_norm": 13.735143661499023, "learning_rate": 2.894766091845873e-06, "lm_loss": 5.5468, "loss": 1.3025, "step": 1432, "text_contrastive_loss": 0.8546, "train_positive_log_prob": -81.8362, "train_positive_token_accuracy": 0.081, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.4447, "epoch": 3.234762979683973, "grad_norm": 13.709455490112305, "learning_rate": 2.88818737105587e-06, "lm_loss": 5.4476, "loss": 1.4373, "step": 1433, "text_contrastive_loss": 0.8956, "train_positive_log_prob": -79.8429, "train_positive_token_accuracy": 0.0776, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3643, "epoch": 3.237020316027088, "grad_norm": 12.758544921875, "learning_rate": 2.881613096980407e-06, "lm_loss": 5.3446, "loss": 1.2587, "step": 1434, "text_contrastive_loss": 0.7199, "train_positive_log_prob": -78.6063, "train_positive_token_accuracy": 0.0749, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3323, "epoch": 3.239277652370203, "grad_norm": 11.02239990234375, "learning_rate": 2.8750432834625312e-06, "lm_loss": 5.4404, "loss": 1.2886, "step": 1435, "text_contrastive_loss": 0.8245, "train_positive_log_prob": -79.8645, "train_positive_token_accuracy": 0.082, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3958, "epoch": 3.2415349887133185, "grad_norm": 12.147967338562012, "learning_rate": 2.8684779443358945e-06, "lm_loss": 5.5308, "loss": 1.4073, "step": 1436, "text_contrastive_loss": 0.9167, "train_positive_log_prob": -82.5297, "train_positive_token_accuracy": 0.0738, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.4072, "epoch": 3.2437923250564333, "grad_norm": 14.212855339050293, "learning_rate": 2.861917093424731e-06, "lm_loss": 5.4445, "loss": 1.3616, "step": 1437, "text_contrastive_loss": 0.82, "train_positive_log_prob": -80.8615, "train_positive_token_accuracy": 0.0915, "train_positive_token_prob": 0.033 }, { "contrastive_loss": 0.2872, "epoch": 3.2460496613995486, "grad_norm": 12.381433486938477, "learning_rate": 2.855360744543822e-06, "lm_loss": 5.3889, "loss": 1.2503, "step": 1438, "text_contrastive_loss": 0.8485, "train_positive_log_prob": -79.6539, "train_positive_token_accuracy": 0.0758, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3199, "epoch": 3.2483069977426635, "grad_norm": 11.74273681640625, "learning_rate": 2.8488089114984725e-06, "lm_loss": 5.4003, "loss": 1.232, "step": 1439, "text_contrastive_loss": 0.7442, "train_positive_log_prob": -79.4633, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.35, "epoch": 3.250564334085779, "grad_norm": 15.128246307373047, "learning_rate": 2.84226160808447e-06, "lm_loss": 5.5677, "loss": 1.2912, "step": 1440, "text_contrastive_loss": 0.7687, "train_positive_log_prob": -81.7769, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.4455, "epoch": 3.2528216704288937, "grad_norm": 13.406543731689453, "learning_rate": 2.835718848088076e-06, "lm_loss": 5.3907, "loss": 1.4058, "step": 1441, "text_contrastive_loss": 0.8426, "train_positive_log_prob": -79.1141, "train_positive_token_accuracy": 0.0743, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3688, "epoch": 3.255079006772009, "grad_norm": 13.79780101776123, "learning_rate": 2.8291806452859803e-06, "lm_loss": 5.4736, "loss": 1.2779, "step": 1442, "text_contrastive_loss": 0.7234, "train_positive_log_prob": -81.1199, "train_positive_token_accuracy": 0.0754, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.4313, "epoch": 3.2573363431151243, "grad_norm": 13.48995590209961, "learning_rate": 2.822647013445272e-06, "lm_loss": 5.4517, "loss": 1.4321, "step": 1443, "text_contrastive_loss": 0.9113, "train_positive_log_prob": -80.9007, "train_positive_token_accuracy": 0.0848, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.4178, "epoch": 3.259593679458239, "grad_norm": 12.608296394348145, "learning_rate": 2.8161179663234215e-06, "lm_loss": 5.4421, "loss": 1.4412, "step": 1444, "text_contrastive_loss": 0.9585, "train_positive_log_prob": -78.8458, "train_positive_token_accuracy": 0.0828, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3429, "epoch": 3.2618510158013545, "grad_norm": 13.537189483642578, "learning_rate": 2.809593517668243e-06, "lm_loss": 5.4962, "loss": 1.2356, "step": 1445, "text_contrastive_loss": 0.6861, "train_positive_log_prob": -81.7324, "train_positive_token_accuracy": 0.0735, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.4539, "epoch": 3.2641083521444694, "grad_norm": 13.725131034851074, "learning_rate": 2.8030736812178717e-06, "lm_loss": 5.3237, "loss": 1.4391, "step": 1446, "text_contrastive_loss": 0.9055, "train_positive_log_prob": -77.3248, "train_positive_token_accuracy": 0.0775, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3744, "epoch": 3.2663656884875847, "grad_norm": 11.78693962097168, "learning_rate": 2.796558470700723e-06, "lm_loss": 5.4174, "loss": 1.3114, "step": 1447, "text_contrastive_loss": 0.7906, "train_positive_log_prob": -80.3055, "train_positive_token_accuracy": 0.0775, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.4067, "epoch": 3.2686230248307, "grad_norm": 13.28524398803711, "learning_rate": 2.790047899835479e-06, "lm_loss": 5.3779, "loss": 1.3395, "step": 1448, "text_contrastive_loss": 0.79, "train_positive_log_prob": -79.4962, "train_positive_token_accuracy": 0.0848, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.2783, "epoch": 3.270880361173815, "grad_norm": 12.303711891174316, "learning_rate": 2.7835419823310507e-06, "lm_loss": 5.4055, "loss": 1.2131, "step": 1449, "text_contrastive_loss": 0.7884, "train_positive_log_prob": -80.2617, "train_positive_token_accuracy": 0.0805, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3437, "epoch": 3.27313769751693, "grad_norm": 13.363794326782227, "learning_rate": 2.777040731886549e-06, "lm_loss": 5.4144, "loss": 1.2726, "step": 1450, "text_contrastive_loss": 0.7748, "train_positive_log_prob": -78.5691, "train_positive_token_accuracy": 0.0848, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.2811, "epoch": 3.275395033860045, "grad_norm": 11.258405685424805, "learning_rate": 2.770544162191261e-06, "lm_loss": 5.482, "loss": 1.1472, "step": 1451, "text_contrastive_loss": 0.6357, "train_positive_log_prob": -82.4903, "train_positive_token_accuracy": 0.0833, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.4406, "epoch": 3.2776523702031604, "grad_norm": 14.803228378295898, "learning_rate": 2.7640522869246134e-06, "lm_loss": 5.3397, "loss": 1.279, "step": 1452, "text_contrastive_loss": 0.6088, "train_positive_log_prob": -79.1339, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3859, "epoch": 3.2799097065462752, "grad_norm": 13.501431465148926, "learning_rate": 2.7575651197561504e-06, "lm_loss": 5.3894, "loss": 1.3452, "step": 1453, "text_contrastive_loss": 0.8405, "train_positive_log_prob": -79.9568, "train_positive_token_accuracy": 0.0835, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3808, "epoch": 3.2821670428893905, "grad_norm": 14.436296463012695, "learning_rate": 2.7510826743455037e-06, "lm_loss": 5.5601, "loss": 1.3028, "step": 1454, "text_contrastive_loss": 0.732, "train_positive_log_prob": -84.3522, "train_positive_token_accuracy": 0.0687, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.409, "epoch": 3.2844243792325054, "grad_norm": 12.698884963989258, "learning_rate": 2.744604964342364e-06, "lm_loss": 5.4928, "loss": 1.3901, "step": 1455, "text_contrastive_loss": 0.8636, "train_positive_log_prob": -80.6962, "train_positive_token_accuracy": 0.083, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3434, "epoch": 3.2866817155756207, "grad_norm": 11.475349426269531, "learning_rate": 2.7381320033864434e-06, "lm_loss": 5.362, "loss": 1.2695, "step": 1456, "text_contrastive_loss": 0.7796, "train_positive_log_prob": -79.7615, "train_positive_token_accuracy": 0.0827, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3341, "epoch": 3.288939051918736, "grad_norm": 14.666303634643555, "learning_rate": 2.7316638051074605e-06, "lm_loss": 5.4915, "loss": 1.2745, "step": 1457, "text_contrastive_loss": 0.7826, "train_positive_log_prob": -80.5508, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3401, "epoch": 3.291196388261851, "grad_norm": 12.208320617675781, "learning_rate": 2.72520038312511e-06, "lm_loss": 5.4205, "loss": 1.2532, "step": 1458, "text_contrastive_loss": 0.742, "train_positive_log_prob": -81.0321, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.4117, "epoch": 3.293453724604966, "grad_norm": 12.495759010314941, "learning_rate": 2.7187417510490176e-06, "lm_loss": 5.4519, "loss": 1.291, "step": 1459, "text_contrastive_loss": 0.6683, "train_positive_log_prob": -79.7556, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3489, "epoch": 3.295711060948081, "grad_norm": 13.113781929016113, "learning_rate": 2.7122879224787315e-06, "lm_loss": 5.5168, "loss": 1.3154, "step": 1460, "text_contrastive_loss": 0.8298, "train_positive_log_prob": -81.1143, "train_positive_token_accuracy": 0.0765, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4159, "epoch": 3.2979683972911964, "grad_norm": 14.516807556152344, "learning_rate": 2.7058389110036835e-06, "lm_loss": 5.4569, "loss": 1.3379, "step": 1461, "text_contrastive_loss": 0.7525, "train_positive_log_prob": -80.6998, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3721, "epoch": 3.3002257336343117, "grad_norm": 13.572257995605469, "learning_rate": 2.6993947302031643e-06, "lm_loss": 5.3711, "loss": 1.3431, "step": 1462, "text_contrastive_loss": 0.8677, "train_positive_log_prob": -78.4474, "train_positive_token_accuracy": 0.0746, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.2717, "epoch": 3.3024830699774266, "grad_norm": 13.101594924926758, "learning_rate": 2.692955393646286e-06, "lm_loss": 5.4834, "loss": 1.1467, "step": 1463, "text_contrastive_loss": 0.6534, "train_positive_log_prob": -81.4774, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.2453, "epoch": 3.304740406320542, "grad_norm": 10.775618553161621, "learning_rate": 2.686520914891968e-06, "lm_loss": 5.4244, "loss": 1.0384, "step": 1464, "text_contrastive_loss": 0.5013, "train_positive_log_prob": -79.544, "train_positive_token_accuracy": 0.0855, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3128, "epoch": 3.3069977426636568, "grad_norm": 12.02344799041748, "learning_rate": 2.6800913074888984e-06, "lm_loss": 5.5315, "loss": 1.2232, "step": 1465, "text_contrastive_loss": 0.7147, "train_positive_log_prob": -83.6844, "train_positive_token_accuracy": 0.086, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.2648, "epoch": 3.309255079006772, "grad_norm": 12.345636367797852, "learning_rate": 2.6736665849755073e-06, "lm_loss": 5.3845, "loss": 1.1419, "step": 1466, "text_contrastive_loss": 0.6773, "train_positive_log_prob": -78.5004, "train_positive_token_accuracy": 0.0804, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.4061, "epoch": 3.311512415349887, "grad_norm": 13.457576751708984, "learning_rate": 2.6672467608799413e-06, "lm_loss": 5.5098, "loss": 1.4108, "step": 1467, "text_contrastive_loss": 0.9074, "train_positive_log_prob": -81.1176, "train_positive_token_accuracy": 0.0616, "train_positive_token_prob": 0.0283 }, { "contrastive_loss": 0.3699, "epoch": 3.3137697516930023, "grad_norm": 11.337600708007812, "learning_rate": 2.660831848720028e-06, "lm_loss": 5.4729, "loss": 1.2979, "step": 1468, "text_contrastive_loss": 0.7615, "train_positive_log_prob": -80.855, "train_positive_token_accuracy": 0.0775, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3656, "epoch": 3.3160270880361176, "grad_norm": 15.963889122009277, "learning_rate": 2.654421862003256e-06, "lm_loss": 5.5073, "loss": 1.2696, "step": 1469, "text_contrastive_loss": 0.7065, "train_positive_log_prob": -80.6729, "train_positive_token_accuracy": 0.0781, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3328, "epoch": 3.3182844243792324, "grad_norm": 14.723601341247559, "learning_rate": 2.648016814226742e-06, "lm_loss": 5.6061, "loss": 1.2822, "step": 1470, "text_contrastive_loss": 0.7774, "train_positive_log_prob": -84.2654, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.3971, "epoch": 3.3205417607223477, "grad_norm": 13.494935035705566, "learning_rate": 2.6416167188772052e-06, "lm_loss": 5.5084, "loss": 1.3143, "step": 1471, "text_contrastive_loss": 0.7327, "train_positive_log_prob": -82.9996, "train_positive_token_accuracy": 0.0749, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.4222, "epoch": 3.3227990970654626, "grad_norm": 14.805681228637695, "learning_rate": 2.6352215894309306e-06, "lm_loss": 5.3873, "loss": 1.3416, "step": 1472, "text_contrastive_loss": 0.7614, "train_positive_log_prob": -80.7656, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0329 }, { "contrastive_loss": 0.3509, "epoch": 3.325056433408578, "grad_norm": 11.837203025817871, "learning_rate": 2.6288314393537522e-06, "lm_loss": 5.5112, "loss": 1.2858, "step": 1473, "text_contrastive_loss": 0.7677, "train_positive_log_prob": -81.2044, "train_positive_token_accuracy": 0.0774, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.37, "epoch": 3.327313769751693, "grad_norm": 10.98058032989502, "learning_rate": 2.6224462821010185e-06, "lm_loss": 5.5068, "loss": 1.3272, "step": 1474, "text_contrastive_loss": 0.813, "train_positive_log_prob": -81.1172, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.3709, "epoch": 3.329571106094808, "grad_norm": 13.405777931213379, "learning_rate": 2.616066131117563e-06, "lm_loss": 5.4351, "loss": 1.2832, "step": 1475, "text_contrastive_loss": 0.7377, "train_positive_log_prob": -79.0198, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3743, "epoch": 3.3318284424379234, "grad_norm": 12.909689903259277, "learning_rate": 2.6096909998376794e-06, "lm_loss": 5.4092, "loss": 1.3607, "step": 1476, "text_contrastive_loss": 0.8911, "train_positive_log_prob": -81.3746, "train_positive_token_accuracy": 0.0826, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3567, "epoch": 3.3340857787810383, "grad_norm": 13.252840995788574, "learning_rate": 2.6033209016850926e-06, "lm_loss": 5.5363, "loss": 1.2997, "step": 1477, "text_contrastive_loss": 0.7788, "train_positive_log_prob": -83.0844, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3543, "epoch": 3.3363431151241536, "grad_norm": 13.664084434509277, "learning_rate": 2.596955850072928e-06, "lm_loss": 5.4244, "loss": 1.281, "step": 1478, "text_contrastive_loss": 0.7686, "train_positive_log_prob": -80.1999, "train_positive_token_accuracy": 0.084, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.4341, "epoch": 3.3386004514672685, "grad_norm": 13.399177551269531, "learning_rate": 2.5905958584036826e-06, "lm_loss": 5.6161, "loss": 1.4276, "step": 1479, "text_contrastive_loss": 0.8638, "train_positive_log_prob": -82.4656, "train_positive_token_accuracy": 0.075, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.2895, "epoch": 3.340857787810384, "grad_norm": 11.764191627502441, "learning_rate": 2.5842409400692026e-06, "lm_loss": 5.5003, "loss": 1.1359, "step": 1480, "text_contrastive_loss": 0.5926, "train_positive_log_prob": -80.0552, "train_positive_token_accuracy": 0.0805, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3489, "epoch": 3.343115124153499, "grad_norm": 12.049650192260742, "learning_rate": 2.577891108450651e-06, "lm_loss": 5.5732, "loss": 1.29, "step": 1481, "text_contrastive_loss": 0.7676, "train_positive_log_prob": -82.0718, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3308, "epoch": 3.345372460496614, "grad_norm": 12.622515678405762, "learning_rate": 2.571546376918479e-06, "lm_loss": 5.4124, "loss": 1.2243, "step": 1482, "text_contrastive_loss": 0.7046, "train_positive_log_prob": -80.6769, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.394, "epoch": 3.3476297968397293, "grad_norm": 12.73591136932373, "learning_rate": 2.5652067588324015e-06, "lm_loss": 5.4206, "loss": 1.3097, "step": 1483, "text_contrastive_loss": 0.7472, "train_positive_log_prob": -78.704, "train_positive_token_accuracy": 0.0815, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.3021, "epoch": 3.349887133182844, "grad_norm": 11.945694923400879, "learning_rate": 2.55887226754136e-06, "lm_loss": 5.3205, "loss": 1.2063, "step": 1484, "text_contrastive_loss": 0.7444, "train_positive_log_prob": -76.5879, "train_positive_token_accuracy": 0.0822, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.4803, "epoch": 3.3521444695259595, "grad_norm": 15.011481285095215, "learning_rate": 2.552542916383507e-06, "lm_loss": 5.4357, "loss": 1.4293, "step": 1485, "text_contrastive_loss": 0.811, "train_positive_log_prob": -77.498, "train_positive_token_accuracy": 0.0767, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3935, "epoch": 3.3544018058690743, "grad_norm": 13.88205337524414, "learning_rate": 2.5462187186861697e-06, "lm_loss": 5.4123, "loss": 1.3346, "step": 1486, "text_contrastive_loss": 0.7997, "train_positive_log_prob": -81.2688, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.2888, "epoch": 3.3566591422121896, "grad_norm": 12.709901809692383, "learning_rate": 2.5398996877658256e-06, "lm_loss": 5.5052, "loss": 1.1977, "step": 1487, "text_contrastive_loss": 0.7167, "train_positive_log_prob": -82.7467, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.3343, "epoch": 3.3589164785553045, "grad_norm": 13.5997314453125, "learning_rate": 2.5335858369280674e-06, "lm_loss": 5.4754, "loss": 1.2899, "step": 1488, "text_contrastive_loss": 0.8161, "train_positive_log_prob": -83.2009, "train_positive_token_accuracy": 0.0761, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3642, "epoch": 3.36117381489842, "grad_norm": 14.860273361206055, "learning_rate": 2.5272771794675866e-06, "lm_loss": 5.6381, "loss": 1.3343, "step": 1489, "text_contrastive_loss": 0.8127, "train_positive_log_prob": -85.8706, "train_positive_token_accuracy": 0.0823, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.4325, "epoch": 3.363431151241535, "grad_norm": 13.943649291992188, "learning_rate": 2.5209737286681367e-06, "lm_loss": 5.4665, "loss": 1.4305, "step": 1490, "text_contrastive_loss": 0.9026, "train_positive_log_prob": -81.1655, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3042, "epoch": 3.36568848758465, "grad_norm": 13.22721004486084, "learning_rate": 2.514675497802508e-06, "lm_loss": 5.5726, "loss": 1.2556, "step": 1491, "text_contrastive_loss": 0.7882, "train_positive_log_prob": -81.8022, "train_positive_token_accuracy": 0.0716, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.2725, "epoch": 3.3679458239277653, "grad_norm": 11.849224090576172, "learning_rate": 2.508382500132499e-06, "lm_loss": 5.4561, "loss": 1.1832, "step": 1492, "text_contrastive_loss": 0.7303, "train_positive_log_prob": -80.438, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.2978, "epoch": 3.37020316027088, "grad_norm": 11.576635360717773, "learning_rate": 2.50209474890889e-06, "lm_loss": 5.3959, "loss": 1.2757, "step": 1493, "text_contrastive_loss": 0.8767, "train_positive_log_prob": -79.1441, "train_positive_token_accuracy": 0.0743, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3249, "epoch": 3.3724604966139955, "grad_norm": 11.91965103149414, "learning_rate": 2.495812257371416e-06, "lm_loss": 5.4112, "loss": 1.2655, "step": 1494, "text_contrastive_loss": 0.799, "train_positive_log_prob": -79.7504, "train_positive_token_accuracy": 0.071, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.3196, "epoch": 3.374717832957111, "grad_norm": 13.613897323608398, "learning_rate": 2.4895350387487304e-06, "lm_loss": 5.6697, "loss": 1.268, "step": 1495, "text_contrastive_loss": 0.7627, "train_positive_log_prob": -83.8037, "train_positive_token_accuracy": 0.0756, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.2848, "epoch": 3.3769751693002257, "grad_norm": 11.66408920288086, "learning_rate": 2.4832631062583906e-06, "lm_loss": 5.3674, "loss": 1.2153, "step": 1496, "text_contrastive_loss": 0.7875, "train_positive_log_prob": -78.7697, "train_positive_token_accuracy": 0.0844, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.4446, "epoch": 3.379232505643341, "grad_norm": 14.82761001586914, "learning_rate": 2.47699647310682e-06, "lm_loss": 5.5406, "loss": 1.4435, "step": 1497, "text_contrastive_loss": 0.8895, "train_positive_log_prob": -85.1436, "train_positive_token_accuracy": 0.0752, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.2982, "epoch": 3.381489841986456, "grad_norm": 12.15412425994873, "learning_rate": 2.470735152489287e-06, "lm_loss": 5.4322, "loss": 1.132, "step": 1498, "text_contrastive_loss": 0.5812, "train_positive_log_prob": -81.164, "train_positive_token_accuracy": 0.0821, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.4166, "epoch": 3.383747178329571, "grad_norm": 13.879342079162598, "learning_rate": 2.4644791575898665e-06, "lm_loss": 5.5772, "loss": 1.408, "step": 1499, "text_contrastive_loss": 0.8673, "train_positive_log_prob": -83.6823, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3544, "epoch": 3.386004514672686, "grad_norm": 12.835597038269043, "learning_rate": 2.4582285015814263e-06, "lm_loss": 5.4296, "loss": 1.2063, "step": 1500, "text_contrastive_loss": 0.6179, "train_positive_log_prob": -81.8884, "train_positive_token_accuracy": 0.0852, "train_positive_token_prob": 0.0325 }, { "contrastive_loss": 0.3748, "epoch": 3.3882618510158014, "grad_norm": 13.693696022033691, "learning_rate": 2.4519831976255892e-06, "lm_loss": 5.4888, "loss": 1.3371, "step": 1501, "text_contrastive_loss": 0.8268, "train_positive_log_prob": -80.0147, "train_positive_token_accuracy": 0.0852, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.2995, "epoch": 3.3905191873589167, "grad_norm": 12.810537338256836, "learning_rate": 2.445743258872711e-06, "lm_loss": 5.4853, "loss": 1.1739, "step": 1502, "text_contrastive_loss": 0.6517, "train_positive_log_prob": -81.5071, "train_positive_token_accuracy": 0.0823, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.4199, "epoch": 3.3927765237020315, "grad_norm": 14.674356460571289, "learning_rate": 2.4395086984618486e-06, "lm_loss": 5.5201, "loss": 1.3155, "step": 1503, "text_contrastive_loss": 0.6871, "train_positive_log_prob": -80.0595, "train_positive_token_accuracy": 0.0843, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3283, "epoch": 3.395033860045147, "grad_norm": 12.382877349853516, "learning_rate": 2.433279529520732e-06, "lm_loss": 5.4382, "loss": 1.2897, "step": 1504, "text_contrastive_loss": 0.8351, "train_positive_log_prob": -80.4912, "train_positive_token_accuracy": 0.0847, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3853, "epoch": 3.3972911963882617, "grad_norm": 12.306852340698242, "learning_rate": 2.427055765165741e-06, "lm_loss": 5.4388, "loss": 1.349, "step": 1505, "text_contrastive_loss": 0.8397, "train_positive_log_prob": -80.6895, "train_positive_token_accuracy": 0.0814, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3212, "epoch": 3.399548532731377, "grad_norm": 12.166094779968262, "learning_rate": 2.420837418501876e-06, "lm_loss": 5.4144, "loss": 1.2337, "step": 1506, "text_contrastive_loss": 0.7421, "train_positive_log_prob": -78.0012, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3518, "epoch": 3.401805869074492, "grad_norm": 13.620892524719238, "learning_rate": 2.414624502622731e-06, "lm_loss": 5.4152, "loss": 1.2913, "step": 1507, "text_contrastive_loss": 0.7961, "train_positive_log_prob": -78.2482, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3299, "epoch": 3.404063205417607, "grad_norm": 13.580711364746094, "learning_rate": 2.408417030610457e-06, "lm_loss": 5.4487, "loss": 1.2475, "step": 1508, "text_contrastive_loss": 0.7454, "train_positive_log_prob": -79.8124, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3441, "epoch": 3.4063205417607225, "grad_norm": 14.103965759277344, "learning_rate": 2.4022150155357526e-06, "lm_loss": 5.5329, "loss": 1.2285, "step": 1509, "text_contrastive_loss": 0.6623, "train_positive_log_prob": -84.2947, "train_positive_token_accuracy": 0.0851, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3348, "epoch": 3.4085778781038374, "grad_norm": 11.755352020263672, "learning_rate": 2.396018470457821e-06, "lm_loss": 5.3933, "loss": 1.2474, "step": 1510, "text_contrastive_loss": 0.7464, "train_positive_log_prob": -79.17, "train_positive_token_accuracy": 0.0732, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.2613, "epoch": 3.4108352144469527, "grad_norm": 11.900193214416504, "learning_rate": 2.389827408424345e-06, "lm_loss": 5.5358, "loss": 1.1067, "step": 1511, "text_contrastive_loss": 0.5836, "train_positive_log_prob": -83.0834, "train_positive_token_accuracy": 0.0703, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.4277, "epoch": 3.4130925507900676, "grad_norm": 13.55148983001709, "learning_rate": 2.3836418424714665e-06, "lm_loss": 5.3704, "loss": 1.4147, "step": 1512, "text_contrastive_loss": 0.8998, "train_positive_log_prob": -79.9694, "train_positive_token_accuracy": 0.0871, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.334, "epoch": 3.415349887133183, "grad_norm": 13.18596363067627, "learning_rate": 2.377461785623752e-06, "lm_loss": 5.4027, "loss": 1.2542, "step": 1513, "text_contrastive_loss": 0.7598, "train_positive_log_prob": -78.9727, "train_positive_token_accuracy": 0.0746, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3319, "epoch": 3.417607223476298, "grad_norm": 11.630409240722656, "learning_rate": 2.3712872508941714e-06, "lm_loss": 5.4037, "loss": 1.201, "step": 1514, "text_contrastive_loss": 0.6573, "train_positive_log_prob": -82.0735, "train_positive_token_accuracy": 0.0782, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3014, "epoch": 3.419864559819413, "grad_norm": 12.5684175491333, "learning_rate": 2.3651182512840604e-06, "lm_loss": 5.4383, "loss": 1.2324, "step": 1515, "text_contrastive_loss": 0.7744, "train_positive_log_prob": -80.0666, "train_positive_token_accuracy": 0.0748, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.3529, "epoch": 3.4221218961625284, "grad_norm": 12.914410591125488, "learning_rate": 2.358954799783106e-06, "lm_loss": 5.3883, "loss": 1.3431, "step": 1516, "text_contrastive_loss": 0.9027, "train_positive_log_prob": -79.8367, "train_positive_token_accuracy": 0.0838, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3331, "epoch": 3.4243792325056432, "grad_norm": 12.251567840576172, "learning_rate": 2.3527969093693105e-06, "lm_loss": 5.4971, "loss": 1.2708, "step": 1517, "text_contrastive_loss": 0.776, "train_positive_log_prob": -81.6597, "train_positive_token_accuracy": 0.0823, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3004, "epoch": 3.4266365688487586, "grad_norm": 10.214859008789062, "learning_rate": 2.346644593008966e-06, "lm_loss": 5.4587, "loss": 1.1951, "step": 1518, "text_contrastive_loss": 0.6976, "train_positive_log_prob": -82.3247, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.2544, "epoch": 3.4288939051918734, "grad_norm": 10.06332015991211, "learning_rate": 2.3404978636566312e-06, "lm_loss": 5.4006, "loss": 1.1817, "step": 1519, "text_contrastive_loss": 0.7745, "train_positive_log_prob": -81.0751, "train_positive_token_accuracy": 0.0869, "train_positive_token_prob": 0.0329 }, { "contrastive_loss": 0.3118, "epoch": 3.4311512415349887, "grad_norm": 13.071005821228027, "learning_rate": 2.3343567342550933e-06, "lm_loss": 5.4059, "loss": 1.2044, "step": 1520, "text_contrastive_loss": 0.7041, "train_positive_log_prob": -78.6801, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3277, "epoch": 3.4334085778781036, "grad_norm": 12.789834022521973, "learning_rate": 2.328221217735355e-06, "lm_loss": 5.375, "loss": 1.1977, "step": 1521, "text_contrastive_loss": 0.6651, "train_positive_log_prob": -78.9508, "train_positive_token_accuracy": 0.0809, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3906, "epoch": 3.435665914221219, "grad_norm": 13.222907066345215, "learning_rate": 2.322091327016597e-06, "lm_loss": 5.3968, "loss": 1.3879, "step": 1522, "text_contrastive_loss": 0.9151, "train_positive_log_prob": -79.7508, "train_positive_token_accuracy": 0.0708, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3174, "epoch": 3.4379232505643342, "grad_norm": 12.321296691894531, "learning_rate": 2.3159670750061563e-06, "lm_loss": 5.4893, "loss": 1.1912, "step": 1523, "text_contrastive_loss": 0.6497, "train_positive_log_prob": -81.9154, "train_positive_token_accuracy": 0.0804, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3686, "epoch": 3.440180586907449, "grad_norm": 13.29212474822998, "learning_rate": 2.3098484745994933e-06, "lm_loss": 5.4338, "loss": 1.3172, "step": 1524, "text_contrastive_loss": 0.8103, "train_positive_log_prob": -83.5652, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.333, "epoch": 3.4424379232505644, "grad_norm": 12.273012161254883, "learning_rate": 2.3037355386801683e-06, "lm_loss": 5.4078, "loss": 1.1939, "step": 1525, "text_contrastive_loss": 0.6401, "train_positive_log_prob": -79.3, "train_positive_token_accuracy": 0.086, "train_positive_token_prob": 0.0327 }, { "contrastive_loss": 0.3193, "epoch": 3.4446952595936793, "grad_norm": 12.238722801208496, "learning_rate": 2.2976282801198237e-06, "lm_loss": 5.4413, "loss": 1.1801, "step": 1526, "text_contrastive_loss": 0.6333, "train_positive_log_prob": -80.6191, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.4134, "epoch": 3.4469525959367946, "grad_norm": 14.603962898254395, "learning_rate": 2.2915267117781328e-06, "lm_loss": 5.4377, "loss": 1.411, "step": 1527, "text_contrastive_loss": 0.9075, "train_positive_log_prob": -80.6354, "train_positive_token_accuracy": 0.0748, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3273, "epoch": 3.44920993227991, "grad_norm": 11.297776222229004, "learning_rate": 2.2854308465027963e-06, "lm_loss": 5.4826, "loss": 1.2509, "step": 1528, "text_contrastive_loss": 0.7507, "train_positive_log_prob": -82.5831, "train_positive_token_accuracy": 0.0807, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3296, "epoch": 3.4514672686230248, "grad_norm": 11.965171813964844, "learning_rate": 2.279340697129505e-06, "lm_loss": 5.4535, "loss": 1.2298, "step": 1529, "text_contrastive_loss": 0.7097, "train_positive_log_prob": -81.1854, "train_positive_token_accuracy": 0.0798, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.3828, "epoch": 3.45372460496614, "grad_norm": 12.046841621398926, "learning_rate": 2.2732562764819157e-06, "lm_loss": 5.464, "loss": 1.297, "step": 1530, "text_contrastive_loss": 0.7356, "train_positive_log_prob": -81.8761, "train_positive_token_accuracy": 0.0777, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.28, "epoch": 3.455981941309255, "grad_norm": 11.561809539794922, "learning_rate": 2.267177597371616e-06, "lm_loss": 5.3392, "loss": 1.1943, "step": 1531, "text_contrastive_loss": 0.7609, "train_positive_log_prob": -81.2562, "train_positive_token_accuracy": 0.0809, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3527, "epoch": 3.4582392776523703, "grad_norm": 12.117919921875, "learning_rate": 2.26110467259811e-06, "lm_loss": 5.3512, "loss": 1.28, "step": 1532, "text_contrastive_loss": 0.7843, "train_positive_log_prob": -78.7829, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.3226, "epoch": 3.460496613995485, "grad_norm": 12.698823928833008, "learning_rate": 2.255037514948785e-06, "lm_loss": 5.3925, "loss": 1.2494, "step": 1533, "text_contrastive_loss": 0.7751, "train_positive_log_prob": -79.8815, "train_positive_token_accuracy": 0.0807, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.4023, "epoch": 3.4627539503386005, "grad_norm": 14.095758438110352, "learning_rate": 2.2489761371988826e-06, "lm_loss": 5.3957, "loss": 1.3687, "step": 1534, "text_contrastive_loss": 0.8536, "train_positive_log_prob": -81.2942, "train_positive_token_accuracy": 0.0822, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3481, "epoch": 3.4650112866817158, "grad_norm": 15.021332740783691, "learning_rate": 2.242920552111473e-06, "lm_loss": 5.4151, "loss": 1.2612, "step": 1535, "text_contrastive_loss": 0.7432, "train_positive_log_prob": -80.8017, "train_positive_token_accuracy": 0.084, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.396, "epoch": 3.4672686230248306, "grad_norm": 13.502098083496094, "learning_rate": 2.236870772437433e-06, "lm_loss": 5.5101, "loss": 1.3803, "step": 1536, "text_contrastive_loss": 0.8665, "train_positive_log_prob": -81.231, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.2971, "epoch": 3.469525959367946, "grad_norm": 12.005428314208984, "learning_rate": 2.2308268109154126e-06, "lm_loss": 5.5129, "loss": 1.2055, "step": 1537, "text_contrastive_loss": 0.7142, "train_positive_log_prob": -82.2874, "train_positive_token_accuracy": 0.0866, "train_positive_token_prob": 0.0326 }, { "contrastive_loss": 0.3754, "epoch": 3.471783295711061, "grad_norm": 13.41446590423584, "learning_rate": 2.224788680271811e-06, "lm_loss": 5.4155, "loss": 1.2911, "step": 1538, "text_contrastive_loss": 0.7482, "train_positive_log_prob": -81.6538, "train_positive_token_accuracy": 0.0736, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3638, "epoch": 3.474040632054176, "grad_norm": 13.16190242767334, "learning_rate": 2.218756393220753e-06, "lm_loss": 5.5326, "loss": 1.3163, "step": 1539, "text_contrastive_loss": 0.7986, "train_positive_log_prob": -81.5605, "train_positive_token_accuracy": 0.0702, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.2733, "epoch": 3.476297968397291, "grad_norm": 10.823956489562988, "learning_rate": 2.212729962464051e-06, "lm_loss": 5.47, "loss": 1.1562, "step": 1540, "text_contrastive_loss": 0.6717, "train_positive_log_prob": -80.3626, "train_positive_token_accuracy": 0.0861, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.405, "epoch": 3.4785553047404063, "grad_norm": 14.038640975952148, "learning_rate": 2.2067094006911943e-06, "lm_loss": 5.3801, "loss": 1.4272, "step": 1541, "text_contrastive_loss": 0.9683, "train_positive_log_prob": -79.5456, "train_positive_token_accuracy": 0.0785, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.3287, "epoch": 3.4808126410835216, "grad_norm": 13.235450744628906, "learning_rate": 2.2006947205793107e-06, "lm_loss": 5.3748, "loss": 1.2709, "step": 1542, "text_contrastive_loss": 0.8094, "train_positive_log_prob": -78.2735, "train_positive_token_accuracy": 0.0738, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3925, "epoch": 3.4830699774266365, "grad_norm": 14.204618453979492, "learning_rate": 2.1946859347931442e-06, "lm_loss": 5.3882, "loss": 1.3161, "step": 1543, "text_contrastive_loss": 0.7695, "train_positive_log_prob": -78.5115, "train_positive_token_accuracy": 0.081, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3835, "epoch": 3.485327313769752, "grad_norm": 13.043344497680664, "learning_rate": 2.1886830559850264e-06, "lm_loss": 5.4147, "loss": 1.2988, "step": 1544, "text_contrastive_loss": 0.7475, "train_positive_log_prob": -78.4912, "train_positive_token_accuracy": 0.0811, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3383, "epoch": 3.4875846501128667, "grad_norm": 12.718570709228516, "learning_rate": 2.182686096794852e-06, "lm_loss": 5.4734, "loss": 1.2949, "step": 1545, "text_contrastive_loss": 0.8186, "train_positive_log_prob": -81.6073, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3272, "epoch": 3.489841986455982, "grad_norm": 13.003602027893066, "learning_rate": 2.176695069850053e-06, "lm_loss": 5.4558, "loss": 1.2379, "step": 1546, "text_contrastive_loss": 0.7304, "train_positive_log_prob": -79.3602, "train_positive_token_accuracy": 0.0755, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.381, "epoch": 3.4920993227990973, "grad_norm": 12.9622802734375, "learning_rate": 2.1707099877655634e-06, "lm_loss": 5.3894, "loss": 1.313, "step": 1547, "text_contrastive_loss": 0.7862, "train_positive_log_prob": -80.6528, "train_positive_token_accuracy": 0.0856, "train_positive_token_prob": 0.0328 }, { "contrastive_loss": 0.2719, "epoch": 3.494356659142212, "grad_norm": 11.515785217285156, "learning_rate": 2.1647308631438068e-06, "lm_loss": 5.355, "loss": 1.1773, "step": 1548, "text_contrastive_loss": 0.7398, "train_positive_log_prob": -79.4634, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3136, "epoch": 3.4966139954853275, "grad_norm": 12.572412490844727, "learning_rate": 2.1587577085746596e-06, "lm_loss": 5.4211, "loss": 1.207, "step": 1549, "text_contrastive_loss": 0.7026, "train_positive_log_prob": -78.3742, "train_positive_token_accuracy": 0.0857, "train_positive_token_prob": 0.0326 }, { "contrastive_loss": 0.4071, "epoch": 3.4988713318284423, "grad_norm": 14.194604873657227, "learning_rate": 2.1527905366354292e-06, "lm_loss": 5.3583, "loss": 1.2807, "step": 1550, "text_contrastive_loss": 0.6754, "train_positive_log_prob": -79.7542, "train_positive_token_accuracy": 0.0798, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.4247, "epoch": 3.5011286681715577, "grad_norm": 13.476304054260254, "learning_rate": 2.14682935989082e-06, "lm_loss": 5.4014, "loss": 1.3515, "step": 1551, "text_contrastive_loss": 0.7734, "train_positive_log_prob": -78.1968, "train_positive_token_accuracy": 0.0747, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3518, "epoch": 3.5033860045146725, "grad_norm": 13.156872749328613, "learning_rate": 2.14087419089292e-06, "lm_loss": 5.4082, "loss": 1.2955, "step": 1552, "text_contrastive_loss": 0.8059, "train_positive_log_prob": -80.3814, "train_positive_token_accuracy": 0.0739, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.2908, "epoch": 3.505643340857788, "grad_norm": 10.860188484191895, "learning_rate": 2.1349250421811622e-06, "lm_loss": 5.363, "loss": 1.1888, "step": 1553, "text_contrastive_loss": 0.7233, "train_positive_log_prob": -80.6763, "train_positive_token_accuracy": 0.0875, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.2545, "epoch": 3.5079006772009027, "grad_norm": 12.058480262756348, "learning_rate": 2.1289819262823065e-06, "lm_loss": 5.4619, "loss": 1.1261, "step": 1554, "text_contrastive_loss": 0.6509, "train_positive_log_prob": -79.2798, "train_positive_token_accuracy": 0.0732, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3596, "epoch": 3.510158013544018, "grad_norm": 13.837039947509766, "learning_rate": 2.1230448557104087e-06, "lm_loss": 5.3523, "loss": 1.2333, "step": 1555, "text_contrastive_loss": 0.677, "train_positive_log_prob": -77.6497, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3374, "epoch": 3.5124153498871333, "grad_norm": 12.251382827758789, "learning_rate": 2.117113842966792e-06, "lm_loss": 5.5174, "loss": 1.2319, "step": 1556, "text_contrastive_loss": 0.6856, "train_positive_log_prob": -81.2086, "train_positive_token_accuracy": 0.0717, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.4057, "epoch": 3.514672686230248, "grad_norm": 14.353056907653809, "learning_rate": 2.111188900540028e-06, "lm_loss": 5.4156, "loss": 1.3047, "step": 1557, "text_contrastive_loss": 0.7148, "train_positive_log_prob": -82.061, "train_positive_token_accuracy": 0.0874, "train_positive_token_prob": 0.033 }, { "contrastive_loss": 0.2912, "epoch": 3.5169300225733635, "grad_norm": 11.068564414978027, "learning_rate": 2.1052700409059057e-06, "lm_loss": 5.4744, "loss": 1.1417, "step": 1558, "text_contrastive_loss": 0.6062, "train_positive_log_prob": -82.2836, "train_positive_token_accuracy": 0.0865, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3046, "epoch": 3.5191873589164784, "grad_norm": 13.130361557006836, "learning_rate": 2.0993572765274044e-06, "lm_loss": 5.5494, "loss": 1.2723, "step": 1559, "text_contrastive_loss": 0.8256, "train_positive_log_prob": -81.2457, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.2906, "epoch": 3.5214446952595937, "grad_norm": 12.09571647644043, "learning_rate": 2.093450619854671e-06, "lm_loss": 5.5055, "loss": 1.2892, "step": 1560, "text_contrastive_loss": 0.8961, "train_positive_log_prob": -81.8086, "train_positive_token_accuracy": 0.082, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3227, "epoch": 3.523702031602709, "grad_norm": 13.012192726135254, "learning_rate": 2.08755008332499e-06, "lm_loss": 5.4112, "loss": 1.2437, "step": 1561, "text_contrastive_loss": 0.7597, "train_positive_log_prob": -80.7691, "train_positive_token_accuracy": 0.0827, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3763, "epoch": 3.525959367945824, "grad_norm": 12.909019470214844, "learning_rate": 2.0816556793627624e-06, "lm_loss": 5.4735, "loss": 1.3205, "step": 1562, "text_contrastive_loss": 0.7937, "train_positive_log_prob": -82.2685, "train_positive_token_accuracy": 0.0767, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.4227, "epoch": 3.528216704288939, "grad_norm": 13.299750328063965, "learning_rate": 2.0757674203794696e-06, "lm_loss": 5.5079, "loss": 1.3434, "step": 1563, "text_contrastive_loss": 0.7399, "train_positive_log_prob": -83.6727, "train_positive_token_accuracy": 0.082, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3216, "epoch": 3.530474040632054, "grad_norm": 12.418523788452148, "learning_rate": 2.06988531877366e-06, "lm_loss": 5.2851, "loss": 1.2102, "step": 1564, "text_contrastive_loss": 0.7203, "train_positive_log_prob": -77.6855, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.363, "epoch": 3.5327313769751694, "grad_norm": 13.458390235900879, "learning_rate": 2.064009386930915e-06, "lm_loss": 5.4871, "loss": 1.335, "step": 1565, "text_contrastive_loss": 0.8465, "train_positive_log_prob": -80.0791, "train_positive_token_accuracy": 0.0865, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.4234, "epoch": 3.5349887133182847, "grad_norm": 14.445470809936523, "learning_rate": 2.0581396372238254e-06, "lm_loss": 5.3556, "loss": 1.3654, "step": 1566, "text_contrastive_loss": 0.813, "train_positive_log_prob": -78.2426, "train_positive_token_accuracy": 0.0861, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.334, "epoch": 3.5372460496613995, "grad_norm": 13.816523551940918, "learning_rate": 2.0522760820119615e-06, "lm_loss": 5.5365, "loss": 1.2447, "step": 1567, "text_contrastive_loss": 0.7139, "train_positive_log_prob": -83.1731, "train_positive_token_accuracy": 0.0751, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.476, "epoch": 3.5395033860045144, "grad_norm": 19.811813354492188, "learning_rate": 2.046418733641853e-06, "lm_loss": 5.5185, "loss": 1.467, "step": 1568, "text_contrastive_loss": 0.8782, "train_positive_log_prob": -81.4403, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3066, "epoch": 3.5417607223476297, "grad_norm": 13.256061553955078, "learning_rate": 2.04056760444696e-06, "lm_loss": 5.5971, "loss": 1.2254, "step": 1569, "text_contrastive_loss": 0.7181, "train_positive_log_prob": -86.1801, "train_positive_token_accuracy": 0.0743, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.3382, "epoch": 3.544018058690745, "grad_norm": 12.53451919555664, "learning_rate": 2.0347227067476478e-06, "lm_loss": 5.3916, "loss": 1.3546, "step": 1570, "text_contrastive_loss": 0.9546, "train_positive_log_prob": -78.4316, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.2651, "epoch": 3.54627539503386, "grad_norm": 10.973885536193848, "learning_rate": 2.02888405285116e-06, "lm_loss": 5.453, "loss": 1.1802, "step": 1571, "text_contrastive_loss": 0.7395, "train_positive_log_prob": -78.8148, "train_positive_token_accuracy": 0.0776, "train_positive_token_prob": 0.0292 }, { "contrastive_loss": 0.4255, "epoch": 3.5485327313769752, "grad_norm": 13.16361141204834, "learning_rate": 2.02305165505159e-06, "lm_loss": 5.4203, "loss": 1.3472, "step": 1572, "text_contrastive_loss": 0.7595, "train_positive_log_prob": -80.8616, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.4142, "epoch": 3.55079006772009, "grad_norm": 12.9069242477417, "learning_rate": 2.0172255256298623e-06, "lm_loss": 5.4061, "loss": 1.3569, "step": 1573, "text_contrastive_loss": 0.8041, "train_positive_log_prob": -80.7105, "train_positive_token_accuracy": 0.0824, "train_positive_token_prob": 0.0326 }, { "contrastive_loss": 0.2933, "epoch": 3.5530474040632054, "grad_norm": 11.150215148925781, "learning_rate": 2.0114056768537005e-06, "lm_loss": 5.3737, "loss": 1.1758, "step": 1574, "text_contrastive_loss": 0.6903, "train_positive_log_prob": -80.7757, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3092, "epoch": 3.5553047404063207, "grad_norm": 12.42226791381836, "learning_rate": 2.005592120977606e-06, "lm_loss": 5.4272, "loss": 1.2342, "step": 1575, "text_contrastive_loss": 0.7646, "train_positive_log_prob": -78.9682, "train_positive_token_accuracy": 0.0762, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.4042, "epoch": 3.5575620767494356, "grad_norm": 14.214899063110352, "learning_rate": 1.9997848702428226e-06, "lm_loss": 5.367, "loss": 1.4012, "step": 1576, "text_contrastive_loss": 0.9206, "train_positive_log_prob": -79.9787, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.4442, "epoch": 3.559819413092551, "grad_norm": 14.875078201293945, "learning_rate": 1.9939839368773267e-06, "lm_loss": 5.6141, "loss": 1.4326, "step": 1577, "text_contrastive_loss": 0.854, "train_positive_log_prob": -82.4555, "train_positive_token_accuracy": 0.0684, "train_positive_token_prob": 0.0285 }, { "contrastive_loss": 0.4697, "epoch": 3.5620767494356658, "grad_norm": 16.47623634338379, "learning_rate": 1.9881893330957893e-06, "lm_loss": 5.4831, "loss": 1.5145, "step": 1578, "text_contrastive_loss": 0.993, "train_positive_log_prob": -81.1563, "train_positive_token_accuracy": 0.0839, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.2458, "epoch": 3.564334085778781, "grad_norm": 11.240406036376953, "learning_rate": 1.982401071099549e-06, "lm_loss": 5.398, "loss": 1.2106, "step": 1579, "text_contrastive_loss": 0.85, "train_positive_log_prob": -82.7413, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.4033, "epoch": 3.5665914221218964, "grad_norm": 12.894579887390137, "learning_rate": 1.9766191630765964e-06, "lm_loss": 5.5431, "loss": 1.3649, "step": 1580, "text_contrastive_loss": 0.8146, "train_positive_log_prob": -82.0647, "train_positive_token_accuracy": 0.0789, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.247, "epoch": 3.5688487584650113, "grad_norm": 9.906365394592285, "learning_rate": 1.970843621201541e-06, "lm_loss": 5.4329, "loss": 1.1266, "step": 1581, "text_contrastive_loss": 0.6727, "train_positive_log_prob": -79.608, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3531, "epoch": 3.5711060948081266, "grad_norm": 12.115705490112305, "learning_rate": 1.9650744576355894e-06, "lm_loss": 5.4785, "loss": 1.2599, "step": 1582, "text_contrastive_loss": 0.7179, "train_positive_log_prob": -81.0406, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.4402, "epoch": 3.5733634311512414, "grad_norm": 13.983465194702148, "learning_rate": 1.959311684526513e-06, "lm_loss": 5.51, "loss": 1.4032, "step": 1583, "text_contrastive_loss": 0.824, "train_positive_log_prob": -84.3919, "train_positive_token_accuracy": 0.0824, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.375, "epoch": 3.5756207674943568, "grad_norm": 14.502907752990723, "learning_rate": 1.9535553140086322e-06, "lm_loss": 5.3925, "loss": 1.332, "step": 1584, "text_contrastive_loss": 0.8354, "train_positive_log_prob": -79.2664, "train_positive_token_accuracy": 0.0781, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3706, "epoch": 3.5778781038374716, "grad_norm": 13.97586441040039, "learning_rate": 1.9478053582027826e-06, "lm_loss": 5.4038, "loss": 1.3105, "step": 1585, "text_contrastive_loss": 0.7989, "train_positive_log_prob": -78.4822, "train_positive_token_accuracy": 0.0842, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.4562, "epoch": 3.580135440180587, "grad_norm": 13.074638366699219, "learning_rate": 1.9420618292162974e-06, "lm_loss": 5.3254, "loss": 1.4316, "step": 1586, "text_contrastive_loss": 0.8857, "train_positive_log_prob": -80.0487, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3422, "epoch": 3.582392776523702, "grad_norm": 12.996149063110352, "learning_rate": 1.9363247391429695e-06, "lm_loss": 5.4479, "loss": 1.2868, "step": 1587, "text_contrastive_loss": 0.7995, "train_positive_log_prob": -81.6767, "train_positive_token_accuracy": 0.081, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3504, "epoch": 3.584650112866817, "grad_norm": 11.976738929748535, "learning_rate": 1.93059410006304e-06, "lm_loss": 5.5108, "loss": 1.2936, "step": 1588, "text_contrastive_loss": 0.7843, "train_positive_log_prob": -83.397, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.3133, "epoch": 3.5869074492099324, "grad_norm": 11.291887283325195, "learning_rate": 1.924869924043165e-06, "lm_loss": 5.395, "loss": 1.2656, "step": 1589, "text_contrastive_loss": 0.8256, "train_positive_log_prob": -80.3596, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3633, "epoch": 3.5891647855530473, "grad_norm": 12.659879684448242, "learning_rate": 1.919152223136391e-06, "lm_loss": 5.439, "loss": 1.3022, "step": 1590, "text_contrastive_loss": 0.7901, "train_positive_log_prob": -80.862, "train_positive_token_accuracy": 0.0793, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3369, "epoch": 3.5914221218961626, "grad_norm": 14.898165702819824, "learning_rate": 1.913441009382133e-06, "lm_loss": 5.3437, "loss": 1.2725, "step": 1591, "text_contrastive_loss": 0.8025, "train_positive_log_prob": -79.0059, "train_positive_token_accuracy": 0.0859, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3797, "epoch": 3.5936794582392775, "grad_norm": 12.831680297851562, "learning_rate": 1.9077362948061404e-06, "lm_loss": 5.3652, "loss": 1.3208, "step": 1592, "text_contrastive_loss": 0.8091, "train_positive_log_prob": -79.7247, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.4952, "epoch": 3.595936794582393, "grad_norm": 13.396096229553223, "learning_rate": 1.902038091420481e-06, "lm_loss": 5.4346, "loss": 1.4233, "step": 1593, "text_contrastive_loss": 0.7693, "train_positive_log_prob": -79.5267, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3941, "epoch": 3.598194130925508, "grad_norm": 12.79609203338623, "learning_rate": 1.8963464112235185e-06, "lm_loss": 5.4697, "loss": 1.3875, "step": 1594, "text_contrastive_loss": 0.8929, "train_positive_log_prob": -79.6926, "train_positive_token_accuracy": 0.0721, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4476, "epoch": 3.600451467268623, "grad_norm": 16.410837173461914, "learning_rate": 1.8906612661998698e-06, "lm_loss": 5.4431, "loss": 1.3496, "step": 1595, "text_contrastive_loss": 0.7154, "train_positive_log_prob": -80.6089, "train_positive_token_accuracy": 0.0762, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3113, "epoch": 3.6027088036117383, "grad_norm": 12.252687454223633, "learning_rate": 1.884982668320398e-06, "lm_loss": 5.4839, "loss": 1.2253, "step": 1596, "text_contrastive_loss": 0.7312, "train_positive_log_prob": -78.9801, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3348, "epoch": 3.604966139954853, "grad_norm": 12.842511177062988, "learning_rate": 1.8793106295421797e-06, "lm_loss": 5.3726, "loss": 1.2057, "step": 1597, "text_contrastive_loss": 0.6672, "train_positive_log_prob": -77.9063, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.393, "epoch": 3.6072234762979685, "grad_norm": 14.63471794128418, "learning_rate": 1.873645161808481e-06, "lm_loss": 5.366, "loss": 1.3704, "step": 1598, "text_contrastive_loss": 0.8817, "train_positive_log_prob": -78.3344, "train_positive_token_accuracy": 0.0789, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3527, "epoch": 3.609480812641084, "grad_norm": 12.025533676147461, "learning_rate": 1.8679862770487273e-06, "lm_loss": 5.3853, "loss": 1.2841, "step": 1599, "text_contrastive_loss": 0.7857, "train_positive_log_prob": -78.1483, "train_positive_token_accuracy": 0.0851, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3843, "epoch": 3.6117381489841986, "grad_norm": 12.612031936645508, "learning_rate": 1.8623339871784869e-06, "lm_loss": 5.4367, "loss": 1.3268, "step": 1600, "text_contrastive_loss": 0.7976, "train_positive_log_prob": -82.7199, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.4264, "epoch": 3.6139954853273135, "grad_norm": 13.698080062866211, "learning_rate": 1.8566883040994411e-06, "lm_loss": 5.4968, "loss": 1.4221, "step": 1601, "text_contrastive_loss": 0.8921, "train_positive_log_prob": -82.1514, "train_positive_token_accuracy": 0.081, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3204, "epoch": 3.616252821670429, "grad_norm": 11.818224906921387, "learning_rate": 1.8510492396993595e-06, "lm_loss": 5.4532, "loss": 1.2383, "step": 1602, "text_contrastive_loss": 0.7451, "train_positive_log_prob": -83.2307, "train_positive_token_accuracy": 0.0755, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.4045, "epoch": 3.618510158013544, "grad_norm": 13.190351486206055, "learning_rate": 1.8454168058520732e-06, "lm_loss": 5.505, "loss": 1.395, "step": 1603, "text_contrastive_loss": 0.8799, "train_positive_log_prob": -80.2099, "train_positive_token_accuracy": 0.078, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3406, "epoch": 3.620767494356659, "grad_norm": 12.244832038879395, "learning_rate": 1.8397910144174536e-06, "lm_loss": 5.3262, "loss": 1.2494, "step": 1604, "text_contrastive_loss": 0.7522, "train_positive_log_prob": -78.8156, "train_positive_token_accuracy": 0.0808, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3156, "epoch": 3.6230248306997743, "grad_norm": 11.526841163635254, "learning_rate": 1.8341718772413852e-06, "lm_loss": 5.3288, "loss": 1.164, "step": 1605, "text_contrastive_loss": 0.6311, "train_positive_log_prob": -77.2912, "train_positive_token_accuracy": 0.0762, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.4301, "epoch": 3.625282167042889, "grad_norm": 12.750216484069824, "learning_rate": 1.8285594061557421e-06, "lm_loss": 5.3824, "loss": 1.3957, "step": 1606, "text_contrastive_loss": 0.8547, "train_positive_log_prob": -80.4866, "train_positive_token_accuracy": 0.0836, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.35, "epoch": 3.6275395033860045, "grad_norm": 13.768596649169922, "learning_rate": 1.822953612978362e-06, "lm_loss": 5.5162, "loss": 1.2777, "step": 1607, "text_contrastive_loss": 0.7523, "train_positive_log_prob": -83.8653, "train_positive_token_accuracy": 0.0763, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.405, "epoch": 3.62979683972912, "grad_norm": 12.813020706176758, "learning_rate": 1.817354509513017e-06, "lm_loss": 5.3793, "loss": 1.3395, "step": 1608, "text_contrastive_loss": 0.7932, "train_positive_log_prob": -80.2028, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3253, "epoch": 3.6320541760722347, "grad_norm": 12.3479585647583, "learning_rate": 1.8117621075493979e-06, "lm_loss": 5.4163, "loss": 1.2278, "step": 1609, "text_contrastive_loss": 0.7216, "train_positive_log_prob": -81.3096, "train_positive_token_accuracy": 0.0835, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3627, "epoch": 3.63431151241535, "grad_norm": 11.790746688842773, "learning_rate": 1.8061764188630831e-06, "lm_loss": 5.3681, "loss": 1.3255, "step": 1610, "text_contrastive_loss": 0.852, "train_positive_log_prob": -79.1585, "train_positive_token_accuracy": 0.0832, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.2754, "epoch": 3.636568848758465, "grad_norm": 12.158080101013184, "learning_rate": 1.8005974552155158e-06, "lm_loss": 5.4961, "loss": 1.1475, "step": 1611, "text_contrastive_loss": 0.645, "train_positive_log_prob": -81.9944, "train_positive_token_accuracy": 0.078, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3996, "epoch": 3.63882618510158, "grad_norm": 12.607054710388184, "learning_rate": 1.7950252283539776e-06, "lm_loss": 5.5635, "loss": 1.389, "step": 1612, "text_contrastive_loss": 0.8661, "train_positive_log_prob": -83.1741, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3213, "epoch": 3.6410835214446955, "grad_norm": 12.518956184387207, "learning_rate": 1.7894597500115657e-06, "lm_loss": 5.4323, "loss": 1.2948, "step": 1613, "text_contrastive_loss": 0.8607, "train_positive_log_prob": -80.6768, "train_positive_token_accuracy": 0.0789, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.4059, "epoch": 3.6433408577878104, "grad_norm": 15.735977172851562, "learning_rate": 1.7839010319071687e-06, "lm_loss": 5.3952, "loss": 1.3833, "step": 1614, "text_contrastive_loss": 0.8759, "train_positive_log_prob": -79.9137, "train_positive_token_accuracy": 0.0736, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3427, "epoch": 3.6455981941309257, "grad_norm": 13.345383644104004, "learning_rate": 1.7783490857454354e-06, "lm_loss": 5.5722, "loss": 1.3157, "step": 1615, "text_contrastive_loss": 0.8315, "train_positive_log_prob": -81.996, "train_positive_token_accuracy": 0.0746, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.3228, "epoch": 3.6478555304740405, "grad_norm": 11.710400581359863, "learning_rate": 1.7728039232167603e-06, "lm_loss": 5.4267, "loss": 1.2269, "step": 1616, "text_contrastive_loss": 0.7228, "train_positive_log_prob": -80.3139, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3458, "epoch": 3.650112866817156, "grad_norm": 13.129372596740723, "learning_rate": 1.7672655559972535e-06, "lm_loss": 5.3303, "loss": 1.2842, "step": 1617, "text_contrastive_loss": 0.8108, "train_positive_log_prob": -78.4302, "train_positive_token_accuracy": 0.0826, "train_positive_token_prob": 0.0325 }, { "contrastive_loss": 0.2252, "epoch": 3.6523702031602707, "grad_norm": 11.039880752563477, "learning_rate": 1.7617339957487167e-06, "lm_loss": 5.4686, "loss": 1.1095, "step": 1618, "text_contrastive_loss": 0.675, "train_positive_log_prob": -82.1572, "train_positive_token_accuracy": 0.0728, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3163, "epoch": 3.654627539503386, "grad_norm": 12.812634468078613, "learning_rate": 1.7562092541186144e-06, "lm_loss": 5.4963, "loss": 1.2151, "step": 1619, "text_contrastive_loss": 0.6984, "train_positive_log_prob": -82.1396, "train_positive_token_accuracy": 0.0737, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.3392, "epoch": 3.656884875846501, "grad_norm": 12.152909278869629, "learning_rate": 1.750691342740058e-06, "lm_loss": 5.4037, "loss": 1.256, "step": 1620, "text_contrastive_loss": 0.7528, "train_positive_log_prob": -78.6081, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.4045, "epoch": 3.659142212189616, "grad_norm": 13.417214393615723, "learning_rate": 1.7451802732317763e-06, "lm_loss": 5.3755, "loss": 1.2903, "step": 1621, "text_contrastive_loss": 0.6964, "train_positive_log_prob": -81.431, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.2638, "epoch": 3.6613995485327315, "grad_norm": 12.613409996032715, "learning_rate": 1.7396760571980902e-06, "lm_loss": 5.3969, "loss": 1.1343, "step": 1622, "text_contrastive_loss": 0.6617, "train_positive_log_prob": -80.4469, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.323, "epoch": 3.6636568848758464, "grad_norm": 12.255485534667969, "learning_rate": 1.7341787062288928e-06, "lm_loss": 5.4692, "loss": 1.2179, "step": 1623, "text_contrastive_loss": 0.6959, "train_positive_log_prob": -81.6074, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3705, "epoch": 3.6659142212189617, "grad_norm": 12.568070411682129, "learning_rate": 1.7286882318996162e-06, "lm_loss": 5.4433, "loss": 1.3011, "step": 1624, "text_contrastive_loss": 0.7726, "train_positive_log_prob": -80.1824, "train_positive_token_accuracy": 0.0833, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3608, "epoch": 3.6681715575620766, "grad_norm": 13.158514976501465, "learning_rate": 1.7232046457712164e-06, "lm_loss": 5.3833, "loss": 1.2496, "step": 1625, "text_contrastive_loss": 0.701, "train_positive_log_prob": -79.0456, "train_positive_token_accuracy": 0.0827, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.4922, "epoch": 3.670428893905192, "grad_norm": 14.625532150268555, "learning_rate": 1.7177279593901463e-06, "lm_loss": 5.4737, "loss": 1.4734, "step": 1626, "text_contrastive_loss": 0.8676, "train_positive_log_prob": -83.0258, "train_positive_token_accuracy": 0.085, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.3635, "epoch": 3.672686230248307, "grad_norm": 12.785755157470703, "learning_rate": 1.712258184288328e-06, "lm_loss": 5.4296, "loss": 1.3403, "step": 1627, "text_contrastive_loss": 0.8677, "train_positive_log_prob": -80.3248, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3955, "epoch": 3.674943566591422, "grad_norm": 13.993976593017578, "learning_rate": 1.7067953319831327e-06, "lm_loss": 5.2966, "loss": 1.3552, "step": 1628, "text_contrastive_loss": 0.86, "train_positive_log_prob": -77.5487, "train_positive_token_accuracy": 0.0846, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.4406, "epoch": 3.6772009029345374, "grad_norm": 13.542219161987305, "learning_rate": 1.7013394139773537e-06, "lm_loss": 5.4495, "loss": 1.4451, "step": 1629, "text_contrastive_loss": 0.9192, "train_positive_log_prob": -80.5491, "train_positive_token_accuracy": 0.0872, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3337, "epoch": 3.6794582392776523, "grad_norm": 13.1850004196167, "learning_rate": 1.6958904417591853e-06, "lm_loss": 5.512, "loss": 1.304, "step": 1630, "text_contrastive_loss": 0.8381, "train_positive_log_prob": -83.4348, "train_positive_token_accuracy": 0.0815, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.4008, "epoch": 3.6817155756207676, "grad_norm": 15.749434471130371, "learning_rate": 1.6904484268021915e-06, "lm_loss": 5.4665, "loss": 1.37, "step": 1631, "text_contrastive_loss": 0.8451, "train_positive_log_prob": -82.7882, "train_positive_token_accuracy": 0.0818, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3308, "epoch": 3.683972911963883, "grad_norm": 12.142971992492676, "learning_rate": 1.6850133805652907e-06, "lm_loss": 5.5244, "loss": 1.3, "step": 1632, "text_contrastive_loss": 0.8335, "train_positive_log_prob": -81.2679, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.2907, "epoch": 3.6862302483069977, "grad_norm": 11.403225898742676, "learning_rate": 1.6795853144927282e-06, "lm_loss": 5.5715, "loss": 1.2479, "step": 1633, "text_contrastive_loss": 0.8001, "train_positive_log_prob": -82.9844, "train_positive_token_accuracy": 0.0758, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.288, "epoch": 3.6884875846501126, "grad_norm": 11.208388328552246, "learning_rate": 1.6741642400140513e-06, "lm_loss": 5.3952, "loss": 1.1811, "step": 1634, "text_contrastive_loss": 0.7071, "train_positive_log_prob": -81.3046, "train_positive_token_accuracy": 0.0816, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.2854, "epoch": 3.690744920993228, "grad_norm": 11.621232032775879, "learning_rate": 1.668750168544081e-06, "lm_loss": 5.4768, "loss": 1.2199, "step": 1635, "text_contrastive_loss": 0.7738, "train_positive_log_prob": -80.2905, "train_positive_token_accuracy": 0.0781, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.2686, "epoch": 3.6930022573363432, "grad_norm": 12.664349555969238, "learning_rate": 1.663343111482898e-06, "lm_loss": 5.3322, "loss": 1.1795, "step": 1636, "text_contrastive_loss": 0.7554, "train_positive_log_prob": -77.382, "train_positive_token_accuracy": 0.0848, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.3965, "epoch": 3.695259593679458, "grad_norm": 14.684317588806152, "learning_rate": 1.657943080215812e-06, "lm_loss": 5.3943, "loss": 1.3464, "step": 1637, "text_contrastive_loss": 0.821, "train_positive_log_prob": -80.0894, "train_positive_token_accuracy": 0.0805, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3085, "epoch": 3.6975169300225734, "grad_norm": 11.034719467163086, "learning_rate": 1.6525500861133386e-06, "lm_loss": 5.572, "loss": 1.2273, "step": 1638, "text_contrastive_loss": 0.7231, "train_positive_log_prob": -81.7108, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3509, "epoch": 3.6997742663656883, "grad_norm": 12.65117359161377, "learning_rate": 1.6471641405311727e-06, "lm_loss": 5.2876, "loss": 1.2707, "step": 1639, "text_contrastive_loss": 0.782, "train_positive_log_prob": -78.7163, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3723, "epoch": 3.7020316027088036, "grad_norm": 13.441893577575684, "learning_rate": 1.641785254810172e-06, "lm_loss": 5.4307, "loss": 1.3708, "step": 1640, "text_contrastive_loss": 0.9108, "train_positive_log_prob": -82.3086, "train_positive_token_accuracy": 0.0781, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3514, "epoch": 3.704288939051919, "grad_norm": 12.292375564575195, "learning_rate": 1.636413440276326e-06, "lm_loss": 5.3901, "loss": 1.3336, "step": 1641, "text_contrastive_loss": 0.8864, "train_positive_log_prob": -82.3257, "train_positive_token_accuracy": 0.0826, "train_positive_token_prob": 0.033 }, { "contrastive_loss": 0.3296, "epoch": 3.706546275395034, "grad_norm": 10.76636791229248, "learning_rate": 1.631048708240736e-06, "lm_loss": 5.5827, "loss": 1.2695, "step": 1642, "text_contrastive_loss": 0.7634, "train_positive_log_prob": -83.3671, "train_positive_token_accuracy": 0.0735, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.3658, "epoch": 3.708803611738149, "grad_norm": 12.98524284362793, "learning_rate": 1.6256910699995921e-06, "lm_loss": 5.3857, "loss": 1.2606, "step": 1643, "text_contrastive_loss": 0.7125, "train_positive_log_prob": -79.5867, "train_positive_token_accuracy": 0.0814, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.2739, "epoch": 3.711060948081264, "grad_norm": 11.67389965057373, "learning_rate": 1.620340536834139e-06, "lm_loss": 5.4435, "loss": 1.1752, "step": 1644, "text_contrastive_loss": 0.7139, "train_positive_log_prob": -80.2704, "train_positive_token_accuracy": 0.0879, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.3485, "epoch": 3.7133182844243793, "grad_norm": 13.924629211425781, "learning_rate": 1.6149971200106723e-06, "lm_loss": 5.3682, "loss": 1.3164, "step": 1645, "text_contrastive_loss": 0.8623, "train_positive_log_prob": -78.9962, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.3264, "epoch": 3.7155756207674946, "grad_norm": 12.823797225952148, "learning_rate": 1.6096608307804973e-06, "lm_loss": 5.3535, "loss": 1.2174, "step": 1646, "text_contrastive_loss": 0.7113, "train_positive_log_prob": -77.8711, "train_positive_token_accuracy": 0.0738, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3062, "epoch": 3.7178329571106095, "grad_norm": 10.675153732299805, "learning_rate": 1.604331680379908e-06, "lm_loss": 5.4783, "loss": 1.164, "step": 1647, "text_contrastive_loss": 0.6199, "train_positive_log_prob": -82.2805, "train_positive_token_accuracy": 0.0793, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.2938, "epoch": 3.7200902934537243, "grad_norm": 12.304091453552246, "learning_rate": 1.599009680030173e-06, "lm_loss": 5.5528, "loss": 1.287, "step": 1648, "text_contrastive_loss": 0.8758, "train_positive_log_prob": -83.4886, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3439, "epoch": 3.7223476297968396, "grad_norm": 14.056093215942383, "learning_rate": 1.5936948409375007e-06, "lm_loss": 5.4472, "loss": 1.3349, "step": 1649, "text_contrastive_loss": 0.8927, "train_positive_log_prob": -81.1791, "train_positive_token_accuracy": 0.0868, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.4163, "epoch": 3.724604966139955, "grad_norm": 14.787209510803223, "learning_rate": 1.5883871742930257e-06, "lm_loss": 5.5042, "loss": 1.332, "step": 1650, "text_contrastive_loss": 0.7305, "train_positive_log_prob": -81.7026, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.33, "epoch": 3.72686230248307, "grad_norm": 11.547781944274902, "learning_rate": 1.5830866912727722e-06, "lm_loss": 5.4291, "loss": 1.2499, "step": 1651, "text_contrastive_loss": 0.754, "train_positive_log_prob": -81.3766, "train_positive_token_accuracy": 0.0847, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3267, "epoch": 3.729119638826185, "grad_norm": 13.305439949035645, "learning_rate": 1.5777934030376445e-06, "lm_loss": 5.3766, "loss": 1.2505, "step": 1652, "text_contrastive_loss": 0.7724, "train_positive_log_prob": -81.0454, "train_positive_token_accuracy": 0.085, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.2939, "epoch": 3.7313769751693, "grad_norm": 12.66342830657959, "learning_rate": 1.5725073207333963e-06, "lm_loss": 5.3784, "loss": 1.2235, "step": 1653, "text_contrastive_loss": 0.7835, "train_positive_log_prob": -82.6539, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.4104, "epoch": 3.7336343115124153, "grad_norm": 15.684741020202637, "learning_rate": 1.5672284554906087e-06, "lm_loss": 5.4047, "loss": 1.2856, "step": 1654, "text_contrastive_loss": 0.6696, "train_positive_log_prob": -80.0549, "train_positive_token_accuracy": 0.0741, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.4281, "epoch": 3.7358916478555306, "grad_norm": 13.89198112487793, "learning_rate": 1.561956818424661e-06, "lm_loss": 5.4007, "loss": 1.2978, "step": 1655, "text_contrastive_loss": 0.6592, "train_positive_log_prob": -79.3625, "train_positive_token_accuracy": 0.0763, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3196, "epoch": 3.7381489841986455, "grad_norm": 13.032612800598145, "learning_rate": 1.5566924206357187e-06, "lm_loss": 5.4677, "loss": 1.1883, "step": 1656, "text_contrastive_loss": 0.6438, "train_positive_log_prob": -80.6727, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3218, "epoch": 3.740406320541761, "grad_norm": 12.352042198181152, "learning_rate": 1.5514352732087024e-06, "lm_loss": 5.4245, "loss": 1.1817, "step": 1657, "text_contrastive_loss": 0.6349, "train_positive_log_prob": -80.8585, "train_positive_token_accuracy": 0.076, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.4089, "epoch": 3.7426636568848757, "grad_norm": 12.809606552124023, "learning_rate": 1.5461853872132648e-06, "lm_loss": 5.3186, "loss": 1.357, "step": 1658, "text_contrastive_loss": 0.8324, "train_positive_log_prob": -78.6184, "train_positive_token_accuracy": 0.0801, "train_positive_token_prob": 0.0325 }, { "contrastive_loss": 0.332, "epoch": 3.744920993227991, "grad_norm": 11.478001594543457, "learning_rate": 1.5409427737037713e-06, "lm_loss": 5.3247, "loss": 1.2684, "step": 1659, "text_contrastive_loss": 0.8079, "train_positive_log_prob": -79.268, "train_positive_token_accuracy": 0.0811, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.323, "epoch": 3.7471783295711063, "grad_norm": 12.333276748657227, "learning_rate": 1.5357074437192688e-06, "lm_loss": 5.4138, "loss": 1.2353, "step": 1660, "text_contrastive_loss": 0.7418, "train_positive_log_prob": -78.9674, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.328, "epoch": 3.749435665914221, "grad_norm": 12.25515079498291, "learning_rate": 1.5304794082834713e-06, "lm_loss": 5.4627, "loss": 1.2554, "step": 1661, "text_contrastive_loss": 0.7624, "train_positive_log_prob": -81.5765, "train_positive_token_accuracy": 0.0829, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3308, "epoch": 3.7516930022573365, "grad_norm": 12.769706726074219, "learning_rate": 1.5252586784047374e-06, "lm_loss": 5.51, "loss": 1.3018, "step": 1662, "text_contrastive_loss": 0.84, "train_positive_log_prob": -81.1442, "train_positive_token_accuracy": 0.0857, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3141, "epoch": 3.7539503386004514, "grad_norm": 12.316630363464355, "learning_rate": 1.520045265076034e-06, "lm_loss": 5.4767, "loss": 1.1797, "step": 1663, "text_contrastive_loss": 0.6358, "train_positive_log_prob": -80.6773, "train_positive_token_accuracy": 0.0777, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3972, "epoch": 3.7562076749435667, "grad_norm": 13.816545486450195, "learning_rate": 1.5148391792749272e-06, "lm_loss": 5.3845, "loss": 1.3507, "step": 1664, "text_contrastive_loss": 0.83, "train_positive_log_prob": -78.4986, "train_positive_token_accuracy": 0.0825, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3861, "epoch": 3.758465011286682, "grad_norm": 13.981588363647461, "learning_rate": 1.5096404319635533e-06, "lm_loss": 5.2765, "loss": 1.3035, "step": 1665, "text_contrastive_loss": 0.7795, "train_positive_log_prob": -76.9203, "train_positive_token_accuracy": 0.0898, "train_positive_token_prob": 0.033 }, { "contrastive_loss": 0.2476, "epoch": 3.760722347629797, "grad_norm": 11.404553413391113, "learning_rate": 1.5044490340885987e-06, "lm_loss": 5.3712, "loss": 1.074, "step": 1666, "text_contrastive_loss": 0.5786, "train_positive_log_prob": -79.924, "train_positive_token_accuracy": 0.0835, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.457, "epoch": 3.7629796839729117, "grad_norm": 15.401835441589355, "learning_rate": 1.4992649965812673e-06, "lm_loss": 5.3711, "loss": 1.4998, "step": 1667, "text_contrastive_loss": 1.0114, "train_positive_log_prob": -76.517, "train_positive_token_accuracy": 0.076, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.3473, "epoch": 3.765237020316027, "grad_norm": 10.748358726501465, "learning_rate": 1.4940883303572724e-06, "lm_loss": 5.3974, "loss": 1.2184, "step": 1668, "text_contrastive_loss": 0.6627, "train_positive_log_prob": -81.0664, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.4122, "epoch": 3.7674943566591423, "grad_norm": 13.811262130737305, "learning_rate": 1.4889190463168019e-06, "lm_loss": 5.3403, "loss": 1.322, "step": 1669, "text_contrastive_loss": 0.7516, "train_positive_log_prob": -79.8809, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.35, "epoch": 3.769751693002257, "grad_norm": 12.622626304626465, "learning_rate": 1.483757155344503e-06, "lm_loss": 5.5095, "loss": 1.3177, "step": 1670, "text_contrastive_loss": 0.8334, "train_positive_log_prob": -80.8839, "train_positive_token_accuracy": 0.078, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3512, "epoch": 3.7720090293453725, "grad_norm": 12.186516761779785, "learning_rate": 1.47860266830945e-06, "lm_loss": 5.4187, "loss": 1.2653, "step": 1671, "text_contrastive_loss": 0.7445, "train_positive_log_prob": -80.6886, "train_positive_token_accuracy": 0.0829, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.2086, "epoch": 3.7742663656884874, "grad_norm": 9.920574188232422, "learning_rate": 1.473455596065133e-06, "lm_loss": 5.4306, "loss": 1.0939, "step": 1672, "text_contrastive_loss": 0.6845, "train_positive_log_prob": -80.9688, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3392, "epoch": 3.7765237020316027, "grad_norm": 13.45047378540039, "learning_rate": 1.4683159494494259e-06, "lm_loss": 5.4241, "loss": 1.2624, "step": 1673, "text_contrastive_loss": 0.7617, "train_positive_log_prob": -79.3485, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.4582, "epoch": 3.778781038374718, "grad_norm": 13.828205108642578, "learning_rate": 1.4631837392845694e-06, "lm_loss": 5.4319, "loss": 1.4233, "step": 1674, "text_contrastive_loss": 0.8439, "train_positive_log_prob": -81.7987, "train_positive_token_accuracy": 0.0744, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.315, "epoch": 3.781038374717833, "grad_norm": 11.94735336303711, "learning_rate": 1.4580589763771413e-06, "lm_loss": 5.4159, "loss": 1.2562, "step": 1675, "text_contrastive_loss": 0.7992, "train_positive_log_prob": -80.2065, "train_positive_token_accuracy": 0.0807, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3714, "epoch": 3.783295711060948, "grad_norm": 12.656200408935547, "learning_rate": 1.4529416715180434e-06, "lm_loss": 5.364, "loss": 1.2811, "step": 1676, "text_contrastive_loss": 0.7466, "train_positive_log_prob": -80.126, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.2728, "epoch": 3.785553047404063, "grad_norm": 11.234992027282715, "learning_rate": 1.44783183548247e-06, "lm_loss": 5.4339, "loss": 1.1794, "step": 1677, "text_contrastive_loss": 0.7265, "train_positive_log_prob": -80.6438, "train_positive_token_accuracy": 0.0834, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.25, "epoch": 3.7878103837471784, "grad_norm": 11.732927322387695, "learning_rate": 1.4427294790298902e-06, "lm_loss": 5.4708, "loss": 1.1064, "step": 1678, "text_contrastive_loss": 0.6186, "train_positive_log_prob": -81.0598, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3426, "epoch": 3.7900677200902937, "grad_norm": 12.424903869628906, "learning_rate": 1.4376346129040243e-06, "lm_loss": 5.4416, "loss": 1.2513, "step": 1679, "text_contrastive_loss": 0.7291, "train_positive_log_prob": -79.2237, "train_positive_token_accuracy": 0.0783, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3115, "epoch": 3.7923250564334086, "grad_norm": 12.56497859954834, "learning_rate": 1.432547247832819e-06, "lm_loss": 5.417, "loss": 1.2445, "step": 1680, "text_contrastive_loss": 0.7825, "train_positive_log_prob": -79.5602, "train_positive_token_accuracy": 0.0742, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3519, "epoch": 3.7945823927765234, "grad_norm": 14.999197959899902, "learning_rate": 1.4274673945284278e-06, "lm_loss": 5.364, "loss": 1.2122, "step": 1681, "text_contrastive_loss": 0.6477, "train_positive_log_prob": -80.2879, "train_positive_token_accuracy": 0.0856, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.3976, "epoch": 3.7968397291196387, "grad_norm": 13.894376754760742, "learning_rate": 1.422395063687188e-06, "lm_loss": 5.4293, "loss": 1.304, "step": 1682, "text_contrastive_loss": 0.7269, "train_positive_log_prob": -80.506, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.4057, "epoch": 3.799097065462754, "grad_norm": 16.181882858276367, "learning_rate": 1.4173302659895938e-06, "lm_loss": 5.462, "loss": 1.2965, "step": 1683, "text_contrastive_loss": 0.6891, "train_positive_log_prob": -80.4106, "train_positive_token_accuracy": 0.0844, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.3992, "epoch": 3.801354401805869, "grad_norm": 14.336645126342773, "learning_rate": 1.4122730121002808e-06, "lm_loss": 5.4631, "loss": 1.3213, "step": 1684, "text_contrastive_loss": 0.7515, "train_positive_log_prob": -81.6489, "train_positive_token_accuracy": 0.0831, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.2964, "epoch": 3.8036117381489842, "grad_norm": 12.844280242919922, "learning_rate": 1.4072233126679985e-06, "lm_loss": 5.5475, "loss": 1.1546, "step": 1685, "text_contrastive_loss": 0.607, "train_positive_log_prob": -82.3203, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.3262, "epoch": 3.805869074492099, "grad_norm": 12.758700370788574, "learning_rate": 1.4021811783255912e-06, "lm_loss": 5.5363, "loss": 1.271, "step": 1686, "text_contrastive_loss": 0.7824, "train_positive_log_prob": -80.8609, "train_positive_token_accuracy": 0.0765, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.396, "epoch": 3.8081264108352144, "grad_norm": 14.34169864654541, "learning_rate": 1.3971466196899697e-06, "lm_loss": 5.5657, "loss": 1.3342, "step": 1687, "text_contrastive_loss": 0.7632, "train_positive_log_prob": -81.226, "train_positive_token_accuracy": 0.0739, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3609, "epoch": 3.8103837471783297, "grad_norm": 12.53060531616211, "learning_rate": 1.3921196473620975e-06, "lm_loss": 5.4392, "loss": 1.2955, "step": 1688, "text_contrastive_loss": 0.7814, "train_positive_log_prob": -80.1826, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.299, "epoch": 3.8126410835214446, "grad_norm": 12.580927848815918, "learning_rate": 1.3871002719269616e-06, "lm_loss": 5.4511, "loss": 1.2568, "step": 1689, "text_contrastive_loss": 0.8254, "train_positive_log_prob": -78.8264, "train_positive_token_accuracy": 0.0851, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3082, "epoch": 3.81489841986456, "grad_norm": 12.516457557678223, "learning_rate": 1.3820885039535564e-06, "lm_loss": 5.3756, "loss": 1.1959, "step": 1690, "text_contrastive_loss": 0.7003, "train_positive_log_prob": -79.8363, "train_positive_token_accuracy": 0.081, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.3517, "epoch": 3.8171557562076748, "grad_norm": 14.051484107971191, "learning_rate": 1.3770843539948508e-06, "lm_loss": 5.5623, "loss": 1.2839, "step": 1691, "text_contrastive_loss": 0.7519, "train_positive_log_prob": -81.8257, "train_positive_token_accuracy": 0.0751, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3826, "epoch": 3.81941309255079, "grad_norm": 14.231378555297852, "learning_rate": 1.3720878325877785e-06, "lm_loss": 5.3975, "loss": 1.3171, "step": 1692, "text_contrastive_loss": 0.7895, "train_positive_log_prob": -79.0235, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3225, "epoch": 3.8216704288939054, "grad_norm": 12.910148620605469, "learning_rate": 1.3670989502532089e-06, "lm_loss": 5.4088, "loss": 1.3314, "step": 1693, "text_contrastive_loss": 0.936, "train_positive_log_prob": -81.1756, "train_positive_token_accuracy": 0.0809, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3416, "epoch": 3.8239277652370203, "grad_norm": 13.2839994430542, "learning_rate": 1.362117717495926e-06, "lm_loss": 5.3833, "loss": 1.3033, "step": 1694, "text_contrastive_loss": 0.8468, "train_positive_log_prob": -78.4732, "train_positive_token_accuracy": 0.078, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3205, "epoch": 3.8261851015801356, "grad_norm": 12.949750900268555, "learning_rate": 1.3571441448046086e-06, "lm_loss": 5.4558, "loss": 1.2313, "step": 1695, "text_contrastive_loss": 0.7303, "train_positive_log_prob": -81.1304, "train_positive_token_accuracy": 0.0835, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3511, "epoch": 3.8284424379232505, "grad_norm": 12.76517105102539, "learning_rate": 1.3521782426517988e-06, "lm_loss": 5.4325, "loss": 1.3254, "step": 1696, "text_contrastive_loss": 0.8621, "train_positive_log_prob": -79.7705, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3614, "epoch": 3.8306997742663658, "grad_norm": 12.243370056152344, "learning_rate": 1.3472200214938974e-06, "lm_loss": 5.552, "loss": 1.2319, "step": 1697, "text_contrastive_loss": 0.6306, "train_positive_log_prob": -84.407, "train_positive_token_accuracy": 0.0804, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.4119, "epoch": 3.832957110609481, "grad_norm": 12.767352104187012, "learning_rate": 1.3422694917711276e-06, "lm_loss": 5.4179, "loss": 1.3749, "step": 1698, "text_contrastive_loss": 0.8424, "train_positive_log_prob": -79.1732, "train_positive_token_accuracy": 0.0826, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3748, "epoch": 3.835214446952596, "grad_norm": 13.090643882751465, "learning_rate": 1.3373266639075134e-06, "lm_loss": 5.3874, "loss": 1.281, "step": 1699, "text_contrastive_loss": 0.7349, "train_positive_log_prob": -81.1469, "train_positive_token_accuracy": 0.083, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.3711, "epoch": 3.837471783295711, "grad_norm": 13.619207382202148, "learning_rate": 1.3323915483108662e-06, "lm_loss": 5.3796, "loss": 1.2763, "step": 1700, "text_contrastive_loss": 0.7345, "train_positive_log_prob": -76.7199, "train_positive_token_accuracy": 0.0715, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4036, "epoch": 3.839729119638826, "grad_norm": 12.674599647521973, "learning_rate": 1.3274641553727568e-06, "lm_loss": 5.448, "loss": 1.3591, "step": 1701, "text_contrastive_loss": 0.8213, "train_positive_log_prob": -80.635, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.4016, "epoch": 3.8419864559819414, "grad_norm": 12.56098747253418, "learning_rate": 1.3225444954684962e-06, "lm_loss": 5.2899, "loss": 1.3108, "step": 1702, "text_contrastive_loss": 0.7605, "train_positive_log_prob": -78.6033, "train_positive_token_accuracy": 0.0833, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3472, "epoch": 3.8442437923250563, "grad_norm": 14.493278503417969, "learning_rate": 1.3176325789571075e-06, "lm_loss": 5.4614, "loss": 1.3333, "step": 1703, "text_contrastive_loss": 0.88, "train_positive_log_prob": -80.5549, "train_positive_token_accuracy": 0.0845, "train_positive_token_prob": 0.0325 }, { "contrastive_loss": 0.3124, "epoch": 3.8465011286681716, "grad_norm": 11.662528991699219, "learning_rate": 1.3127284161813153e-06, "lm_loss": 5.4786, "loss": 1.2094, "step": 1704, "text_contrastive_loss": 0.6982, "train_positive_log_prob": -80.4709, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3871, "epoch": 3.8487584650112865, "grad_norm": 14.075630187988281, "learning_rate": 1.3078320174675141e-06, "lm_loss": 5.5057, "loss": 1.3205, "step": 1705, "text_contrastive_loss": 0.7656, "train_positive_log_prob": -82.8651, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3457, "epoch": 3.851015801354402, "grad_norm": 12.885129928588867, "learning_rate": 1.3029433931257524e-06, "lm_loss": 5.4407, "loss": 1.2158, "step": 1706, "text_contrastive_loss": 0.652, "train_positive_log_prob": -80.4417, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.3413, "epoch": 3.853273137697517, "grad_norm": 11.768065452575684, "learning_rate": 1.2980625534497037e-06, "lm_loss": 5.3975, "loss": 1.3302, "step": 1707, "text_contrastive_loss": 0.8983, "train_positive_log_prob": -79.2785, "train_positive_token_accuracy": 0.0753, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3214, "epoch": 3.855530474040632, "grad_norm": 12.895203590393066, "learning_rate": 1.2931895087166551e-06, "lm_loss": 5.4598, "loss": 1.2505, "step": 1708, "text_contrastive_loss": 0.7661, "train_positive_log_prob": -79.2966, "train_positive_token_accuracy": 0.0753, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.302, "epoch": 3.8577878103837473, "grad_norm": 11.259028434753418, "learning_rate": 1.2883242691874792e-06, "lm_loss": 5.463, "loss": 1.1774, "step": 1709, "text_contrastive_loss": 0.6582, "train_positive_log_prob": -81.8617, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3362, "epoch": 3.860045146726862, "grad_norm": 11.875563621520996, "learning_rate": 1.2834668451066118e-06, "lm_loss": 5.3559, "loss": 1.2851, "step": 1710, "text_contrastive_loss": 0.8265, "train_positive_log_prob": -82.1208, "train_positive_token_accuracy": 0.0752, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3458, "epoch": 3.8623024830699775, "grad_norm": 12.237154960632324, "learning_rate": 1.2786172467020357e-06, "lm_loss": 5.3498, "loss": 1.2954, "step": 1711, "text_contrastive_loss": 0.8292, "train_positive_log_prob": -79.0932, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.4083, "epoch": 3.864559819413093, "grad_norm": 12.66635799407959, "learning_rate": 1.2737754841852501e-06, "lm_loss": 5.4067, "loss": 1.3198, "step": 1712, "text_contrastive_loss": 0.7417, "train_positive_log_prob": -80.6356, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.2855, "epoch": 3.8668171557562077, "grad_norm": 10.827605247497559, "learning_rate": 1.2689415677512574e-06, "lm_loss": 5.3867, "loss": 1.2149, "step": 1713, "text_contrastive_loss": 0.7815, "train_positive_log_prob": -79.4785, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3468, "epoch": 3.8690744920993225, "grad_norm": 11.63575553894043, "learning_rate": 1.2641155075785444e-06, "lm_loss": 5.4074, "loss": 1.3457, "step": 1714, "text_contrastive_loss": 0.9163, "train_positive_log_prob": -81.1527, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3969, "epoch": 3.871331828442438, "grad_norm": 15.075748443603516, "learning_rate": 1.259297313829046e-06, "lm_loss": 5.369, "loss": 1.2969, "step": 1715, "text_contrastive_loss": 0.7261, "train_positive_log_prob": -78.6736, "train_positive_token_accuracy": 0.0762, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3952, "epoch": 3.873589164785553, "grad_norm": 13.494071960449219, "learning_rate": 1.2544869966481389e-06, "lm_loss": 5.5108, "loss": 1.2823, "step": 1716, "text_contrastive_loss": 0.6721, "train_positive_log_prob": -82.2344, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3156, "epoch": 3.875846501128668, "grad_norm": 11.806557655334473, "learning_rate": 1.249684566164614e-06, "lm_loss": 5.3949, "loss": 1.2527, "step": 1717, "text_contrastive_loss": 0.7952, "train_positive_log_prob": -78.6854, "train_positive_token_accuracy": 0.0814, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.4408, "epoch": 3.8781038374717833, "grad_norm": 13.002033233642578, "learning_rate": 1.2448900324906559e-06, "lm_loss": 5.3442, "loss": 1.3488, "step": 1718, "text_contrastive_loss": 0.7472, "train_positive_log_prob": -78.8077, "train_positive_token_accuracy": 0.0879, "train_positive_token_prob": 0.0327 }, { "contrastive_loss": 0.3307, "epoch": 3.880361173814898, "grad_norm": 13.509655952453613, "learning_rate": 1.2401034057218181e-06, "lm_loss": 5.316, "loss": 1.2387, "step": 1719, "text_contrastive_loss": 0.7528, "train_positive_log_prob": -76.8592, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.363, "epoch": 3.8826185101580135, "grad_norm": 12.645583152770996, "learning_rate": 1.2353246959370086e-06, "lm_loss": 5.4205, "loss": 1.3323, "step": 1720, "text_contrastive_loss": 0.8545, "train_positive_log_prob": -80.23, "train_positive_token_accuracy": 0.0744, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.42, "epoch": 3.884875846501129, "grad_norm": 14.332308769226074, "learning_rate": 1.2305539131984646e-06, "lm_loss": 5.3384, "loss": 1.4214, "step": 1721, "text_contrastive_loss": 0.9351, "train_positive_log_prob": -78.5759, "train_positive_token_accuracy": 0.0767, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.4018, "epoch": 3.8871331828442437, "grad_norm": 12.9856538772583, "learning_rate": 1.2257910675517315e-06, "lm_loss": 5.3644, "loss": 1.3223, "step": 1722, "text_contrastive_loss": 0.7682, "train_positive_log_prob": -78.6038, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.2709, "epoch": 3.889390519187359, "grad_norm": 11.077625274658203, "learning_rate": 1.22103616902564e-06, "lm_loss": 5.3067, "loss": 1.2186, "step": 1723, "text_contrastive_loss": 0.8341, "train_positive_log_prob": -78.7509, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.4286, "epoch": 3.891647855530474, "grad_norm": 15.429622650146484, "learning_rate": 1.21628922763229e-06, "lm_loss": 5.379, "loss": 1.3677, "step": 1724, "text_contrastive_loss": 0.8025, "train_positive_log_prob": -78.4739, "train_positive_token_accuracy": 0.0758, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3737, "epoch": 3.893905191873589, "grad_norm": 13.851577758789062, "learning_rate": 1.2115502533670253e-06, "lm_loss": 5.6074, "loss": 1.3588, "step": 1725, "text_contrastive_loss": 0.8488, "train_positive_log_prob": -84.7875, "train_positive_token_accuracy": 0.0783, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4194, "epoch": 3.8961625282167045, "grad_norm": 15.04504108428955, "learning_rate": 1.2068192562084146e-06, "lm_loss": 5.2553, "loss": 1.3722, "step": 1726, "text_contrastive_loss": 0.8546, "train_positive_log_prob": -76.0595, "train_positive_token_accuracy": 0.0856, "train_positive_token_prob": 0.0333 }, { "contrastive_loss": 0.3581, "epoch": 3.8984198645598194, "grad_norm": 11.373048782348633, "learning_rate": 1.2020962461182268e-06, "lm_loss": 5.4031, "loss": 1.2518, "step": 1727, "text_contrastive_loss": 0.7068, "train_positive_log_prob": -80.0769, "train_positive_token_accuracy": 0.0793, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.338, "epoch": 3.9006772009029347, "grad_norm": 15.169660568237305, "learning_rate": 1.1973812330414159e-06, "lm_loss": 5.3122, "loss": 1.2035, "step": 1728, "text_contrastive_loss": 0.6685, "train_positive_log_prob": -77.5892, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3461, "epoch": 3.9029345372460496, "grad_norm": 13.146347045898438, "learning_rate": 1.1926742269060965e-06, "lm_loss": 5.4069, "loss": 1.2937, "step": 1729, "text_contrastive_loss": 0.8137, "train_positive_log_prob": -79.2476, "train_positive_token_accuracy": 0.0858, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3596, "epoch": 3.905191873589165, "grad_norm": 12.391469955444336, "learning_rate": 1.1879752376235231e-06, "lm_loss": 5.4798, "loss": 1.2918, "step": 1730, "text_contrastive_loss": 0.7685, "train_positive_log_prob": -78.5356, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.3001, "epoch": 3.90744920993228, "grad_norm": 11.998820304870605, "learning_rate": 1.1832842750880702e-06, "lm_loss": 5.4692, "loss": 1.2222, "step": 1731, "text_contrastive_loss": 0.7504, "train_positive_log_prob": -79.6677, "train_positive_token_accuracy": 0.081, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3504, "epoch": 3.909706546275395, "grad_norm": 12.540609359741211, "learning_rate": 1.1786013491772103e-06, "lm_loss": 5.3138, "loss": 1.277, "step": 1732, "text_contrastive_loss": 0.7904, "train_positive_log_prob": -78.1723, "train_positive_token_accuracy": 0.082, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.3438, "epoch": 3.91196388261851, "grad_norm": 13.03815746307373, "learning_rate": 1.173926469751493e-06, "lm_loss": 5.4266, "loss": 1.2882, "step": 1733, "text_contrastive_loss": 0.8034, "train_positive_log_prob": -79.0074, "train_positive_token_accuracy": 0.0816, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3338, "epoch": 3.9142212189616252, "grad_norm": 10.691126823425293, "learning_rate": 1.1692596466545275e-06, "lm_loss": 5.4213, "loss": 1.2798, "step": 1734, "text_contrastive_loss": 0.8077, "train_positive_log_prob": -81.9562, "train_positive_token_accuracy": 0.0879, "train_positive_token_prob": 0.0329 }, { "contrastive_loss": 0.2949, "epoch": 3.9164785553047405, "grad_norm": 11.609354972839355, "learning_rate": 1.1646008897129546e-06, "lm_loss": 5.4752, "loss": 1.2089, "step": 1735, "text_contrastive_loss": 0.733, "train_positive_log_prob": -80.8048, "train_positive_token_accuracy": 0.0776, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3394, "epoch": 3.9187358916478554, "grad_norm": 13.065603256225586, "learning_rate": 1.1599502087364345e-06, "lm_loss": 5.5566, "loss": 1.227, "step": 1736, "text_contrastive_loss": 0.6638, "train_positive_log_prob": -83.5901, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.4092, "epoch": 3.9209932279909707, "grad_norm": 12.14127254486084, "learning_rate": 1.1553076135176222e-06, "lm_loss": 5.5211, "loss": 1.3445, "step": 1737, "text_contrastive_loss": 0.7664, "train_positive_log_prob": -80.5517, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.328, "epoch": 3.9232505643340856, "grad_norm": 12.346694946289062, "learning_rate": 1.1506731138321474e-06, "lm_loss": 5.4705, "loss": 1.2178, "step": 1738, "text_contrastive_loss": 0.6854, "train_positive_log_prob": -78.8081, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.4454, "epoch": 3.925507900677201, "grad_norm": 14.74303913116455, "learning_rate": 1.1460467194385889e-06, "lm_loss": 5.3789, "loss": 1.4999, "step": 1739, "text_contrastive_loss": 1.0331, "train_positive_log_prob": -79.3599, "train_positive_token_accuracy": 0.0855, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.329, "epoch": 3.927765237020316, "grad_norm": 12.316030502319336, "learning_rate": 1.1414284400784643e-06, "lm_loss": 5.4031, "loss": 1.2632, "step": 1740, "text_contrastive_loss": 0.7877, "train_positive_log_prob": -80.7535, "train_positive_token_accuracy": 0.0763, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.2936, "epoch": 3.930022573363431, "grad_norm": 11.090486526489258, "learning_rate": 1.1368182854762005e-06, "lm_loss": 5.49, "loss": 1.1501, "step": 1741, "text_contrastive_loss": 0.6149, "train_positive_log_prob": -81.276, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.2949, "epoch": 3.9322799097065464, "grad_norm": 13.24492359161377, "learning_rate": 1.13221626533912e-06, "lm_loss": 5.5015, "loss": 1.1919, "step": 1742, "text_contrastive_loss": 0.6937, "train_positive_log_prob": -81.1034, "train_positive_token_accuracy": 0.0827, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.364, "epoch": 3.9345372460496613, "grad_norm": 13.615939140319824, "learning_rate": 1.1276223893574123e-06, "lm_loss": 5.4266, "loss": 1.3158, "step": 1743, "text_contrastive_loss": 0.8182, "train_positive_log_prob": -80.4011, "train_positive_token_accuracy": 0.0801, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3225, "epoch": 3.9367945823927766, "grad_norm": 11.627803802490234, "learning_rate": 1.1230366672041216e-06, "lm_loss": 5.4074, "loss": 1.2319, "step": 1744, "text_contrastive_loss": 0.7374, "train_positive_log_prob": -80.5205, "train_positive_token_accuracy": 0.0816, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3449, "epoch": 3.939051918735892, "grad_norm": 12.176133155822754, "learning_rate": 1.118459108535122e-06, "lm_loss": 5.3875, "loss": 1.253, "step": 1745, "text_contrastive_loss": 0.7387, "train_positive_log_prob": -78.4769, "train_positive_token_accuracy": 0.078, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3757, "epoch": 3.9413092550790068, "grad_norm": 13.935737609863281, "learning_rate": 1.1138897229890995e-06, "lm_loss": 5.4409, "loss": 1.3611, "step": 1746, "text_contrastive_loss": 0.8826, "train_positive_log_prob": -82.0837, "train_positive_token_accuracy": 0.0738, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.4234, "epoch": 3.9435665914221216, "grad_norm": 13.838811874389648, "learning_rate": 1.109328520187528e-06, "lm_loss": 5.5712, "loss": 1.3746, "step": 1747, "text_contrastive_loss": 0.7882, "train_positive_log_prob": -85.3294, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.2762, "epoch": 3.945823927765237, "grad_norm": 12.039306640625, "learning_rate": 1.1047755097346541e-06, "lm_loss": 5.4991, "loss": 1.2051, "step": 1748, "text_contrastive_loss": 0.7581, "train_positive_log_prob": -79.0288, "train_positive_token_accuracy": 0.0805, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.4056, "epoch": 3.9480812641083523, "grad_norm": 13.613452911376953, "learning_rate": 1.100230701217473e-06, "lm_loss": 5.2911, "loss": 1.3528, "step": 1749, "text_contrastive_loss": 0.8362, "train_positive_log_prob": -78.0426, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.396, "epoch": 3.950338600451467, "grad_norm": 13.061095237731934, "learning_rate": 1.0956941042057106e-06, "lm_loss": 5.4116, "loss": 1.3418, "step": 1750, "text_contrastive_loss": 0.8094, "train_positive_log_prob": -80.1604, "train_positive_token_accuracy": 0.0789, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3525, "epoch": 3.9525959367945824, "grad_norm": 12.334271430969238, "learning_rate": 1.091165728251799e-06, "lm_loss": 5.4548, "loss": 1.3168, "step": 1751, "text_contrastive_loss": 0.8377, "train_positive_log_prob": -81.3919, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3543, "epoch": 3.9548532731376973, "grad_norm": 12.155789375305176, "learning_rate": 1.0866455828908634e-06, "lm_loss": 5.4532, "loss": 1.259, "step": 1752, "text_contrastive_loss": 0.7188, "train_positive_log_prob": -80.2235, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3295, "epoch": 3.9571106094808126, "grad_norm": 12.81718635559082, "learning_rate": 1.082133677640697e-06, "lm_loss": 5.4989, "loss": 1.2976, "step": 1753, "text_contrastive_loss": 0.8363, "train_positive_log_prob": -80.4246, "train_positive_token_accuracy": 0.0839, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3965, "epoch": 3.959367945823928, "grad_norm": 12.595709800720215, "learning_rate": 1.0776300220017437e-06, "lm_loss": 5.3596, "loss": 1.3096, "step": 1754, "text_contrastive_loss": 0.7541, "train_positive_log_prob": -78.7819, "train_positive_token_accuracy": 0.0749, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3105, "epoch": 3.961625282167043, "grad_norm": 12.437752723693848, "learning_rate": 1.0731346254570735e-06, "lm_loss": 5.309, "loss": 1.1856, "step": 1755, "text_contrastive_loss": 0.6882, "train_positive_log_prob": -79.6097, "train_positive_token_accuracy": 0.0815, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3407, "epoch": 3.963882618510158, "grad_norm": 12.470183372497559, "learning_rate": 1.068647497472368e-06, "lm_loss": 5.4084, "loss": 1.2713, "step": 1756, "text_contrastive_loss": 0.7796, "train_positive_log_prob": -81.0631, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3843, "epoch": 3.966139954853273, "grad_norm": 13.911128044128418, "learning_rate": 1.064168647495899e-06, "lm_loss": 5.3746, "loss": 1.402, "step": 1757, "text_contrastive_loss": 0.9603, "train_positive_log_prob": -78.6689, "train_positive_token_accuracy": 0.0833, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.4406, "epoch": 3.9683972911963883, "grad_norm": 13.523388862609863, "learning_rate": 1.0596980849585065e-06, "lm_loss": 5.4742, "loss": 1.3819, "step": 1758, "text_contrastive_loss": 0.7877, "train_positive_log_prob": -81.9404, "train_positive_token_accuracy": 0.0817, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3728, "epoch": 3.9706546275395036, "grad_norm": 11.978256225585938, "learning_rate": 1.0552358192735784e-06, "lm_loss": 5.4214, "loss": 1.2713, "step": 1759, "text_contrastive_loss": 0.7129, "train_positive_log_prob": -80.2448, "train_positive_token_accuracy": 0.0765, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3509, "epoch": 3.9729119638826185, "grad_norm": 13.259681701660156, "learning_rate": 1.0507818598370355e-06, "lm_loss": 5.5014, "loss": 1.2838, "step": 1760, "text_contrastive_loss": 0.7654, "train_positive_log_prob": -84.0741, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.2749, "epoch": 3.975169300225734, "grad_norm": 11.363874435424805, "learning_rate": 1.0463362160273076e-06, "lm_loss": 5.3643, "loss": 1.1328, "step": 1761, "text_contrastive_loss": 0.6431, "train_positive_log_prob": -78.9544, "train_positive_token_accuracy": 0.0924, "train_positive_token_prob": 0.0338 }, { "contrastive_loss": 0.3748, "epoch": 3.9774266365688487, "grad_norm": 14.530806541442871, "learning_rate": 1.0418988972053162e-06, "lm_loss": 5.4712, "loss": 1.3201, "step": 1762, "text_contrastive_loss": 0.7964, "train_positive_log_prob": -80.787, "train_positive_token_accuracy": 0.0836, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3066, "epoch": 3.979683972911964, "grad_norm": 11.596506118774414, "learning_rate": 1.037469912714449e-06, "lm_loss": 5.514, "loss": 1.1512, "step": 1763, "text_contrastive_loss": 0.5864, "train_positive_log_prob": -82.52, "train_positive_token_accuracy": 0.0744, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.3326, "epoch": 3.9819413092550793, "grad_norm": 11.653396606445312, "learning_rate": 1.0330492718805469e-06, "lm_loss": 5.3952, "loss": 1.2658, "step": 1764, "text_contrastive_loss": 0.7874, "train_positive_log_prob": -79.6244, "train_positive_token_accuracy": 0.0847, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3091, "epoch": 3.984198645598194, "grad_norm": 11.884001731872559, "learning_rate": 1.0286369840118859e-06, "lm_loss": 5.4429, "loss": 1.2438, "step": 1765, "text_contrastive_loss": 0.7809, "train_positive_log_prob": -80.0117, "train_positive_token_accuracy": 0.0837, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3805, "epoch": 3.986455981941309, "grad_norm": 12.454544067382812, "learning_rate": 1.0242330583991507e-06, "lm_loss": 5.3399, "loss": 1.3185, "step": 1766, "text_contrastive_loss": 0.8081, "train_positive_log_prob": -78.6391, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.3767, "epoch": 3.9887133182844243, "grad_norm": 12.833479881286621, "learning_rate": 1.0198375043154142e-06, "lm_loss": 5.4009, "loss": 1.3558, "step": 1767, "text_contrastive_loss": 0.878, "train_positive_log_prob": -80.6305, "train_positive_token_accuracy": 0.0835, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3691, "epoch": 3.9909706546275396, "grad_norm": 12.905168533325195, "learning_rate": 1.0154503310161269e-06, "lm_loss": 5.4242, "loss": 1.3104, "step": 1768, "text_contrastive_loss": 0.7979, "train_positive_log_prob": -80.2363, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3059, "epoch": 3.9932279909706545, "grad_norm": 12.173251152038574, "learning_rate": 1.0110715477390915e-06, "lm_loss": 5.3933, "loss": 1.2139, "step": 1769, "text_contrastive_loss": 0.7373, "train_positive_log_prob": -77.7596, "train_positive_token_accuracy": 0.0848, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.4048, "epoch": 3.99548532731377, "grad_norm": 14.231117248535156, "learning_rate": 1.006701163704445e-06, "lm_loss": 5.4088, "loss": 1.3117, "step": 1770, "text_contrastive_loss": 0.7321, "train_positive_log_prob": -79.8205, "train_positive_token_accuracy": 0.0826, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3499, "epoch": 3.9977426636568847, "grad_norm": 11.990342140197754, "learning_rate": 1.0023391881146349e-06, "lm_loss": 5.3637, "loss": 1.2202, "step": 1771, "text_contrastive_loss": 0.6679, "train_positive_log_prob": -77.0421, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.2676, "epoch": 4.0, "grad_norm": 16.870798110961914, "learning_rate": 9.97985630154407e-07, "lm_loss": 5.3585, "loss": 1.0692, "step": 1772, "text_contrastive_loss": 0.5314, "train_positive_log_prob": -81.5245, "train_positive_token_accuracy": 0.0721, "train_positive_token_prob": 0.0329 }, { "contrastive_loss": 0.2872, "epoch": 4.002257336343115, "grad_norm": 11.189910888671875, "learning_rate": 9.936404989907828e-07, "lm_loss": 5.3116, "loss": 1.1275, "step": 1773, "text_contrastive_loss": 0.6182, "train_positive_log_prob": -78.2434, "train_positive_token_accuracy": 0.0825, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.3803, "epoch": 4.004514672686231, "grad_norm": 12.189338684082031, "learning_rate": 9.89303803773039e-07, "lm_loss": 5.3849, "loss": 1.3012, "step": 1774, "text_contrastive_loss": 0.7648, "train_positive_log_prob": -78.9199, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3288, "epoch": 4.006772009029345, "grad_norm": 12.933213233947754, "learning_rate": 9.849755536326866e-07, "lm_loss": 5.5121, "loss": 1.2728, "step": 1775, "text_contrastive_loss": 0.7855, "train_positive_log_prob": -81.8424, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.4056, "epoch": 4.00902934537246, "grad_norm": 13.452736854553223, "learning_rate": 9.806557576834591e-07, "lm_loss": 5.4404, "loss": 1.4022, "step": 1776, "text_contrastive_loss": 0.9051, "train_positive_log_prob": -83.0911, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.2943, "epoch": 4.011286681715576, "grad_norm": 12.769314765930176, "learning_rate": 9.763444250212855e-07, "lm_loss": 5.428, "loss": 1.1971, "step": 1777, "text_contrastive_loss": 0.7199, "train_positive_log_prob": -82.0135, "train_positive_token_accuracy": 0.0812, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.2997, "epoch": 4.013544018058691, "grad_norm": 11.931144714355469, "learning_rate": 9.72041564724277e-07, "lm_loss": 5.364, "loss": 1.2264, "step": 1778, "text_contrastive_loss": 0.7807, "train_positive_log_prob": -79.0299, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3139, "epoch": 4.015801354401806, "grad_norm": 11.053611755371094, "learning_rate": 9.677471858526998e-07, "lm_loss": 5.3535, "loss": 1.1793, "step": 1779, "text_contrastive_loss": 0.66, "train_positive_log_prob": -78.2922, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3045, "epoch": 4.018058690744921, "grad_norm": 11.501129150390625, "learning_rate": 9.63461297448966e-07, "lm_loss": 5.3834, "loss": 1.1888, "step": 1780, "text_contrastive_loss": 0.692, "train_positive_log_prob": -79.2184, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3407, "epoch": 4.020316027088036, "grad_norm": 11.808714866638184, "learning_rate": 9.59183908537607e-07, "lm_loss": 5.4025, "loss": 1.2999, "step": 1781, "text_contrastive_loss": 0.8379, "train_positive_log_prob": -81.1241, "train_positive_token_accuracy": 0.078, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.2588, "epoch": 4.022573363431151, "grad_norm": 11.92055606842041, "learning_rate": 9.549150281252633e-07, "lm_loss": 5.5553, "loss": 1.2261, "step": 1782, "text_contrastive_loss": 0.8235, "train_positive_log_prob": -82.0505, "train_positive_token_accuracy": 0.0857, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.291, "epoch": 4.024830699774267, "grad_norm": 12.611737251281738, "learning_rate": 9.506546652006504e-07, "lm_loss": 5.4777, "loss": 1.1931, "step": 1783, "text_contrastive_loss": 0.7086, "train_positive_log_prob": -80.498, "train_positive_token_accuracy": 0.0835, "train_positive_token_prob": 0.0326 }, { "contrastive_loss": 0.2773, "epoch": 4.027088036117381, "grad_norm": 12.908148765563965, "learning_rate": 9.464028287345551e-07, "lm_loss": 5.3637, "loss": 1.1642, "step": 1784, "text_contrastive_loss": 0.701, "train_positive_log_prob": -80.2282, "train_positive_token_accuracy": 0.0867, "train_positive_token_prob": 0.0337 }, { "contrastive_loss": 0.3173, "epoch": 4.029345372460496, "grad_norm": 12.55312728881836, "learning_rate": 9.421595276798084e-07, "lm_loss": 5.4336, "loss": 1.2938, "step": 1785, "text_contrastive_loss": 0.8663, "train_positive_log_prob": -79.3157, "train_positive_token_accuracy": 0.0822, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3421, "epoch": 4.031602708803612, "grad_norm": 11.590481758117676, "learning_rate": 9.379247709712725e-07, "lm_loss": 5.4248, "loss": 1.2646, "step": 1786, "text_contrastive_loss": 0.76, "train_positive_log_prob": -79.2937, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3528, "epoch": 4.033860045146727, "grad_norm": 12.176640510559082, "learning_rate": 9.336985675258109e-07, "lm_loss": 5.3758, "loss": 1.272, "step": 1787, "text_contrastive_loss": 0.7634, "train_positive_log_prob": -78.9601, "train_positive_token_accuracy": 0.075, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3157, "epoch": 4.036117381489842, "grad_norm": 10.483368873596191, "learning_rate": 9.294809262422838e-07, "lm_loss": 5.5366, "loss": 1.2181, "step": 1788, "text_contrastive_loss": 0.6974, "train_positive_log_prob": -83.069, "train_positive_token_accuracy": 0.0755, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3807, "epoch": 4.038374717832957, "grad_norm": 12.850399017333984, "learning_rate": 9.2527185600152e-07, "lm_loss": 5.4233, "loss": 1.2933, "step": 1789, "text_contrastive_loss": 0.7405, "train_positive_log_prob": -80.9718, "train_positive_token_accuracy": 0.0808, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.2874, "epoch": 4.040632054176072, "grad_norm": 11.69446849822998, "learning_rate": 9.210713656663023e-07, "lm_loss": 5.4678, "loss": 1.1651, "step": 1790, "text_contrastive_loss": 0.6618, "train_positive_log_prob": -81.3588, "train_positive_token_accuracy": 0.0798, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.2661, "epoch": 4.042889390519187, "grad_norm": 12.541370391845703, "learning_rate": 9.168794640813428e-07, "lm_loss": 5.3809, "loss": 1.1086, "step": 1791, "text_contrastive_loss": 0.6087, "train_positive_log_prob": -80.3596, "train_positive_token_accuracy": 0.073, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3006, "epoch": 4.045146726862303, "grad_norm": 10.826007843017578, "learning_rate": 9.126961600732742e-07, "lm_loss": 5.3845, "loss": 1.1871, "step": 1792, "text_contrastive_loss": 0.6963, "train_positive_log_prob": -80.2194, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3142, "epoch": 4.047404063205418, "grad_norm": 12.387374877929688, "learning_rate": 9.085214624506228e-07, "lm_loss": 5.3772, "loss": 1.279, "step": 1793, "text_contrastive_loss": 0.8542, "train_positive_log_prob": -79.5236, "train_positive_token_accuracy": 0.0885, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.3609, "epoch": 4.049661399548532, "grad_norm": 12.288250923156738, "learning_rate": 9.043553800037952e-07, "lm_loss": 5.333, "loss": 1.2493, "step": 1794, "text_contrastive_loss": 0.7101, "train_positive_log_prob": -77.4764, "train_positive_token_accuracy": 0.0815, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.2918, "epoch": 4.051918735891648, "grad_norm": 12.319785118103027, "learning_rate": 9.001979215050544e-07, "lm_loss": 5.4524, "loss": 1.2632, "step": 1795, "text_contrastive_loss": 0.8522, "train_positive_log_prob": -78.662, "train_positive_token_accuracy": 0.078, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3115, "epoch": 4.054176072234763, "grad_norm": 11.470542907714844, "learning_rate": 8.960490957085061e-07, "lm_loss": 5.3993, "loss": 1.1714, "step": 1796, "text_contrastive_loss": 0.64, "train_positive_log_prob": -79.3431, "train_positive_token_accuracy": 0.0845, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.4237, "epoch": 4.056433408577878, "grad_norm": 13.134199142456055, "learning_rate": 8.919089113500795e-07, "lm_loss": 5.328, "loss": 1.3832, "step": 1797, "text_contrastive_loss": 0.8535, "train_positive_log_prob": -78.2578, "train_positive_token_accuracy": 0.0952, "train_positive_token_prob": 0.0341 }, { "contrastive_loss": 0.332, "epoch": 4.058690744920993, "grad_norm": 12.321008682250977, "learning_rate": 8.877773771475074e-07, "lm_loss": 5.464, "loss": 1.219, "step": 1798, "text_contrastive_loss": 0.6812, "train_positive_log_prob": -81.9166, "train_positive_token_accuracy": 0.0761, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3589, "epoch": 4.060948081264108, "grad_norm": 12.761566162109375, "learning_rate": 8.836545018003084e-07, "lm_loss": 5.5035, "loss": 1.3069, "step": 1799, "text_contrastive_loss": 0.7954, "train_positive_log_prob": -81.6488, "train_positive_token_accuracy": 0.0737, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.3871, "epoch": 4.063205417607223, "grad_norm": 14.203374862670898, "learning_rate": 8.795402939897679e-07, "lm_loss": 5.4265, "loss": 1.3561, "step": 1800, "text_contrastive_loss": 0.8529, "train_positive_log_prob": -82.095, "train_positive_token_accuracy": 0.0824, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.3719, "epoch": 4.065462753950339, "grad_norm": 12.417008399963379, "learning_rate": 8.754347623789222e-07, "lm_loss": 5.3673, "loss": 1.3582, "step": 1801, "text_contrastive_loss": 0.8992, "train_positive_log_prob": -79.7549, "train_positive_token_accuracy": 0.0828, "train_positive_token_prob": 0.0327 }, { "contrastive_loss": 0.3008, "epoch": 4.067720090293454, "grad_norm": 12.664691925048828, "learning_rate": 8.713379156125385e-07, "lm_loss": 5.4173, "loss": 1.1594, "step": 1802, "text_contrastive_loss": 0.6336, "train_positive_log_prob": -78.679, "train_positive_token_accuracy": 0.0775, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3289, "epoch": 4.0699774266365685, "grad_norm": 11.8455228805542, "learning_rate": 8.672497623170944e-07, "lm_loss": 5.4951, "loss": 1.2802, "step": 1803, "text_contrastive_loss": 0.8035, "train_positive_log_prob": -81.2894, "train_positive_token_accuracy": 0.0731, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.3138, "epoch": 4.072234762979684, "grad_norm": 11.705512046813965, "learning_rate": 8.631703111007645e-07, "lm_loss": 5.5792, "loss": 1.2216, "step": 1804, "text_contrastive_loss": 0.6997, "train_positive_log_prob": -81.6147, "train_positive_token_accuracy": 0.0692, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.339, "epoch": 4.074492099322799, "grad_norm": 12.615190505981445, "learning_rate": 8.590995705533994e-07, "lm_loss": 5.4129, "loss": 1.2578, "step": 1805, "text_contrastive_loss": 0.7548, "train_positive_log_prob": -81.3609, "train_positive_token_accuracy": 0.0834, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3092, "epoch": 4.076749435665914, "grad_norm": 13.203263282775879, "learning_rate": 8.550375492465102e-07, "lm_loss": 5.383, "loss": 1.1657, "step": 1806, "text_contrastive_loss": 0.6365, "train_positive_log_prob": -81.7542, "train_positive_token_accuracy": 0.0864, "train_positive_token_prob": 0.0326 }, { "contrastive_loss": 0.429, "epoch": 4.07900677200903, "grad_norm": 13.173894882202148, "learning_rate": 8.509842557332437e-07, "lm_loss": 5.4636, "loss": 1.3769, "step": 1807, "text_contrastive_loss": 0.8032, "train_positive_log_prob": -82.9379, "train_positive_token_accuracy": 0.0823, "train_positive_token_prob": 0.0325 }, { "contrastive_loss": 0.3388, "epoch": 4.081264108352144, "grad_norm": 13.1770601272583, "learning_rate": 8.469396985483724e-07, "lm_loss": 5.4438, "loss": 1.2609, "step": 1808, "text_contrastive_loss": 0.7553, "train_positive_log_prob": -80.1489, "train_positive_token_accuracy": 0.0785, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.2894, "epoch": 4.0835214446952595, "grad_norm": 10.92631721496582, "learning_rate": 8.429038862082734e-07, "lm_loss": 5.4675, "loss": 1.1805, "step": 1809, "text_contrastive_loss": 0.6887, "train_positive_log_prob": -79.0419, "train_positive_token_accuracy": 0.0797, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.2761, "epoch": 4.085778781038375, "grad_norm": 12.565187454223633, "learning_rate": 8.388768272109105e-07, "lm_loss": 5.5025, "loss": 1.2419, "step": 1810, "text_contrastive_loss": 0.8312, "train_positive_log_prob": -81.0989, "train_positive_token_accuracy": 0.0808, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.2982, "epoch": 4.08803611738149, "grad_norm": 13.363114356994629, "learning_rate": 8.34858530035813e-07, "lm_loss": 5.4419, "loss": 1.1888, "step": 1811, "text_contrastive_loss": 0.693, "train_positive_log_prob": -82.2441, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.399, "epoch": 4.090293453724605, "grad_norm": 12.355823516845703, "learning_rate": 8.308490031440641e-07, "lm_loss": 5.4626, "loss": 1.3551, "step": 1812, "text_contrastive_loss": 0.8195, "train_positive_log_prob": -79.486, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.3029, "epoch": 4.09255079006772, "grad_norm": 12.237298965454102, "learning_rate": 8.268482549782797e-07, "lm_loss": 5.3363, "loss": 1.2381, "step": 1813, "text_contrastive_loss": 0.8032, "train_positive_log_prob": -79.0204, "train_positive_token_accuracy": 0.0826, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.2492, "epoch": 4.094808126410835, "grad_norm": 10.571876525878906, "learning_rate": 8.228562939625906e-07, "lm_loss": 5.3881, "loss": 1.1082, "step": 1814, "text_contrastive_loss": 0.6403, "train_positive_log_prob": -80.406, "train_positive_token_accuracy": 0.0783, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.2274, "epoch": 4.0970654627539504, "grad_norm": 10.091174125671387, "learning_rate": 8.188731285026219e-07, "lm_loss": 5.4757, "loss": 1.0978, "step": 1815, "text_contrastive_loss": 0.6458, "train_positive_log_prob": -82.757, "train_positive_token_accuracy": 0.0822, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.2739, "epoch": 4.099322799097066, "grad_norm": 11.475275039672852, "learning_rate": 8.148987669854846e-07, "lm_loss": 5.4154, "loss": 1.1924, "step": 1816, "text_contrastive_loss": 0.7539, "train_positive_log_prob": -80.2566, "train_positive_token_accuracy": 0.0754, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3325, "epoch": 4.10158013544018, "grad_norm": 11.306097984313965, "learning_rate": 8.109332177797469e-07, "lm_loss": 5.421, "loss": 1.2918, "step": 1817, "text_contrastive_loss": 0.8344, "train_positive_log_prob": -81.6312, "train_positive_token_accuracy": 0.082, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.3344, "epoch": 4.1038374717832955, "grad_norm": 14.255751609802246, "learning_rate": 8.069764892354237e-07, "lm_loss": 5.4051, "loss": 1.3253, "step": 1818, "text_contrastive_loss": 0.9009, "train_positive_log_prob": -81.4146, "train_positive_token_accuracy": 0.0858, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.356, "epoch": 4.106094808126411, "grad_norm": 12.633360862731934, "learning_rate": 8.030285896839546e-07, "lm_loss": 5.4218, "loss": 1.2902, "step": 1819, "text_contrastive_loss": 0.784, "train_positive_log_prob": -80.2421, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.4155, "epoch": 4.108352144469526, "grad_norm": 14.468781471252441, "learning_rate": 7.99089527438191e-07, "lm_loss": 5.345, "loss": 1.3988, "step": 1820, "text_contrastive_loss": 0.8976, "train_positive_log_prob": -78.7982, "train_positive_token_accuracy": 0.0798, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3224, "epoch": 4.110609480812641, "grad_norm": 11.905516624450684, "learning_rate": 7.951593107923744e-07, "lm_loss": 5.4817, "loss": 1.2949, "step": 1821, "text_contrastive_loss": 0.8485, "train_positive_log_prob": -82.5006, "train_positive_token_accuracy": 0.075, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.2713, "epoch": 4.112866817155756, "grad_norm": 12.390151023864746, "learning_rate": 7.912379480221228e-07, "lm_loss": 5.2288, "loss": 1.1593, "step": 1822, "text_contrastive_loss": 0.7302, "train_positive_log_prob": -76.511, "train_positive_token_accuracy": 0.0838, "train_positive_token_prob": 0.0331 }, { "contrastive_loss": 0.3649, "epoch": 4.115124153498871, "grad_norm": 12.990013122558594, "learning_rate": 7.873254473844077e-07, "lm_loss": 5.4546, "loss": 1.2952, "step": 1823, "text_contrastive_loss": 0.7698, "train_positive_log_prob": -80.4384, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.2801, "epoch": 4.1173814898419865, "grad_norm": 10.795341491699219, "learning_rate": 7.834218171175428e-07, "lm_loss": 5.4179, "loss": 1.2101, "step": 1824, "text_contrastive_loss": 0.7764, "train_positive_log_prob": -80.5509, "train_positive_token_accuracy": 0.0767, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.2809, "epoch": 4.119638826185102, "grad_norm": 12.400055885314941, "learning_rate": 7.795270654411635e-07, "lm_loss": 5.3719, "loss": 1.149, "step": 1825, "text_contrastive_loss": 0.6619, "train_positive_log_prob": -81.2927, "train_positive_token_accuracy": 0.0738, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.336, "epoch": 4.121896162528217, "grad_norm": 12.246535301208496, "learning_rate": 7.756412005562114e-07, "lm_loss": 5.4104, "loss": 1.2633, "step": 1826, "text_contrastive_loss": 0.7724, "train_positive_log_prob": -79.3204, "train_positive_token_accuracy": 0.0741, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3774, "epoch": 4.1241534988713315, "grad_norm": 11.641721725463867, "learning_rate": 7.717642306449113e-07, "lm_loss": 5.5162, "loss": 1.3255, "step": 1827, "text_contrastive_loss": 0.7929, "train_positive_log_prob": -80.3963, "train_positive_token_accuracy": 0.0734, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3358, "epoch": 4.126410835214447, "grad_norm": 13.242731094360352, "learning_rate": 7.678961638707633e-07, "lm_loss": 5.4367, "loss": 1.3027, "step": 1828, "text_contrastive_loss": 0.8465, "train_positive_log_prob": -80.8479, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.2636, "epoch": 4.128668171557562, "grad_norm": 11.120746612548828, "learning_rate": 7.640370083785175e-07, "lm_loss": 5.3813, "loss": 1.1778, "step": 1829, "text_contrastive_loss": 0.7521, "train_positive_log_prob": -79.4193, "train_positive_token_accuracy": 0.0844, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.3299, "epoch": 4.1309255079006775, "grad_norm": 12.61805534362793, "learning_rate": 7.601867722941642e-07, "lm_loss": 5.4367, "loss": 1.2724, "step": 1830, "text_contrastive_loss": 0.7978, "train_positive_log_prob": -81.3806, "train_positive_token_accuracy": 0.0885, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.3394, "epoch": 4.133182844243792, "grad_norm": 12.158013343811035, "learning_rate": 7.563454637249056e-07, "lm_loss": 5.5159, "loss": 1.3072, "step": 1831, "text_contrastive_loss": 0.8323, "train_positive_log_prob": -81.5749, "train_positive_token_accuracy": 0.0724, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3357, "epoch": 4.135440180586907, "grad_norm": 14.1693754196167, "learning_rate": 7.52513090759151e-07, "lm_loss": 5.4365, "loss": 1.2435, "step": 1832, "text_contrastive_loss": 0.7282, "train_positive_log_prob": -79.2957, "train_positive_token_accuracy": 0.0811, "train_positive_token_prob": 0.0327 }, { "contrastive_loss": 0.2817, "epoch": 4.1376975169300225, "grad_norm": 11.860201835632324, "learning_rate": 7.486896614664962e-07, "lm_loss": 5.3825, "loss": 1.1636, "step": 1833, "text_contrastive_loss": 0.6873, "train_positive_log_prob": -80.5702, "train_positive_token_accuracy": 0.0723, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3016, "epoch": 4.139954853273138, "grad_norm": 11.996062278747559, "learning_rate": 7.448751838977014e-07, "lm_loss": 5.3753, "loss": 1.1836, "step": 1834, "text_contrastive_loss": 0.689, "train_positive_log_prob": -79.5193, "train_positive_token_accuracy": 0.075, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3436, "epoch": 4.142212189616253, "grad_norm": 12.734991073608398, "learning_rate": 7.410696660846761e-07, "lm_loss": 5.459, "loss": 1.2774, "step": 1835, "text_contrastive_loss": 0.7757, "train_positive_log_prob": -81.822, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.4216, "epoch": 4.144469525959368, "grad_norm": 13.545905113220215, "learning_rate": 7.372731160404672e-07, "lm_loss": 5.3478, "loss": 1.4101, "step": 1836, "text_contrastive_loss": 0.9075, "train_positive_log_prob": -78.5069, "train_positive_token_accuracy": 0.0761, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3848, "epoch": 4.146726862302483, "grad_norm": 11.820764541625977, "learning_rate": 7.334855417592385e-07, "lm_loss": 5.4283, "loss": 1.3046, "step": 1837, "text_contrastive_loss": 0.7538, "train_positive_log_prob": -79.6684, "train_positive_token_accuracy": 0.0804, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.2993, "epoch": 4.148984198645598, "grad_norm": 12.980536460876465, "learning_rate": 7.297069512162535e-07, "lm_loss": 5.3651, "loss": 1.1632, "step": 1838, "text_contrastive_loss": 0.6548, "train_positive_log_prob": -80.013, "train_positive_token_accuracy": 0.0904, "train_positive_token_prob": 0.033 }, { "contrastive_loss": 0.278, "epoch": 4.1512415349887135, "grad_norm": 11.505115509033203, "learning_rate": 7.25937352367857e-07, "lm_loss": 5.4105, "loss": 1.2199, "step": 1839, "text_contrastive_loss": 0.8017, "train_positive_log_prob": -79.675, "train_positive_token_accuracy": 0.0785, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.282, "epoch": 4.153498871331829, "grad_norm": 10.915390014648438, "learning_rate": 7.22176753151464e-07, "lm_loss": 5.5966, "loss": 1.1317, "step": 1840, "text_contrastive_loss": 0.5801, "train_positive_log_prob": -83.7717, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.3058, "epoch": 4.155756207674943, "grad_norm": 13.515417098999023, "learning_rate": 7.184251614855369e-07, "lm_loss": 5.4955, "loss": 1.1339, "step": 1841, "text_contrastive_loss": 0.5571, "train_positive_log_prob": -81.4449, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.3044, "epoch": 4.158013544018059, "grad_norm": 11.466351509094238, "learning_rate": 7.146825852695749e-07, "lm_loss": 5.4374, "loss": 1.2048, "step": 1842, "text_contrastive_loss": 0.7134, "train_positive_log_prob": -84.0274, "train_positive_token_accuracy": 0.0827, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.3425, "epoch": 4.160270880361174, "grad_norm": 12.742423057556152, "learning_rate": 7.109490323840884e-07, "lm_loss": 5.4831, "loss": 1.3366, "step": 1843, "text_contrastive_loss": 0.8915, "train_positive_log_prob": -79.186, "train_positive_token_accuracy": 0.0747, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.359, "epoch": 4.162528216704289, "grad_norm": 11.728898048400879, "learning_rate": 7.072245106905928e-07, "lm_loss": 5.4898, "loss": 1.2143, "step": 1844, "text_contrastive_loss": 0.6126, "train_positive_log_prob": -81.4005, "train_positive_token_accuracy": 0.0732, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3609, "epoch": 4.164785553047404, "grad_norm": 13.316404342651367, "learning_rate": 7.035090280315854e-07, "lm_loss": 5.5048, "loss": 1.3146, "step": 1845, "text_contrastive_loss": 0.8064, "train_positive_log_prob": -81.0004, "train_positive_token_accuracy": 0.0847, "train_positive_token_prob": 0.0325 }, { "contrastive_loss": 0.3137, "epoch": 4.167042889390519, "grad_norm": 11.35694694519043, "learning_rate": 6.998025922305313e-07, "lm_loss": 5.3996, "loss": 1.2345, "step": 1846, "text_contrastive_loss": 0.7617, "train_positive_log_prob": -79.4142, "train_positive_token_accuracy": 0.0869, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.3434, "epoch": 4.169300225733634, "grad_norm": 12.942392349243164, "learning_rate": 6.961052110918432e-07, "lm_loss": 5.3292, "loss": 1.2651, "step": 1847, "text_contrastive_loss": 0.7775, "train_positive_log_prob": -78.1046, "train_positive_token_accuracy": 0.0869, "train_positive_token_prob": 0.0333 }, { "contrastive_loss": 0.3689, "epoch": 4.1715575620767495, "grad_norm": 14.120200157165527, "learning_rate": 6.924168924008712e-07, "lm_loss": 5.3815, "loss": 1.1817, "step": 1848, "text_contrastive_loss": 0.5493, "train_positive_log_prob": -79.3788, "train_positive_token_accuracy": 0.0805, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3311, "epoch": 4.173814898419865, "grad_norm": 13.238189697265625, "learning_rate": 6.887376439238813e-07, "lm_loss": 5.4392, "loss": 1.2551, "step": 1849, "text_contrastive_loss": 0.7603, "train_positive_log_prob": -79.7593, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3229, "epoch": 4.176072234762979, "grad_norm": 12.410944938659668, "learning_rate": 6.850674734080454e-07, "lm_loss": 5.4465, "loss": 1.2135, "step": 1850, "text_contrastive_loss": 0.692, "train_positive_log_prob": -80.2507, "train_positive_token_accuracy": 0.075, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.2852, "epoch": 4.178329571106095, "grad_norm": 12.629752159118652, "learning_rate": 6.814063885814127e-07, "lm_loss": 5.4311, "loss": 1.181, "step": 1851, "text_contrastive_loss": 0.7054, "train_positive_log_prob": -77.4575, "train_positive_token_accuracy": 0.0781, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.2841, "epoch": 4.18058690744921, "grad_norm": 11.972105979919434, "learning_rate": 6.77754397152906e-07, "lm_loss": 5.4088, "loss": 1.2003, "step": 1852, "text_contrastive_loss": 0.7507, "train_positive_log_prob": -79.0844, "train_positive_token_accuracy": 0.0793, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3862, "epoch": 4.182844243792325, "grad_norm": 13.545210838317871, "learning_rate": 6.741115068123017e-07, "lm_loss": 5.4036, "loss": 1.3364, "step": 1853, "text_contrastive_loss": 0.8197, "train_positive_log_prob": -80.1254, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.364, "epoch": 4.1851015801354405, "grad_norm": 14.371001243591309, "learning_rate": 6.704777252302108e-07, "lm_loss": 5.4491, "loss": 1.3381, "step": 1854, "text_contrastive_loss": 0.8583, "train_positive_log_prob": -80.6463, "train_positive_token_accuracy": 0.0805, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3535, "epoch": 4.187358916478555, "grad_norm": 13.23997688293457, "learning_rate": 6.66853060058063e-07, "lm_loss": 5.4168, "loss": 1.3207, "step": 1855, "text_contrastive_loss": 0.851, "train_positive_log_prob": -81.5058, "train_positive_token_accuracy": 0.082, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.281, "epoch": 4.18961625282167, "grad_norm": 10.905323028564453, "learning_rate": 6.632375189280948e-07, "lm_loss": 5.4921, "loss": 1.226, "step": 1856, "text_contrastive_loss": 0.7916, "train_positive_log_prob": -81.1158, "train_positive_token_accuracy": 0.0733, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.3916, "epoch": 4.191873589164786, "grad_norm": 13.602428436279297, "learning_rate": 6.596311094533292e-07, "lm_loss": 5.487, "loss": 1.362, "step": 1857, "text_contrastive_loss": 0.8434, "train_positive_log_prob": -81.5548, "train_positive_token_accuracy": 0.0762, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.4214, "epoch": 4.194130925507901, "grad_norm": 17.563297271728516, "learning_rate": 6.56033839227564e-07, "lm_loss": 5.3989, "loss": 1.3424, "step": 1858, "text_contrastive_loss": 0.7623, "train_positive_log_prob": -80.3679, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3235, "epoch": 4.196388261851016, "grad_norm": 12.790931701660156, "learning_rate": 6.524457158253472e-07, "lm_loss": 5.3472, "loss": 1.2644, "step": 1859, "text_contrastive_loss": 0.8123, "train_positive_log_prob": -77.931, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3487, "epoch": 4.198645598194131, "grad_norm": 12.175345420837402, "learning_rate": 6.488667468019727e-07, "lm_loss": 5.4997, "loss": 1.2886, "step": 1860, "text_contrastive_loss": 0.7799, "train_positive_log_prob": -79.8771, "train_positive_token_accuracy": 0.0804, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.2934, "epoch": 4.200902934537246, "grad_norm": 12.599485397338867, "learning_rate": 6.452969396934567e-07, "lm_loss": 5.5562, "loss": 1.1829, "step": 1861, "text_contrastive_loss": 0.6678, "train_positive_log_prob": -81.6972, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.246, "epoch": 4.203160270880361, "grad_norm": 11.814972877502441, "learning_rate": 6.417363020165235e-07, "lm_loss": 5.3403, "loss": 1.1099, "step": 1862, "text_contrastive_loss": 0.6597, "train_positive_log_prob": -78.7533, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.2798, "epoch": 4.205417607223477, "grad_norm": 10.557435989379883, "learning_rate": 6.381848412685882e-07, "lm_loss": 5.4239, "loss": 1.1508, "step": 1863, "text_contrastive_loss": 0.6571, "train_positive_log_prob": -79.5598, "train_positive_token_accuracy": 0.0807, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3817, "epoch": 4.207674943566591, "grad_norm": 12.646711349487305, "learning_rate": 6.346425649277454e-07, "lm_loss": 5.465, "loss": 1.3197, "step": 1864, "text_contrastive_loss": 0.783, "train_positive_log_prob": -81.398, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3897, "epoch": 4.209932279909706, "grad_norm": 15.26496696472168, "learning_rate": 6.31109480452749e-07, "lm_loss": 5.4765, "loss": 1.3993, "step": 1865, "text_contrastive_loss": 0.924, "train_positive_log_prob": -81.3361, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3847, "epoch": 4.212189616252822, "grad_norm": 14.464232444763184, "learning_rate": 6.275855952829995e-07, "lm_loss": 5.4352, "loss": 1.3913, "step": 1866, "text_contrastive_loss": 0.926, "train_positive_log_prob": -80.3427, "train_positive_token_accuracy": 0.0809, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3403, "epoch": 4.214446952595937, "grad_norm": 11.755424499511719, "learning_rate": 6.240709168385251e-07, "lm_loss": 5.4246, "loss": 1.2199, "step": 1867, "text_contrastive_loss": 0.6741, "train_positive_log_prob": -82.3869, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.2969, "epoch": 4.216704288939052, "grad_norm": 11.180468559265137, "learning_rate": 6.2056545251997e-07, "lm_loss": 5.4097, "loss": 1.2182, "step": 1868, "text_contrastive_loss": 0.7606, "train_positive_log_prob": -80.1256, "train_positive_token_accuracy": 0.0765, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.2758, "epoch": 4.218961625282167, "grad_norm": 11.721410751342773, "learning_rate": 6.170692097085751e-07, "lm_loss": 5.3654, "loss": 1.185, "step": 1869, "text_contrastive_loss": 0.7453, "train_positive_log_prob": -80.0163, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.307, "epoch": 4.221218961625282, "grad_norm": 11.450150489807129, "learning_rate": 6.135821957661658e-07, "lm_loss": 5.398, "loss": 1.1763, "step": 1870, "text_contrastive_loss": 0.6591, "train_positive_log_prob": -80.4404, "train_positive_token_accuracy": 0.0912, "train_positive_token_prob": 0.0331 }, { "contrastive_loss": 0.299, "epoch": 4.223476297968397, "grad_norm": 11.473048210144043, "learning_rate": 6.101044180351318e-07, "lm_loss": 5.5531, "loss": 1.1965, "step": 1871, "text_contrastive_loss": 0.6842, "train_positive_log_prob": -84.6443, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3191, "epoch": 4.225733634311513, "grad_norm": 12.278962135314941, "learning_rate": 6.066358838384184e-07, "lm_loss": 5.4404, "loss": 1.2195, "step": 1872, "text_contrastive_loss": 0.7128, "train_positive_log_prob": -81.247, "train_positive_token_accuracy": 0.0879, "train_positive_token_prob": 0.0328 }, { "contrastive_loss": 0.3228, "epoch": 4.227990970654628, "grad_norm": 13.453584671020508, "learning_rate": 6.031766004795047e-07, "lm_loss": 5.4725, "loss": 1.2444, "step": 1873, "text_contrastive_loss": 0.7486, "train_positive_log_prob": -81.2786, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.2865, "epoch": 4.230248306997742, "grad_norm": 11.51198959350586, "learning_rate": 5.997265752423936e-07, "lm_loss": 5.4334, "loss": 1.1658, "step": 1874, "text_contrastive_loss": 0.6719, "train_positive_log_prob": -80.8198, "train_positive_token_accuracy": 0.0748, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.2555, "epoch": 4.232505643340858, "grad_norm": 12.516451835632324, "learning_rate": 5.962858153915896e-07, "lm_loss": 5.4485, "loss": 1.1855, "step": 1875, "text_contrastive_loss": 0.7705, "train_positive_log_prob": -80.4737, "train_positive_token_accuracy": 0.0756, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3613, "epoch": 4.234762979683973, "grad_norm": 13.309110641479492, "learning_rate": 5.928543281720917e-07, "lm_loss": 5.4458, "loss": 1.3317, "step": 1876, "text_contrastive_loss": 0.8516, "train_positive_log_prob": -81.0071, "train_positive_token_accuracy": 0.0838, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.4067, "epoch": 4.237020316027088, "grad_norm": 14.060657501220703, "learning_rate": 5.894321208093712e-07, "lm_loss": 5.4241, "loss": 1.3882, "step": 1877, "text_contrastive_loss": 0.8781, "train_positive_log_prob": -81.2745, "train_positive_token_accuracy": 0.0821, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.2982, "epoch": 4.239277652370204, "grad_norm": 11.619880676269531, "learning_rate": 5.860192005093624e-07, "lm_loss": 5.4563, "loss": 1.253, "step": 1878, "text_contrastive_loss": 0.8184, "train_positive_log_prob": -81.8854, "train_positive_token_accuracy": 0.0781, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3242, "epoch": 4.241534988713318, "grad_norm": 12.934802055358887, "learning_rate": 5.826155744584405e-07, "lm_loss": 5.3315, "loss": 1.1814, "step": 1879, "text_contrastive_loss": 0.6482, "train_positive_log_prob": -78.1412, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.2808, "epoch": 4.243792325056433, "grad_norm": 12.162077903747559, "learning_rate": 5.792212498234134e-07, "lm_loss": 5.4209, "loss": 1.2429, "step": 1880, "text_contrastive_loss": 0.8402, "train_positive_log_prob": -80.4847, "train_positive_token_accuracy": 0.0781, "train_positive_token_prob": 0.0326 }, { "contrastive_loss": 0.3423, "epoch": 4.246049661399549, "grad_norm": 13.890145301818848, "learning_rate": 5.758362337515028e-07, "lm_loss": 5.4438, "loss": 1.3069, "step": 1881, "text_contrastive_loss": 0.8404, "train_positive_log_prob": -81.1248, "train_positive_token_accuracy": 0.0781, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.2812, "epoch": 4.248306997742664, "grad_norm": 13.08950138092041, "learning_rate": 5.724605333703303e-07, "lm_loss": 5.4289, "loss": 1.2036, "step": 1882, "text_contrastive_loss": 0.7592, "train_positive_log_prob": -77.8523, "train_positive_token_accuracy": 0.0804, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.3114, "epoch": 4.250564334085778, "grad_norm": 11.903820037841797, "learning_rate": 5.690941557878988e-07, "lm_loss": 5.4437, "loss": 1.2735, "step": 1883, "text_contrastive_loss": 0.8354, "train_positive_log_prob": -80.6324, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.2795, "epoch": 4.252821670428894, "grad_norm": 11.589510917663574, "learning_rate": 5.657371080925866e-07, "lm_loss": 5.413, "loss": 1.1392, "step": 1884, "text_contrastive_loss": 0.6367, "train_positive_log_prob": -80.4782, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.2403, "epoch": 4.255079006772009, "grad_norm": 11.013530731201172, "learning_rate": 5.623893973531225e-07, "lm_loss": 5.4858, "loss": 1.0851, "step": 1885, "text_contrastive_loss": 0.5923, "train_positive_log_prob": -81.4935, "train_positive_token_accuracy": 0.076, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.3294, "epoch": 4.257336343115124, "grad_norm": 11.694799423217773, "learning_rate": 5.590510306185765e-07, "lm_loss": 5.48, "loss": 1.2948, "step": 1886, "text_contrastive_loss": 0.8349, "train_positive_log_prob": -81.6183, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.3281, "epoch": 4.25959367945824, "grad_norm": 12.523609161376953, "learning_rate": 5.557220149183412e-07, "lm_loss": 5.4963, "loss": 1.3172, "step": 1887, "text_contrastive_loss": 0.879, "train_positive_log_prob": -82.4027, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3005, "epoch": 4.261851015801354, "grad_norm": 12.24012565612793, "learning_rate": 5.524023572621229e-07, "lm_loss": 5.4156, "loss": 1.2741, "step": 1888, "text_contrastive_loss": 0.864, "train_positive_log_prob": -80.4733, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.2849, "epoch": 4.264108352144469, "grad_norm": 12.618743896484375, "learning_rate": 5.4909206463992e-07, "lm_loss": 5.4421, "loss": 1.2187, "step": 1889, "text_contrastive_loss": 0.7792, "train_positive_log_prob": -80.4849, "train_positive_token_accuracy": 0.0774, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.3576, "epoch": 4.266365688487585, "grad_norm": 13.902774810791016, "learning_rate": 5.457911440220154e-07, "lm_loss": 5.4181, "loss": 1.3273, "step": 1890, "text_contrastive_loss": 0.8559, "train_positive_log_prob": -78.8596, "train_positive_token_accuracy": 0.0843, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3058, "epoch": 4.2686230248307, "grad_norm": 12.352407455444336, "learning_rate": 5.424996023589524e-07, "lm_loss": 5.4638, "loss": 1.2143, "step": 1891, "text_contrastive_loss": 0.7242, "train_positive_log_prob": -79.8192, "train_positive_token_accuracy": 0.073, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.3377, "epoch": 4.270880361173815, "grad_norm": 12.394871711730957, "learning_rate": 5.392174465815308e-07, "lm_loss": 5.448, "loss": 1.3208, "step": 1892, "text_contrastive_loss": 0.8766, "train_positive_log_prob": -80.6557, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.2068, "epoch": 4.27313769751693, "grad_norm": 9.813087463378906, "learning_rate": 5.359446836007842e-07, "lm_loss": 5.3534, "loss": 1.0492, "step": 1893, "text_contrastive_loss": 0.6142, "train_positive_log_prob": -78.2899, "train_positive_token_accuracy": 0.0751, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.308, "epoch": 4.275395033860045, "grad_norm": 13.385251998901367, "learning_rate": 5.326813203079706e-07, "lm_loss": 5.4145, "loss": 1.2336, "step": 1894, "text_contrastive_loss": 0.7683, "train_positive_log_prob": -78.9948, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3299, "epoch": 4.27765237020316, "grad_norm": 13.189772605895996, "learning_rate": 5.294273635745517e-07, "lm_loss": 5.4433, "loss": 1.3282, "step": 1895, "text_contrastive_loss": 0.9078, "train_positive_log_prob": -81.1639, "train_positive_token_accuracy": 0.0828, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.5198, "epoch": 4.279909706546276, "grad_norm": 13.458785057067871, "learning_rate": 5.261828202521868e-07, "lm_loss": 5.4507, "loss": 1.5369, "step": 1896, "text_contrastive_loss": 0.9441, "train_positive_log_prob": -82.8307, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.2221, "epoch": 4.282167042889391, "grad_norm": 11.057195663452148, "learning_rate": 5.229476971727115e-07, "lm_loss": 5.3322, "loss": 1.0969, "step": 1897, "text_contrastive_loss": 0.6831, "train_positive_log_prob": -78.2909, "train_positive_token_accuracy": 0.0835, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.2966, "epoch": 4.284424379232505, "grad_norm": 11.43716812133789, "learning_rate": 5.197220011481274e-07, "lm_loss": 5.392, "loss": 1.2066, "step": 1898, "text_contrastive_loss": 0.7415, "train_positive_log_prob": -79.3838, "train_positive_token_accuracy": 0.0812, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.3147, "epoch": 4.286681715575621, "grad_norm": 11.857349395751953, "learning_rate": 5.165057389705835e-07, "lm_loss": 5.4267, "loss": 1.2362, "step": 1899, "text_contrastive_loss": 0.7577, "train_positive_log_prob": -79.3457, "train_positive_token_accuracy": 0.0793, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3817, "epoch": 4.288939051918736, "grad_norm": 13.249019622802734, "learning_rate": 5.132989174123659e-07, "lm_loss": 5.4844, "loss": 1.3051, "step": 1900, "text_contrastive_loss": 0.7499, "train_positive_log_prob": -81.2181, "train_positive_token_accuracy": 0.0839, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3436, "epoch": 4.291196388261851, "grad_norm": 12.054238319396973, "learning_rate": 5.101015432258843e-07, "lm_loss": 5.4637, "loss": 1.2689, "step": 1901, "text_contrastive_loss": 0.7578, "train_positive_log_prob": -80.6345, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3196, "epoch": 4.293453724604966, "grad_norm": 13.134613990783691, "learning_rate": 5.069136231436539e-07, "lm_loss": 5.4477, "loss": 1.2169, "step": 1902, "text_contrastive_loss": 0.7051, "train_positive_log_prob": -79.6727, "train_positive_token_accuracy": 0.0751, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3759, "epoch": 4.295711060948081, "grad_norm": 11.014152526855469, "learning_rate": 5.037351638782812e-07, "lm_loss": 5.4405, "loss": 1.3688, "step": 1903, "text_contrastive_loss": 0.8977, "train_positive_log_prob": -79.6974, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.372, "epoch": 4.297968397291196, "grad_norm": 13.289602279663086, "learning_rate": 5.00566172122453e-07, "lm_loss": 5.3684, "loss": 1.3195, "step": 1904, "text_contrastive_loss": 0.8213, "train_positive_log_prob": -79.671, "train_positive_token_accuracy": 0.0769, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.4053, "epoch": 4.300225733634312, "grad_norm": 12.918943405151367, "learning_rate": 4.97406654548922e-07, "lm_loss": 5.3847, "loss": 1.4672, "step": 1905, "text_contrastive_loss": 1.0468, "train_positive_log_prob": -77.8223, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.4202, "epoch": 4.302483069977427, "grad_norm": 13.597563743591309, "learning_rate": 4.942566178104924e-07, "lm_loss": 5.3564, "loss": 1.3696, "step": 1906, "text_contrastive_loss": 0.8275, "train_positive_log_prob": -78.4392, "train_positive_token_accuracy": 0.0814, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.3352, "epoch": 4.3047404063205414, "grad_norm": 11.399421691894531, "learning_rate": 4.911160685400008e-07, "lm_loss": 5.4272, "loss": 1.2694, "step": 1907, "text_contrastive_loss": 0.7829, "train_positive_log_prob": -82.2106, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.303, "epoch": 4.306997742663657, "grad_norm": 11.587457656860352, "learning_rate": 4.879850133503106e-07, "lm_loss": 5.4226, "loss": 1.2057, "step": 1908, "text_contrastive_loss": 0.7208, "train_positive_log_prob": -81.3343, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.292, "epoch": 4.309255079006772, "grad_norm": 12.802454948425293, "learning_rate": 4.848634588342932e-07, "lm_loss": 5.4536, "loss": 1.213, "step": 1909, "text_contrastive_loss": 0.7512, "train_positive_log_prob": -81.4406, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3073, "epoch": 4.311512415349887, "grad_norm": 11.837509155273438, "learning_rate": 4.817514115648164e-07, "lm_loss": 5.395, "loss": 1.1225, "step": 1910, "text_contrastive_loss": 0.5514, "train_positive_log_prob": -78.6523, "train_positive_token_accuracy": 0.0689, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.292, "epoch": 4.313769751693002, "grad_norm": 11.047466278076172, "learning_rate": 4.786488780947246e-07, "lm_loss": 5.3851, "loss": 1.172, "step": 1911, "text_contrastive_loss": 0.683, "train_positive_log_prob": -80.1441, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3286, "epoch": 4.316027088036117, "grad_norm": 12.600515365600586, "learning_rate": 4.755558649568337e-07, "lm_loss": 5.5078, "loss": 1.2175, "step": 1912, "text_contrastive_loss": 0.6761, "train_positive_log_prob": -81.7156, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3419, "epoch": 4.318284424379232, "grad_norm": 12.97014045715332, "learning_rate": 4.7247237866391236e-07, "lm_loss": 5.3523, "loss": 1.2679, "step": 1913, "text_contrastive_loss": 0.7815, "train_positive_log_prob": -78.1133, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.234, "epoch": 4.320541760722348, "grad_norm": 10.707161903381348, "learning_rate": 4.6939842570867034e-07, "lm_loss": 5.2514, "loss": 1.0834, "step": 1914, "text_contrastive_loss": 0.6484, "train_positive_log_prob": -76.5977, "train_positive_token_accuracy": 0.0833, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.2927, "epoch": 4.322799097065463, "grad_norm": 12.018651008605957, "learning_rate": 4.663340125637389e-07, "lm_loss": 5.4897, "loss": 1.1458, "step": 1915, "text_contrastive_loss": 0.6083, "train_positive_log_prob": -84.1907, "train_positive_token_accuracy": 0.0853, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3639, "epoch": 4.3250564334085775, "grad_norm": 13.130175590515137, "learning_rate": 4.6327914568166763e-07, "lm_loss": 5.552, "loss": 1.3112, "step": 1916, "text_contrastive_loss": 0.7841, "train_positive_log_prob": -83.5838, "train_positive_token_accuracy": 0.0763, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3794, "epoch": 4.327313769751693, "grad_norm": 11.861356735229492, "learning_rate": 4.6023383149490066e-07, "lm_loss": 5.353, "loss": 1.3373, "step": 1917, "text_contrastive_loss": 0.8452, "train_positive_log_prob": -79.5412, "train_positive_token_accuracy": 0.082, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.3183, "epoch": 4.329571106094808, "grad_norm": 12.326311111450195, "learning_rate": 4.571980764157724e-07, "lm_loss": 5.4535, "loss": 1.2567, "step": 1918, "text_contrastive_loss": 0.7861, "train_positive_log_prob": -80.2063, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3893, "epoch": 4.331828442437923, "grad_norm": 12.893142700195312, "learning_rate": 4.5417188683648417e-07, "lm_loss": 5.4157, "loss": 1.3553, "step": 1919, "text_contrastive_loss": 0.8489, "train_positive_log_prob": -81.1094, "train_positive_token_accuracy": 0.0887, "train_positive_token_prob": 0.0326 }, { "contrastive_loss": 0.3404, "epoch": 4.334085778781039, "grad_norm": 13.110001564025879, "learning_rate": 4.511552691290988e-07, "lm_loss": 5.3956, "loss": 1.2924, "step": 1920, "text_contrastive_loss": 0.8249, "train_positive_log_prob": -80.4881, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3375, "epoch": 4.336343115124153, "grad_norm": 12.533656120300293, "learning_rate": 4.4814822964552363e-07, "lm_loss": 5.3953, "loss": 1.2804, "step": 1921, "text_contrastive_loss": 0.8069, "train_positive_log_prob": -81.7771, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.4657, "epoch": 4.3386004514672685, "grad_norm": 17.309276580810547, "learning_rate": 4.4515077471749767e-07, "lm_loss": 5.3767, "loss": 1.4613, "step": 1922, "text_contrastive_loss": 0.9159, "train_positive_log_prob": -77.839, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.4217, "epoch": 4.340857787810384, "grad_norm": 14.05301284790039, "learning_rate": 4.421629106565778e-07, "lm_loss": 5.4113, "loss": 1.3483, "step": 1923, "text_contrastive_loss": 0.7708, "train_positive_log_prob": -80.115, "train_positive_token_accuracy": 0.081, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3704, "epoch": 4.343115124153499, "grad_norm": 15.06448745727539, "learning_rate": 4.391846437541258e-07, "lm_loss": 5.548, "loss": 1.3138, "step": 1924, "text_contrastive_loss": 0.7773, "train_positive_log_prob": -82.7872, "train_positive_token_accuracy": 0.0694, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.3323, "epoch": 4.345372460496614, "grad_norm": 12.01858139038086, "learning_rate": 4.362159802812971e-07, "lm_loss": 5.3856, "loss": 1.2592, "step": 1925, "text_contrastive_loss": 0.7768, "train_positive_log_prob": -78.4668, "train_positive_token_accuracy": 0.0791, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3896, "epoch": 4.347629796839729, "grad_norm": 12.243396759033203, "learning_rate": 4.332569264890252e-07, "lm_loss": 5.4137, "loss": 1.3015, "step": 1926, "text_contrastive_loss": 0.7409, "train_positive_log_prob": -79.2809, "train_positive_token_accuracy": 0.0886, "train_positive_token_prob": 0.0326 }, { "contrastive_loss": 0.3488, "epoch": 4.349887133182844, "grad_norm": 12.974628448486328, "learning_rate": 4.3030748860800606e-07, "lm_loss": 5.4622, "loss": 1.2245, "step": 1927, "text_contrastive_loss": 0.6589, "train_positive_log_prob": -81.2073, "train_positive_token_accuracy": 0.0767, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3219, "epoch": 4.3521444695259595, "grad_norm": 13.554567337036133, "learning_rate": 4.273676728486925e-07, "lm_loss": 5.4657, "loss": 1.2173, "step": 1928, "text_contrastive_loss": 0.6976, "train_positive_log_prob": -81.0887, "train_positive_token_accuracy": 0.0821, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3091, "epoch": 4.354401805869075, "grad_norm": 11.172772407531738, "learning_rate": 4.244374854012734e-07, "lm_loss": 5.3847, "loss": 1.1814, "step": 1929, "text_contrastive_loss": 0.6675, "train_positive_log_prob": -79.6139, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3764, "epoch": 4.356659142212189, "grad_norm": 12.820956230163574, "learning_rate": 4.215169324356666e-07, "lm_loss": 5.4759, "loss": 1.3309, "step": 1930, "text_contrastive_loss": 0.8139, "train_positive_log_prob": -80.7251, "train_positive_token_accuracy": 0.0829, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.2686, "epoch": 4.3589164785553045, "grad_norm": 12.839651107788086, "learning_rate": 4.186060201014991e-07, "lm_loss": 5.4411, "loss": 1.1247, "step": 1931, "text_contrastive_loss": 0.624, "train_positive_log_prob": -78.1693, "train_positive_token_accuracy": 0.0805, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3252, "epoch": 4.36117381489842, "grad_norm": 12.507498741149902, "learning_rate": 4.157047545281029e-07, "lm_loss": 5.4765, "loss": 1.2511, "step": 1932, "text_contrastive_loss": 0.7564, "train_positive_log_prob": -80.7387, "train_positive_token_accuracy": 0.0681, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3636, "epoch": 4.363431151241535, "grad_norm": 14.36543083190918, "learning_rate": 4.1281314182449405e-07, "lm_loss": 5.4303, "loss": 1.3354, "step": 1933, "text_contrastive_loss": 0.8575, "train_positive_log_prob": -80.0147, "train_positive_token_accuracy": 0.0732, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.3823, "epoch": 4.3656884875846504, "grad_norm": 13.869869232177734, "learning_rate": 4.099311880793655e-07, "lm_loss": 5.3796, "loss": 1.3698, "step": 1934, "text_contrastive_loss": 0.8991, "train_positive_log_prob": -80.949, "train_positive_token_accuracy": 0.082, "train_positive_token_prob": 0.0325 }, { "contrastive_loss": 0.291, "epoch": 4.367945823927765, "grad_norm": 10.774118423461914, "learning_rate": 4.070588993610697e-07, "lm_loss": 5.5101, "loss": 1.1343, "step": 1935, "text_contrastive_loss": 0.5845, "train_positive_log_prob": -81.6393, "train_positive_token_accuracy": 0.0762, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3525, "epoch": 4.37020316027088, "grad_norm": 12.817048072814941, "learning_rate": 4.0419628171760927e-07, "lm_loss": 5.4295, "loss": 1.2563, "step": 1936, "text_contrastive_loss": 0.7219, "train_positive_log_prob": -78.9835, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.329, "epoch": 4.3724604966139955, "grad_norm": 12.01680850982666, "learning_rate": 4.0134334117662375e-07, "lm_loss": 5.4157, "loss": 1.3076, "step": 1937, "text_contrastive_loss": 0.874, "train_positive_log_prob": -81.4031, "train_positive_token_accuracy": 0.0777, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3601, "epoch": 4.374717832957111, "grad_norm": 13.025857925415039, "learning_rate": 3.985000837453756e-07, "lm_loss": 5.491, "loss": 1.2922, "step": 1938, "text_contrastive_loss": 0.766, "train_positive_log_prob": -80.6333, "train_positive_token_accuracy": 0.0843, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3186, "epoch": 4.376975169300226, "grad_norm": 11.83201789855957, "learning_rate": 3.9566651541073586e-07, "lm_loss": 5.3517, "loss": 1.1773, "step": 1939, "text_contrastive_loss": 0.647, "train_positive_log_prob": -81.2197, "train_positive_token_accuracy": 0.0844, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.3609, "epoch": 4.3792325056433405, "grad_norm": 11.843232154846191, "learning_rate": 3.928426421391773e-07, "lm_loss": 5.4419, "loss": 1.2533, "step": 1940, "text_contrastive_loss": 0.6963, "train_positive_log_prob": -79.7574, "train_positive_token_accuracy": 0.0789, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3211, "epoch": 4.381489841986456, "grad_norm": 10.878660202026367, "learning_rate": 3.9002846987675704e-07, "lm_loss": 5.5271, "loss": 1.238, "step": 1941, "text_contrastive_loss": 0.7284, "train_positive_log_prob": -82.4716, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3773, "epoch": 4.383747178329571, "grad_norm": 12.85601806640625, "learning_rate": 3.872240045491055e-07, "lm_loss": 5.4526, "loss": 1.2866, "step": 1942, "text_contrastive_loss": 0.7282, "train_positive_log_prob": -80.0619, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.2619, "epoch": 4.3860045146726865, "grad_norm": 10.641718864440918, "learning_rate": 3.8442925206141237e-07, "lm_loss": 5.4418, "loss": 1.1808, "step": 1943, "text_contrastive_loss": 0.7495, "train_positive_log_prob": -80.9498, "train_positive_token_accuracy": 0.0777, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3433, "epoch": 4.388261851015802, "grad_norm": 13.001375198364258, "learning_rate": 3.8164421829841756e-07, "lm_loss": 5.4389, "loss": 1.3035, "step": 1944, "text_contrastive_loss": 0.8326, "train_positive_log_prob": -81.3895, "train_positive_token_accuracy": 0.0733, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3319, "epoch": 4.390519187358916, "grad_norm": 12.554210662841797, "learning_rate": 3.7886890912439633e-07, "lm_loss": 5.4099, "loss": 1.2898, "step": 1945, "text_contrastive_loss": 0.8339, "train_positive_log_prob": -79.8156, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.2732, "epoch": 4.3927765237020315, "grad_norm": 10.95527172088623, "learning_rate": 3.761033303831474e-07, "lm_loss": 5.3417, "loss": 1.1577, "step": 1946, "text_contrastive_loss": 0.7006, "train_positive_log_prob": -79.8084, "train_positive_token_accuracy": 0.081, "train_positive_token_prob": 0.0327 }, { "contrastive_loss": 0.3254, "epoch": 4.395033860045147, "grad_norm": 11.946934700012207, "learning_rate": 3.733474878979798e-07, "lm_loss": 5.5016, "loss": 1.2357, "step": 1947, "text_contrastive_loss": 0.7203, "train_positive_log_prob": -81.7702, "train_positive_token_accuracy": 0.0809, "train_positive_token_prob": 0.0325 }, { "contrastive_loss": 0.3162, "epoch": 4.397291196388262, "grad_norm": 11.75720500946045, "learning_rate": 3.706013874717024e-07, "lm_loss": 5.4124, "loss": 1.2045, "step": 1948, "text_contrastive_loss": 0.694, "train_positive_log_prob": -78.4789, "train_positive_token_accuracy": 0.0789, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3482, "epoch": 4.399548532731377, "grad_norm": 12.269476890563965, "learning_rate": 3.678650348866114e-07, "lm_loss": 5.4388, "loss": 1.2701, "step": 1949, "text_contrastive_loss": 0.756, "train_positive_log_prob": -81.4512, "train_positive_token_accuracy": 0.0725, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.3276, "epoch": 4.401805869074492, "grad_norm": 11.407697677612305, "learning_rate": 3.651384359044774e-07, "lm_loss": 5.4254, "loss": 1.2115, "step": 1950, "text_contrastive_loss": 0.6827, "train_positive_log_prob": -78.9622, "train_positive_token_accuracy": 0.0835, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.2941, "epoch": 4.404063205417607, "grad_norm": 11.566184997558594, "learning_rate": 3.6242159626653004e-07, "lm_loss": 5.2775, "loss": 1.2618, "step": 1951, "text_contrastive_loss": 0.8798, "train_positive_log_prob": -79.4109, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.3303, "epoch": 4.4063205417607225, "grad_norm": 12.965038299560547, "learning_rate": 3.597145216934556e-07, "lm_loss": 5.5454, "loss": 1.3134, "step": 1952, "text_contrastive_loss": 0.8572, "train_positive_log_prob": -83.4865, "train_positive_token_accuracy": 0.0758, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.2828, "epoch": 4.408577878103838, "grad_norm": 11.011374473571777, "learning_rate": 3.570172178853731e-07, "lm_loss": 5.4672, "loss": 1.2173, "step": 1953, "text_contrastive_loss": 0.7756, "train_positive_log_prob": -81.5175, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3069, "epoch": 4.410835214446952, "grad_norm": 12.267878532409668, "learning_rate": 3.5432969052183186e-07, "lm_loss": 5.443, "loss": 1.2058, "step": 1954, "text_contrastive_loss": 0.7092, "train_positive_log_prob": -78.9866, "train_positive_token_accuracy": 0.0756, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.2954, "epoch": 4.413092550790068, "grad_norm": 11.222500801086426, "learning_rate": 3.516519452617922e-07, "lm_loss": 5.4162, "loss": 1.1945, "step": 1955, "text_contrastive_loss": 0.7149, "train_positive_log_prob": -79.4485, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3573, "epoch": 4.415349887133183, "grad_norm": 12.776378631591797, "learning_rate": 3.4898398774361854e-07, "lm_loss": 5.2865, "loss": 1.2416, "step": 1956, "text_contrastive_loss": 0.7113, "train_positive_log_prob": -77.7679, "train_positive_token_accuracy": 0.0789, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.2655, "epoch": 4.417607223476298, "grad_norm": 12.210612297058105, "learning_rate": 3.463258235850653e-07, "lm_loss": 5.4574, "loss": 1.2067, "step": 1957, "text_contrastive_loss": 0.7909, "train_positive_log_prob": -82.1617, "train_positive_token_accuracy": 0.0776, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3802, "epoch": 4.4198645598194135, "grad_norm": 12.871219635009766, "learning_rate": 3.4367745838326807e-07, "lm_loss": 5.4506, "loss": 1.33, "step": 1958, "text_contrastive_loss": 0.8096, "train_positive_log_prob": -80.2472, "train_positive_token_accuracy": 0.078, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3668, "epoch": 4.422121896162528, "grad_norm": 12.472192764282227, "learning_rate": 3.410388977147244e-07, "lm_loss": 5.3762, "loss": 1.2986, "step": 1959, "text_contrastive_loss": 0.7884, "train_positive_log_prob": -80.2454, "train_positive_token_accuracy": 0.0822, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.2747, "epoch": 4.424379232505643, "grad_norm": 11.645474433898926, "learning_rate": 3.3841014713529184e-07, "lm_loss": 5.4041, "loss": 1.1202, "step": 1960, "text_contrastive_loss": 0.6103, "train_positive_log_prob": -80.7221, "train_positive_token_accuracy": 0.074, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.427, "epoch": 4.426636568848759, "grad_norm": 13.484107971191406, "learning_rate": 3.357912121801682e-07, "lm_loss": 5.3172, "loss": 1.4158, "step": 1961, "text_contrastive_loss": 0.9143, "train_positive_log_prob": -77.3503, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3313, "epoch": 4.428893905191874, "grad_norm": 11.793492317199707, "learning_rate": 3.331820983638867e-07, "lm_loss": 5.3923, "loss": 1.2893, "step": 1962, "text_contrastive_loss": 0.8376, "train_positive_log_prob": -78.7351, "train_positive_token_accuracy": 0.0816, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3317, "epoch": 4.431151241534989, "grad_norm": 13.229374885559082, "learning_rate": 3.3058281118029553e-07, "lm_loss": 5.5145, "loss": 1.2835, "step": 1963, "text_contrastive_loss": 0.8007, "train_positive_log_prob": -83.6229, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3781, "epoch": 4.433408577878104, "grad_norm": 11.081595420837402, "learning_rate": 3.279933561025567e-07, "lm_loss": 5.4846, "loss": 1.3576, "step": 1964, "text_contrastive_loss": 0.8622, "train_positive_log_prob": -81.1751, "train_positive_token_accuracy": 0.0826, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3986, "epoch": 4.435665914221219, "grad_norm": 13.550172805786133, "learning_rate": 3.254137385831263e-07, "lm_loss": 5.3798, "loss": 1.2674, "step": 1965, "text_contrastive_loss": 0.6616, "train_positive_log_prob": -79.9192, "train_positive_token_accuracy": 0.0763, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3653, "epoch": 4.437923250564334, "grad_norm": 13.075263977050781, "learning_rate": 3.2284396405374787e-07, "lm_loss": 5.4491, "loss": 1.3857, "step": 1966, "text_contrastive_loss": 0.951, "train_positive_log_prob": -80.6276, "train_positive_token_accuracy": 0.0721, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3097, "epoch": 4.4401805869074495, "grad_norm": 11.862391471862793, "learning_rate": 3.202840379254374e-07, "lm_loss": 5.4522, "loss": 1.2188, "step": 1967, "text_contrastive_loss": 0.7277, "train_positive_log_prob": -81.4462, "train_positive_token_accuracy": 0.0863, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3162, "epoch": 4.442437923250564, "grad_norm": 12.69119930267334, "learning_rate": 3.177339655884737e-07, "lm_loss": 5.351, "loss": 1.2078, "step": 1968, "text_contrastive_loss": 0.713, "train_positive_log_prob": -77.9056, "train_positive_token_accuracy": 0.0746, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.4019, "epoch": 4.444695259593679, "grad_norm": 14.639025688171387, "learning_rate": 3.151937524123905e-07, "lm_loss": 5.3963, "loss": 1.374, "step": 1969, "text_contrastive_loss": 0.8648, "train_positive_log_prob": -80.1044, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.312, "epoch": 4.446952595936795, "grad_norm": 12.351846694946289, "learning_rate": 3.1266340374595693e-07, "lm_loss": 5.3913, "loss": 1.2669, "step": 1970, "text_contrastive_loss": 0.8316, "train_positive_log_prob": -80.0297, "train_positive_token_accuracy": 0.0811, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.2739, "epoch": 4.44920993227991, "grad_norm": 11.822172164916992, "learning_rate": 3.1014292491717444e-07, "lm_loss": 5.4641, "loss": 1.1646, "step": 1971, "text_contrastive_loss": 0.6886, "train_positive_log_prob": -81.6376, "train_positive_token_accuracy": 0.0871, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.3555, "epoch": 4.451467268623025, "grad_norm": 13.001505851745605, "learning_rate": 3.076323212332605e-07, "lm_loss": 5.5178, "loss": 1.3104, "step": 1972, "text_contrastive_loss": 0.8062, "train_positive_log_prob": -82.5088, "train_positive_token_accuracy": 0.0825, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.2463, "epoch": 4.45372460496614, "grad_norm": 9.949920654296875, "learning_rate": 3.0513159798063906e-07, "lm_loss": 5.4626, "loss": 1.176, "step": 1973, "text_contrastive_loss": 0.767, "train_positive_log_prob": -81.6643, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.2975, "epoch": 4.455981941309255, "grad_norm": 11.72691535949707, "learning_rate": 3.026407604249315e-07, "lm_loss": 5.4633, "loss": 1.1925, "step": 1974, "text_contrastive_loss": 0.6973, "train_positive_log_prob": -81.5643, "train_positive_token_accuracy": 0.0816, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3434, "epoch": 4.45823927765237, "grad_norm": 13.088930130004883, "learning_rate": 3.0015981381094073e-07, "lm_loss": 5.4003, "loss": 1.406, "step": 1975, "text_contrastive_loss": 1.0453, "train_positive_log_prob": -80.1802, "train_positive_token_accuracy": 0.0752, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3534, "epoch": 4.460496613995486, "grad_norm": 13.003207206726074, "learning_rate": 2.976887633626435e-07, "lm_loss": 5.408, "loss": 1.2628, "step": 1976, "text_contrastive_loss": 0.7372, "train_positive_log_prob": -80.3831, "train_positive_token_accuracy": 0.0781, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.2753, "epoch": 4.4627539503386, "grad_norm": 11.459562301635742, "learning_rate": 2.952276142831806e-07, "lm_loss": 5.3072, "loss": 1.1391, "step": 1977, "text_contrastive_loss": 0.6662, "train_positive_log_prob": -79.3733, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3029, "epoch": 4.465011286681715, "grad_norm": 12.701268196105957, "learning_rate": 2.9277637175484376e-07, "lm_loss": 5.4284, "loss": 1.2047, "step": 1978, "text_contrastive_loss": 0.7179, "train_positive_log_prob": -78.9638, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3661, "epoch": 4.467268623024831, "grad_norm": 12.158988952636719, "learning_rate": 2.9033504093906207e-07, "lm_loss": 5.3832, "loss": 1.2573, "step": 1979, "text_contrastive_loss": 0.7057, "train_positive_log_prob": -78.1847, "train_positive_token_accuracy": 0.0777, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.384, "epoch": 4.469525959367946, "grad_norm": 13.560571670532227, "learning_rate": 2.8790362697639685e-07, "lm_loss": 5.4802, "loss": 1.2471, "step": 1980, "text_contrastive_loss": 0.6301, "train_positive_log_prob": -79.8893, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3292, "epoch": 4.471783295711061, "grad_norm": 12.4685697555542, "learning_rate": 2.854821349865289e-07, "lm_loss": 5.3318, "loss": 1.3252, "step": 1981, "text_contrastive_loss": 0.9256, "train_positive_log_prob": -79.1013, "train_positive_token_accuracy": 0.0887, "train_positive_token_prob": 0.0339 }, { "contrastive_loss": 0.344, "epoch": 4.474040632054176, "grad_norm": 11.924105644226074, "learning_rate": 2.8307057006824514e-07, "lm_loss": 5.4745, "loss": 1.3361, "step": 1982, "text_contrastive_loss": 0.8894, "train_positive_log_prob": -81.9041, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.2922, "epoch": 4.476297968397291, "grad_norm": 12.074453353881836, "learning_rate": 2.806689372994292e-07, "lm_loss": 5.4906, "loss": 1.2187, "step": 1983, "text_contrastive_loss": 0.7548, "train_positive_log_prob": -82.8968, "train_positive_token_accuracy": 0.0735, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.4259, "epoch": 4.478555304740406, "grad_norm": 15.743043899536133, "learning_rate": 2.7827724173705273e-07, "lm_loss": 5.4123, "loss": 1.4059, "step": 1984, "text_contrastive_loss": 0.8776, "train_positive_log_prob": -78.8145, "train_positive_token_accuracy": 0.0848, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.4042, "epoch": 4.480812641083522, "grad_norm": 12.900703430175781, "learning_rate": 2.7589548841716274e-07, "lm_loss": 5.4043, "loss": 1.3865, "step": 1985, "text_contrastive_loss": 0.8838, "train_positive_log_prob": -79.5618, "train_positive_token_accuracy": 0.073, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.3141, "epoch": 4.483069977426637, "grad_norm": 12.590093612670898, "learning_rate": 2.735236823548715e-07, "lm_loss": 5.4271, "loss": 1.2328, "step": 1986, "text_contrastive_loss": 0.752, "train_positive_log_prob": -80.7912, "train_positive_token_accuracy": 0.0853, "train_positive_token_prob": 0.0325 }, { "contrastive_loss": 0.3985, "epoch": 4.485327313769751, "grad_norm": 13.033928871154785, "learning_rate": 2.711618285443457e-07, "lm_loss": 5.4622, "loss": 1.3368, "step": 1987, "text_contrastive_loss": 0.7842, "train_positive_log_prob": -80.5889, "train_positive_token_accuracy": 0.0789, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.3857, "epoch": 4.487584650112867, "grad_norm": 13.071440696716309, "learning_rate": 2.6880993195879614e-07, "lm_loss": 5.3629, "loss": 1.2775, "step": 1988, "text_contrastive_loss": 0.7109, "train_positive_log_prob": -78.0521, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.2822, "epoch": 4.489841986455982, "grad_norm": 14.284415245056152, "learning_rate": 2.6646799755046746e-07, "lm_loss": 5.4118, "loss": 1.1507, "step": 1989, "text_contrastive_loss": 0.6547, "train_positive_log_prob": -80.6014, "train_positive_token_accuracy": 0.0755, "train_positive_token_prob": 0.0296 }, { "contrastive_loss": 0.2255, "epoch": 4.492099322799097, "grad_norm": 9.530472755432129, "learning_rate": 2.64136030250628e-07, "lm_loss": 5.3235, "loss": 1.0414, "step": 1990, "text_contrastive_loss": 0.5671, "train_positive_log_prob": -76.9655, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3283, "epoch": 4.494356659142213, "grad_norm": 12.200201988220215, "learning_rate": 2.618140349695575e-07, "lm_loss": 5.3967, "loss": 1.2619, "step": 1991, "text_contrastive_loss": 0.7879, "train_positive_log_prob": -79.5396, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3181, "epoch": 4.496613995485327, "grad_norm": 13.046611785888672, "learning_rate": 2.595020165965401e-07, "lm_loss": 5.3985, "loss": 1.1363, "step": 1992, "text_contrastive_loss": 0.5566, "train_positive_log_prob": -79.5424, "train_positive_token_accuracy": 0.075, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4152, "epoch": 4.498871331828442, "grad_norm": 15.92698860168457, "learning_rate": 2.571999799998509e-07, "lm_loss": 5.3889, "loss": 1.3561, "step": 1993, "text_contrastive_loss": 0.8039, "train_positive_log_prob": -79.6548, "train_positive_token_accuracy": 0.0798, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.4907, "epoch": 4.501128668171558, "grad_norm": 14.646669387817383, "learning_rate": 2.549079300267482e-07, "lm_loss": 5.404, "loss": 1.4439, "step": 1994, "text_contrastive_loss": 0.8257, "train_positive_log_prob": -80.0659, "train_positive_token_accuracy": 0.0825, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3904, "epoch": 4.503386004514673, "grad_norm": 13.377157211303711, "learning_rate": 2.526258715034602e-07, "lm_loss": 5.4566, "loss": 1.367, "step": 1995, "text_contrastive_loss": 0.8619, "train_positive_log_prob": -80.9489, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3421, "epoch": 4.505643340857787, "grad_norm": 11.762893676757812, "learning_rate": 2.503538092351782e-07, "lm_loss": 5.4225, "loss": 1.2789, "step": 1996, "text_contrastive_loss": 0.7892, "train_positive_log_prob": -80.2756, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.2713, "epoch": 4.507900677200903, "grad_norm": 11.286093711853027, "learning_rate": 2.480917480060441e-07, "lm_loss": 5.4995, "loss": 1.1857, "step": 1997, "text_contrastive_loss": 0.729, "train_positive_log_prob": -84.0377, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3871, "epoch": 4.510158013544018, "grad_norm": 11.77099323272705, "learning_rate": 2.458396925791434e-07, "lm_loss": 5.5436, "loss": 1.3411, "step": 1998, "text_contrastive_loss": 0.7993, "train_positive_log_prob": -83.414, "train_positive_token_accuracy": 0.0719, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.2465, "epoch": 4.512415349887133, "grad_norm": 10.904062271118164, "learning_rate": 2.4359764769648907e-07, "lm_loss": 5.4694, "loss": 1.16, "step": 1999, "text_contrastive_loss": 0.7331, "train_positive_log_prob": -81.9734, "train_positive_token_accuracy": 0.0868, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3651, "epoch": 4.514672686230249, "grad_norm": 12.968461036682129, "learning_rate": 2.4136561807901916e-07, "lm_loss": 5.439, "loss": 1.3276, "step": 2000, "text_contrastive_loss": 0.8373, "train_positive_log_prob": -80.5832, "train_positive_token_accuracy": 0.0862, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.2988, "epoch": 4.516930022573363, "grad_norm": 11.34416675567627, "learning_rate": 2.391436084265814e-07, "lm_loss": 5.4296, "loss": 1.1813, "step": 2001, "text_contrastive_loss": 0.679, "train_positive_log_prob": -80.3034, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.2805, "epoch": 4.519187358916478, "grad_norm": 10.901216506958008, "learning_rate": 2.3693162341792532e-07, "lm_loss": 5.3696, "loss": 1.1619, "step": 2002, "text_contrastive_loss": 0.6888, "train_positive_log_prob": -77.397, "train_positive_token_accuracy": 0.0783, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.3483, "epoch": 4.521444695259594, "grad_norm": 12.048800468444824, "learning_rate": 2.347296677106925e-07, "lm_loss": 5.4966, "loss": 1.3013, "step": 2003, "text_contrastive_loss": 0.8066, "train_positive_log_prob": -81.4393, "train_positive_token_accuracy": 0.0762, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3522, "epoch": 4.523702031602709, "grad_norm": 11.89638614654541, "learning_rate": 2.3253774594140633e-07, "lm_loss": 5.412, "loss": 1.2412, "step": 2004, "text_contrastive_loss": 0.6956, "train_positive_log_prob": -79.8289, "train_positive_token_accuracy": 0.0731, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3092, "epoch": 4.525959367945823, "grad_norm": 12.427459716796875, "learning_rate": 2.3035586272546207e-07, "lm_loss": 5.3236, "loss": 1.321, "step": 2005, "text_contrastive_loss": 0.9589, "train_positive_log_prob": -77.9007, "train_positive_token_accuracy": 0.0729, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.2784, "epoch": 4.528216704288939, "grad_norm": 11.306635856628418, "learning_rate": 2.2818402265711858e-07, "lm_loss": 5.5013, "loss": 1.1735, "step": 2006, "text_contrastive_loss": 0.6899, "train_positive_log_prob": -81.2113, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3845, "epoch": 4.530474040632054, "grad_norm": 15.276398658752441, "learning_rate": 2.2602223030948445e-07, "lm_loss": 5.381, "loss": 1.3115, "step": 2007, "text_contrastive_loss": 0.7777, "train_positive_log_prob": -81.9315, "train_positive_token_accuracy": 0.0893, "train_positive_token_prob": 0.0327 }, { "contrastive_loss": 0.2927, "epoch": 4.532731376975169, "grad_norm": 11.345015525817871, "learning_rate": 2.2387049023451458e-07, "lm_loss": 5.3351, "loss": 1.1545, "step": 2008, "text_contrastive_loss": 0.6567, "train_positive_log_prob": -78.8331, "train_positive_token_accuracy": 0.078, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.3535, "epoch": 4.534988713318285, "grad_norm": 15.586918830871582, "learning_rate": 2.2172880696299692e-07, "lm_loss": 5.3683, "loss": 1.2256, "step": 2009, "text_contrastive_loss": 0.6706, "train_positive_log_prob": -79.2257, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.2647, "epoch": 4.5372460496614, "grad_norm": 10.706208229064941, "learning_rate": 2.1959718500454196e-07, "lm_loss": 5.5631, "loss": 1.1452, "step": 2010, "text_contrastive_loss": 0.6484, "train_positive_log_prob": -82.7724, "train_positive_token_accuracy": 0.073, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.3174, "epoch": 4.539503386004514, "grad_norm": 12.67294692993164, "learning_rate": 2.17475628847576e-07, "lm_loss": 5.387, "loss": 1.2475, "step": 2011, "text_contrastive_loss": 0.7827, "train_positive_log_prob": -79.5876, "train_positive_token_accuracy": 0.0812, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.2579, "epoch": 4.54176072234763, "grad_norm": 11.39964771270752, "learning_rate": 2.1536414295932896e-07, "lm_loss": 5.4511, "loss": 1.1641, "step": 2012, "text_contrastive_loss": 0.7222, "train_positive_log_prob": -79.8067, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.4161, "epoch": 4.544018058690745, "grad_norm": 13.330033302307129, "learning_rate": 2.1326273178582822e-07, "lm_loss": 5.305, "loss": 1.4528, "step": 2013, "text_contrastive_loss": 1.0125, "train_positive_log_prob": -77.4401, "train_positive_token_accuracy": 0.0851, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.373, "epoch": 4.54627539503386, "grad_norm": 12.099190711975098, "learning_rate": 2.1117139975188716e-07, "lm_loss": 5.4084, "loss": 1.2817, "step": 2014, "text_contrastive_loss": 0.7357, "train_positive_log_prob": -80.1291, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.2718, "epoch": 4.548532731376975, "grad_norm": 12.181597709655762, "learning_rate": 2.0909015126109488e-07, "lm_loss": 5.368, "loss": 1.1887, "step": 2015, "text_contrastive_loss": 0.7603, "train_positive_log_prob": -81.444, "train_positive_token_accuracy": 0.0775, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.2917, "epoch": 4.55079006772009, "grad_norm": 11.465213775634766, "learning_rate": 2.070189906958081e-07, "lm_loss": 5.4248, "loss": 1.1875, "step": 2016, "text_contrastive_loss": 0.7067, "train_positive_log_prob": -80.5051, "train_positive_token_accuracy": 0.0821, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.276, "epoch": 4.553047404063205, "grad_norm": 11.312572479248047, "learning_rate": 2.0495792241714386e-07, "lm_loss": 5.4656, "loss": 1.2279, "step": 2017, "text_contrastive_loss": 0.8106, "train_positive_log_prob": -80.32, "train_positive_token_accuracy": 0.0758, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.3549, "epoch": 4.555304740406321, "grad_norm": 14.06812858581543, "learning_rate": 2.029069507649678e-07, "lm_loss": 5.4875, "loss": 1.3419, "step": 2018, "text_contrastive_loss": 0.8765, "train_positive_log_prob": -81.2641, "train_positive_token_accuracy": 0.0707, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.2721, "epoch": 4.557562076749436, "grad_norm": 10.342501640319824, "learning_rate": 2.0086608005788376e-07, "lm_loss": 5.3842, "loss": 1.1507, "step": 2019, "text_contrastive_loss": 0.6803, "train_positive_log_prob": -78.0244, "train_positive_token_accuracy": 0.083, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.2837, "epoch": 4.5598194130925505, "grad_norm": 11.828228950500488, "learning_rate": 1.988353145932298e-07, "lm_loss": 5.4307, "loss": 1.1625, "step": 2020, "text_contrastive_loss": 0.6716, "train_positive_log_prob": -79.9561, "train_positive_token_accuracy": 0.0804, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.2986, "epoch": 4.562076749435666, "grad_norm": 12.166635513305664, "learning_rate": 1.9681465864706372e-07, "lm_loss": 5.4377, "loss": 1.2576, "step": 2021, "text_contrastive_loss": 0.8305, "train_positive_log_prob": -81.2903, "train_positive_token_accuracy": 0.0868, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3584, "epoch": 4.564334085778781, "grad_norm": 12.779674530029297, "learning_rate": 1.9480411647415708e-07, "lm_loss": 5.4291, "loss": 1.2428, "step": 2022, "text_contrastive_loss": 0.683, "train_positive_log_prob": -79.7797, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.4095, "epoch": 4.566591422121896, "grad_norm": 12.529008865356445, "learning_rate": 1.9280369230798568e-07, "lm_loss": 5.365, "loss": 1.3624, "step": 2023, "text_contrastive_loss": 0.8327, "train_positive_log_prob": -76.9113, "train_positive_token_accuracy": 0.0815, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.2817, "epoch": 4.568848758465011, "grad_norm": 12.580851554870605, "learning_rate": 1.9081339036071956e-07, "lm_loss": 5.3876, "loss": 1.2247, "step": 2024, "text_contrastive_loss": 0.8087, "train_positive_log_prob": -79.8277, "train_positive_token_accuracy": 0.0754, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.2684, "epoch": 4.571106094808126, "grad_norm": 10.923868179321289, "learning_rate": 1.8883321482321583e-07, "lm_loss": 5.4357, "loss": 1.1561, "step": 2025, "text_contrastive_loss": 0.6884, "train_positive_log_prob": -79.1586, "train_positive_token_accuracy": 0.0821, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3664, "epoch": 4.573363431151241, "grad_norm": 13.49759578704834, "learning_rate": 1.8686316986500974e-07, "lm_loss": 5.4025, "loss": 1.3802, "step": 2026, "text_contrastive_loss": 0.9471, "train_positive_log_prob": -80.7852, "train_positive_token_accuracy": 0.0862, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3016, "epoch": 4.575620767494357, "grad_norm": 10.842415809631348, "learning_rate": 1.8490325963430368e-07, "lm_loss": 5.4726, "loss": 1.2499, "step": 2027, "text_contrastive_loss": 0.8021, "train_positive_log_prob": -79.3777, "train_positive_token_accuracy": 0.0749, "train_positive_token_prob": 0.0293 }, { "contrastive_loss": 0.3296, "epoch": 4.577878103837472, "grad_norm": 11.55229663848877, "learning_rate": 1.829534882579598e-07, "lm_loss": 5.5845, "loss": 1.2516, "step": 2028, "text_contrastive_loss": 0.7272, "train_positive_log_prob": -82.3522, "train_positive_token_accuracy": 0.076, "train_positive_token_prob": 0.0299 }, { "contrastive_loss": 0.3164, "epoch": 4.580135440180587, "grad_norm": 12.349048614501953, "learning_rate": 1.8101385984149343e-07, "lm_loss": 5.3823, "loss": 1.2898, "step": 2029, "text_contrastive_loss": 0.8703, "train_positive_log_prob": -79.4107, "train_positive_token_accuracy": 0.0776, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3504, "epoch": 4.582392776523702, "grad_norm": 12.200100898742676, "learning_rate": 1.7908437846906158e-07, "lm_loss": 5.435, "loss": 1.2723, "step": 2030, "text_contrastive_loss": 0.7567, "train_positive_log_prob": -81.249, "train_positive_token_accuracy": 0.082, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3954, "epoch": 4.584650112866817, "grad_norm": 13.013609886169434, "learning_rate": 1.7716504820345427e-07, "lm_loss": 5.3436, "loss": 1.3419, "step": 2031, "text_contrastive_loss": 0.8244, "train_positive_log_prob": -77.6288, "train_positive_token_accuracy": 0.0869, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.3222, "epoch": 4.586907449209932, "grad_norm": 11.47140121459961, "learning_rate": 1.752558730860876e-07, "lm_loss": 5.3549, "loss": 1.2048, "step": 2032, "text_contrastive_loss": 0.6943, "train_positive_log_prob": -77.5382, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.3463, "epoch": 4.589164785553048, "grad_norm": 13.19788646697998, "learning_rate": 1.733568571369948e-07, "lm_loss": 5.4858, "loss": 1.2803, "step": 2033, "text_contrastive_loss": 0.7707, "train_positive_log_prob": -80.9162, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3182, "epoch": 4.591422121896162, "grad_norm": 11.778922080993652, "learning_rate": 1.7146800435481837e-07, "lm_loss": 5.3939, "loss": 1.2261, "step": 2034, "text_contrastive_loss": 0.7371, "train_positive_log_prob": -79.3073, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4292, "epoch": 4.5936794582392775, "grad_norm": 14.718167304992676, "learning_rate": 1.6958931871679908e-07, "lm_loss": 5.5364, "loss": 1.3682, "step": 2035, "text_contrastive_loss": 0.7706, "train_positive_log_prob": -83.2176, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3166, "epoch": 4.595936794582393, "grad_norm": 12.074480056762695, "learning_rate": 1.677208041787698e-07, "lm_loss": 5.4131, "loss": 1.1992, "step": 2036, "text_contrastive_loss": 0.6826, "train_positive_log_prob": -79.6951, "train_positive_token_accuracy": 0.0748, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3273, "epoch": 4.598194130925508, "grad_norm": 13.735128402709961, "learning_rate": 1.6586246467514833e-07, "lm_loss": 5.411, "loss": 1.305, "step": 2037, "text_contrastive_loss": 0.8731, "train_positive_log_prob": -81.145, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4123, "epoch": 4.600451467268623, "grad_norm": 13.067122459411621, "learning_rate": 1.6401430411892572e-07, "lm_loss": 5.3216, "loss": 1.3894, "step": 2038, "text_contrastive_loss": 0.8898, "train_positive_log_prob": -80.0312, "train_positive_token_accuracy": 0.0821, "train_positive_token_prob": 0.0331 }, { "contrastive_loss": 0.31, "epoch": 4.602708803611738, "grad_norm": 10.924623489379883, "learning_rate": 1.621763264016607e-07, "lm_loss": 5.4647, "loss": 1.1342, "step": 2039, "text_contrastive_loss": 0.5556, "train_positive_log_prob": -81.1827, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3662, "epoch": 4.604966139954853, "grad_norm": 13.684930801391602, "learning_rate": 1.603485353934703e-07, "lm_loss": 5.4645, "loss": 1.3618, "step": 2040, "text_contrastive_loss": 0.8984, "train_positive_log_prob": -82.928, "train_positive_token_accuracy": 0.0855, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.2111, "epoch": 4.6072234762979685, "grad_norm": 10.00831127166748, "learning_rate": 1.5853093494302195e-07, "lm_loss": 5.4779, "loss": 1.058, "step": 2041, "text_contrastive_loss": 0.5982, "train_positive_log_prob": -80.8853, "train_positive_token_accuracy": 0.0775, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3747, "epoch": 4.609480812641084, "grad_norm": 14.141894340515137, "learning_rate": 1.567235288775265e-07, "lm_loss": 5.401, "loss": 1.319, "step": 2042, "text_contrastive_loss": 0.8084, "train_positive_log_prob": -78.6915, "train_positive_token_accuracy": 0.081, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.344, "epoch": 4.611738148984198, "grad_norm": 12.464301109313965, "learning_rate": 1.5492632100272686e-07, "lm_loss": 5.4333, "loss": 1.2704, "step": 2043, "text_contrastive_loss": 0.7661, "train_positive_log_prob": -82.3947, "train_positive_token_accuracy": 0.0873, "train_positive_token_prob": 0.033 }, { "contrastive_loss": 0.4377, "epoch": 4.6139954853273135, "grad_norm": 13.692405700683594, "learning_rate": 1.5313931510289482e-07, "lm_loss": 5.3693, "loss": 1.3814, "step": 2044, "text_contrastive_loss": 0.8136, "train_positive_log_prob": -78.9673, "train_positive_token_accuracy": 0.0845, "train_positive_token_prob": 0.0332 }, { "contrastive_loss": 0.2544, "epoch": 4.616252821670429, "grad_norm": 11.105146408081055, "learning_rate": 1.5136251494081822e-07, "lm_loss": 5.446, "loss": 1.1049, "step": 2045, "text_contrastive_loss": 0.6119, "train_positive_log_prob": -79.5606, "train_positive_token_accuracy": 0.0781, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.361, "epoch": 4.618510158013544, "grad_norm": 12.92881965637207, "learning_rate": 1.4959592425779768e-07, "lm_loss": 5.3528, "loss": 1.2643, "step": 2046, "text_contrastive_loss": 0.7361, "train_positive_log_prob": -79.4182, "train_positive_token_accuracy": 0.0798, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.3699, "epoch": 4.6207674943566595, "grad_norm": 14.304123878479004, "learning_rate": 1.4783954677363376e-07, "lm_loss": 5.4698, "loss": 1.316, "step": 2047, "text_contrastive_loss": 0.7984, "train_positive_log_prob": -82.6011, "train_positive_token_accuracy": 0.0755, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3806, "epoch": 4.623024830699774, "grad_norm": 12.955826759338379, "learning_rate": 1.4609338618662318e-07, "lm_loss": 5.4526, "loss": 1.3282, "step": 2048, "text_contrastive_loss": 0.8047, "train_positive_log_prob": -83.1533, "train_positive_token_accuracy": 0.0671, "train_positive_token_prob": 0.0295 }, { "contrastive_loss": 0.3194, "epoch": 4.625282167042889, "grad_norm": 12.944198608398438, "learning_rate": 1.4435744617354975e-07, "lm_loss": 5.4412, "loss": 1.3477, "step": 2049, "text_contrastive_loss": 0.9683, "train_positive_log_prob": -81.9415, "train_positive_token_accuracy": 0.0799, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.305, "epoch": 4.6275395033860045, "grad_norm": 11.911348342895508, "learning_rate": 1.4263173038967627e-07, "lm_loss": 5.5198, "loss": 1.2054, "step": 2050, "text_contrastive_loss": 0.697, "train_positive_log_prob": -81.1809, "train_positive_token_accuracy": 0.0722, "train_positive_token_prob": 0.0289 }, { "contrastive_loss": 0.2567, "epoch": 4.62979683972912, "grad_norm": 11.535606384277344, "learning_rate": 1.409162424687366e-07, "lm_loss": 5.3598, "loss": 1.1905, "step": 2051, "text_contrastive_loss": 0.7955, "train_positive_log_prob": -78.6422, "train_positive_token_accuracy": 0.0756, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.2979, "epoch": 4.632054176072235, "grad_norm": 13.395713806152344, "learning_rate": 1.3921098602292793e-07, "lm_loss": 5.3943, "loss": 1.2463, "step": 2052, "text_contrastive_loss": 0.8179, "train_positive_log_prob": -79.0636, "train_positive_token_accuracy": 0.0861, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.2936, "epoch": 4.6343115124153496, "grad_norm": 11.608037948608398, "learning_rate": 1.3751596464290529e-07, "lm_loss": 5.4302, "loss": 1.2308, "step": 2053, "text_contrastive_loss": 0.7883, "train_positive_log_prob": -79.7558, "train_positive_token_accuracy": 0.0734, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3458, "epoch": 4.636568848758465, "grad_norm": 13.384845733642578, "learning_rate": 1.358311818977709e-07, "lm_loss": 5.3795, "loss": 1.2662, "step": 2054, "text_contrastive_loss": 0.7648, "train_positive_log_prob": -77.6663, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3486, "epoch": 4.63882618510158, "grad_norm": 11.830622673034668, "learning_rate": 1.3415664133506812e-07, "lm_loss": 5.4747, "loss": 1.3308, "step": 2055, "text_contrastive_loss": 0.8694, "train_positive_log_prob": -79.8118, "train_positive_token_accuracy": 0.0758, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3462, "epoch": 4.6410835214446955, "grad_norm": 13.477181434631348, "learning_rate": 1.324923464807759e-07, "lm_loss": 5.4064, "loss": 1.3357, "step": 2056, "text_contrastive_loss": 0.8979, "train_positive_log_prob": -80.4606, "train_positive_token_accuracy": 0.0814, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.3742, "epoch": 4.643340857787811, "grad_norm": 12.482406616210938, "learning_rate": 1.308383008392977e-07, "lm_loss": 5.3617, "loss": 1.3185, "step": 2057, "text_contrastive_loss": 0.8163, "train_positive_log_prob": -78.4543, "train_positive_token_accuracy": 0.0723, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3072, "epoch": 4.645598194130925, "grad_norm": 11.806655883789062, "learning_rate": 1.2919450789345477e-07, "lm_loss": 5.4184, "loss": 1.2309, "step": 2058, "text_contrastive_loss": 0.7636, "train_positive_log_prob": -79.0748, "train_positive_token_accuracy": 0.0808, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.2766, "epoch": 4.6478555304740405, "grad_norm": 12.432291984558105, "learning_rate": 1.275609711044823e-07, "lm_loss": 5.342, "loss": 1.183, "step": 2059, "text_contrastive_loss": 0.7443, "train_positive_log_prob": -79.4607, "train_positive_token_accuracy": 0.0793, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.406, "epoch": 4.650112866817156, "grad_norm": 12.977935791015625, "learning_rate": 1.2593769391201827e-07, "lm_loss": 5.4356, "loss": 1.4555, "step": 2060, "text_contrastive_loss": 1.0119, "train_positive_log_prob": -80.8732, "train_positive_token_accuracy": 0.0848, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.3498, "epoch": 4.652370203160271, "grad_norm": 12.516545295715332, "learning_rate": 1.2432467973409857e-07, "lm_loss": 5.3364, "loss": 1.2904, "step": 2061, "text_contrastive_loss": 0.814, "train_positive_log_prob": -78.9008, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.0325 }, { "contrastive_loss": 0.2692, "epoch": 4.654627539503386, "grad_norm": 10.78467082977295, "learning_rate": 1.2272193196714854e-07, "lm_loss": 5.5324, "loss": 1.1341, "step": 2062, "text_contrastive_loss": 0.6233, "train_positive_log_prob": -82.2738, "train_positive_token_accuracy": 0.0809, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3494, "epoch": 4.656884875846501, "grad_norm": 12.288996696472168, "learning_rate": 1.211294539859753e-07, "lm_loss": 5.4357, "loss": 1.2813, "step": 2063, "text_contrastive_loss": 0.7766, "train_positive_log_prob": -79.4139, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3073, "epoch": 4.659142212189616, "grad_norm": 11.596695899963379, "learning_rate": 1.1954724914376215e-07, "lm_loss": 5.3737, "loss": 1.1777, "step": 2064, "text_contrastive_loss": 0.666, "train_positive_log_prob": -79.1985, "train_positive_token_accuracy": 0.0824, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3061, "epoch": 4.6613995485327315, "grad_norm": 12.038764953613281, "learning_rate": 1.1797532077206187e-07, "lm_loss": 5.4305, "loss": 1.211, "step": 2065, "text_contrastive_loss": 0.7238, "train_positive_log_prob": -81.8811, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.4456, "epoch": 4.663656884875847, "grad_norm": 12.934012413024902, "learning_rate": 1.1641367218078736e-07, "lm_loss": 5.3544, "loss": 1.3961, "step": 2066, "text_contrastive_loss": 0.8301, "train_positive_log_prob": -77.4513, "train_positive_token_accuracy": 0.0864, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.3436, "epoch": 4.665914221218961, "grad_norm": 12.084171295166016, "learning_rate": 1.1486230665820552e-07, "lm_loss": 5.2328, "loss": 1.2445, "step": 2067, "text_contrastive_loss": 0.7552, "train_positive_log_prob": -76.1001, "train_positive_token_accuracy": 0.0857, "train_positive_token_prob": 0.0328 }, { "contrastive_loss": 0.4447, "epoch": 4.668171557562077, "grad_norm": 13.250980377197266, "learning_rate": 1.1332122747093277e-07, "lm_loss": 5.4104, "loss": 1.4142, "step": 2068, "text_contrastive_loss": 0.8569, "train_positive_log_prob": -83.3349, "train_positive_token_accuracy": 0.0834, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.3151, "epoch": 4.670428893905192, "grad_norm": 13.162932395935059, "learning_rate": 1.1179043786392507e-07, "lm_loss": 5.4408, "loss": 1.2327, "step": 2069, "text_contrastive_loss": 0.7472, "train_positive_log_prob": -83.866, "train_positive_token_accuracy": 0.0765, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.2952, "epoch": 4.672686230248307, "grad_norm": 11.531064987182617, "learning_rate": 1.1026994106047296e-07, "lm_loss": 5.4242, "loss": 1.2729, "step": 2070, "text_contrastive_loss": 0.8707, "train_positive_log_prob": -79.7545, "train_positive_token_accuracy": 0.0725, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3958, "epoch": 4.674943566591422, "grad_norm": 13.006556510925293, "learning_rate": 1.0875974026219149e-07, "lm_loss": 5.3074, "loss": 1.3072, "step": 2071, "text_contrastive_loss": 0.7614, "train_positive_log_prob": -78.7218, "train_positive_token_accuracy": 0.088, "train_positive_token_prob": 0.0328 }, { "contrastive_loss": 0.3375, "epoch": 4.677200902934537, "grad_norm": 12.089827537536621, "learning_rate": 1.0725983864901978e-07, "lm_loss": 5.4113, "loss": 1.2594, "step": 2072, "text_contrastive_loss": 0.7614, "train_positive_log_prob": -80.4053, "train_positive_token_accuracy": 0.0828, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3665, "epoch": 4.679458239277652, "grad_norm": 12.759051322937012, "learning_rate": 1.0577023937920816e-07, "lm_loss": 5.499, "loss": 1.3204, "step": 2073, "text_contrastive_loss": 0.808, "train_positive_log_prob": -82.7604, "train_positive_token_accuracy": 0.0771, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.3849, "epoch": 4.681715575620768, "grad_norm": 14.711612701416016, "learning_rate": 1.0429094558931485e-07, "lm_loss": 5.3991, "loss": 1.3684, "step": 2074, "text_contrastive_loss": 0.8873, "train_positive_log_prob": -79.6192, "train_positive_token_accuracy": 0.0872, "train_positive_token_prob": 0.0326 }, { "contrastive_loss": 0.3411, "epoch": 4.683972911963883, "grad_norm": 12.863508224487305, "learning_rate": 1.0282196039419823e-07, "lm_loss": 5.4376, "loss": 1.3317, "step": 2075, "text_contrastive_loss": 0.8936, "train_positive_log_prob": -80.5236, "train_positive_token_accuracy": 0.0744, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3127, "epoch": 4.686230248306998, "grad_norm": 10.788444519042969, "learning_rate": 1.0136328688700958e-07, "lm_loss": 5.4425, "loss": 1.2224, "step": 2076, "text_contrastive_loss": 0.731, "train_positive_log_prob": -79.3002, "train_positive_token_accuracy": 0.0718, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3667, "epoch": 4.688487584650113, "grad_norm": 13.088545799255371, "learning_rate": 9.99149281391898e-08, "lm_loss": 5.3919, "loss": 1.2321, "step": 2077, "text_contrastive_loss": 0.6524, "train_positive_log_prob": -78.898, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3255, "epoch": 4.690744920993228, "grad_norm": 11.597572326660156, "learning_rate": 9.847688720045878e-08, "lm_loss": 5.4269, "loss": 1.2331, "step": 2078, "text_contrastive_loss": 0.73, "train_positive_log_prob": -79.6855, "train_positive_token_accuracy": 0.0843, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.2471, "epoch": 4.693002257336343, "grad_norm": 11.617135047912598, "learning_rate": 9.704916709881052e-08, "lm_loss": 5.4324, "loss": 1.145, "step": 2079, "text_contrastive_loss": 0.7092, "train_positive_log_prob": -79.9204, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3364, "epoch": 4.6952595936794586, "grad_norm": 11.719111442565918, "learning_rate": 9.5631770840508e-08, "lm_loss": 5.5484, "loss": 1.234, "step": 2080, "text_contrastive_loss": 0.6854, "train_positive_log_prob": -83.6129, "train_positive_token_accuracy": 0.0791, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3692, "epoch": 4.697516930022573, "grad_norm": 12.207919120788574, "learning_rate": 9.422470141007667e-08, "lm_loss": 5.4526, "loss": 1.2597, "step": 2081, "text_contrastive_loss": 0.6903, "train_positive_log_prob": -82.4028, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.336, "epoch": 4.699774266365688, "grad_norm": 12.443987846374512, "learning_rate": 9.282796177029596e-08, "lm_loss": 5.4218, "loss": 1.2955, "step": 2082, "text_contrastive_loss": 0.8346, "train_positive_log_prob": -79.5021, "train_positive_token_accuracy": 0.0815, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.4101, "epoch": 4.702031602708804, "grad_norm": 12.711996078491211, "learning_rate": 9.144155486219442e-08, "lm_loss": 5.4367, "loss": 1.4459, "step": 2083, "text_contrastive_loss": 0.9843, "train_positive_log_prob": -80.0858, "train_positive_token_accuracy": 0.0785, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.2777, "epoch": 4.704288939051919, "grad_norm": 10.978333473205566, "learning_rate": 9.006548360504463e-08, "lm_loss": 5.4455, "loss": 1.1272, "step": 2084, "text_contrastive_loss": 0.61, "train_positive_log_prob": -80.8739, "train_positive_token_accuracy": 0.0784, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3009, "epoch": 4.706546275395034, "grad_norm": 12.159939765930176, "learning_rate": 8.869975089635552e-08, "lm_loss": 5.4226, "loss": 1.222, "step": 2085, "text_contrastive_loss": 0.7578, "train_positive_log_prob": -81.4469, "train_positive_token_accuracy": 0.0772, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3548, "epoch": 4.708803611738149, "grad_norm": 11.7377347946167, "learning_rate": 8.734435961186782e-08, "lm_loss": 5.4025, "loss": 1.2418, "step": 2086, "text_contrastive_loss": 0.6935, "train_positive_log_prob": -79.0716, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3235, "epoch": 4.711060948081264, "grad_norm": 12.328231811523438, "learning_rate": 8.599931260554417e-08, "lm_loss": 5.5571, "loss": 1.2996, "step": 2087, "text_contrastive_loss": 0.8409, "train_positive_log_prob": -82.8615, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.2976, "epoch": 4.713318284424379, "grad_norm": 10.746467590332031, "learning_rate": 8.466461270956794e-08, "lm_loss": 5.4189, "loss": 1.2427, "step": 2088, "text_contrastive_loss": 0.8063, "train_positive_log_prob": -79.5882, "train_positive_token_accuracy": 0.0867, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.2718, "epoch": 4.715575620767495, "grad_norm": 11.545552253723145, "learning_rate": 8.334026273433659e-08, "lm_loss": 5.3835, "loss": 1.234, "step": 2089, "text_contrastive_loss": 0.8478, "train_positive_log_prob": -78.9033, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.2311, "epoch": 4.717832957110609, "grad_norm": 10.946799278259277, "learning_rate": 8.202626546845172e-08, "lm_loss": 5.4353, "loss": 1.071, "step": 2090, "text_contrastive_loss": 0.5926, "train_positive_log_prob": -79.4662, "train_positive_token_accuracy": 0.078, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.4032, "epoch": 4.720090293453724, "grad_norm": 12.895364761352539, "learning_rate": 8.072262367871675e-08, "lm_loss": 5.3504, "loss": 1.3457, "step": 2091, "text_contrastive_loss": 0.8149, "train_positive_log_prob": -79.6706, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.3089, "epoch": 4.72234762979684, "grad_norm": 13.007477760314941, "learning_rate": 7.942934011013037e-08, "lm_loss": 5.4861, "loss": 1.2169, "step": 2092, "text_contrastive_loss": 0.7188, "train_positive_log_prob": -82.4851, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.2809, "epoch": 4.724604966139955, "grad_norm": 10.615347862243652, "learning_rate": 7.814641748588148e-08, "lm_loss": 5.2945, "loss": 1.1622, "step": 2093, "text_contrastive_loss": 0.7038, "train_positive_log_prob": -78.9861, "train_positive_token_accuracy": 0.0744, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.2786, "epoch": 4.72686230248307, "grad_norm": 13.077999114990234, "learning_rate": 7.687385850734086e-08, "lm_loss": 5.4131, "loss": 1.2135, "step": 2094, "text_contrastive_loss": 0.7872, "train_positive_log_prob": -80.4959, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.312, "epoch": 4.729119638826186, "grad_norm": 13.22095775604248, "learning_rate": 7.561166585405789e-08, "lm_loss": 5.3892, "loss": 1.2536, "step": 2095, "text_contrastive_loss": 0.8053, "train_positive_log_prob": -77.1326, "train_positive_token_accuracy": 0.068, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.2928, "epoch": 4.7313769751693, "grad_norm": 12.82379150390625, "learning_rate": 7.435984218375436e-08, "lm_loss": 5.3739, "loss": 1.1897, "step": 2096, "text_contrastive_loss": 0.719, "train_positive_log_prob": -78.2753, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3323, "epoch": 4.733634311512415, "grad_norm": 14.110048294067383, "learning_rate": 7.311839013231959e-08, "lm_loss": 5.4364, "loss": 1.2955, "step": 2097, "text_contrastive_loss": 0.8391, "train_positive_log_prob": -80.7377, "train_positive_token_accuracy": 0.0831, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.3956, "epoch": 4.735891647855531, "grad_norm": 14.666621208190918, "learning_rate": 7.188731231380253e-08, "lm_loss": 5.3727, "loss": 1.2443, "step": 2098, "text_contrastive_loss": 0.6229, "train_positive_log_prob": -79.4347, "train_positive_token_accuracy": 0.0748, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3946, "epoch": 4.738148984198646, "grad_norm": 12.903202056884766, "learning_rate": 7.066661132040853e-08, "lm_loss": 5.5108, "loss": 1.326, "step": 2099, "text_contrastive_loss": 0.7608, "train_positive_log_prob": -81.371, "train_positive_token_accuracy": 0.0825, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.2598, "epoch": 4.74040632054176, "grad_norm": 11.989819526672363, "learning_rate": 6.945628972249208e-08, "lm_loss": 5.4815, "loss": 1.1443, "step": 2100, "text_contrastive_loss": 0.6727, "train_positive_log_prob": -81.1732, "train_positive_token_accuracy": 0.0836, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.3638, "epoch": 4.742663656884876, "grad_norm": 12.796906471252441, "learning_rate": 6.825635006855458e-08, "lm_loss": 5.4179, "loss": 1.3187, "step": 2101, "text_contrastive_loss": 0.8262, "train_positive_log_prob": -79.9597, "train_positive_token_accuracy": 0.0815, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.3587, "epoch": 4.744920993227991, "grad_norm": 14.724745750427246, "learning_rate": 6.706679488523494e-08, "lm_loss": 5.451, "loss": 1.3276, "step": 2102, "text_contrastive_loss": 0.8476, "train_positive_log_prob": -80.2735, "train_positive_token_accuracy": 0.0846, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.2436, "epoch": 4.747178329571106, "grad_norm": 10.973912239074707, "learning_rate": 6.58876266773062e-08, "lm_loss": 5.3528, "loss": 1.1167, "step": 2103, "text_contrastive_loss": 0.6755, "train_positive_log_prob": -80.1458, "train_positive_token_accuracy": 0.0824, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.3384, "epoch": 4.749435665914222, "grad_norm": 12.817278861999512, "learning_rate": 6.471884792767169e-08, "lm_loss": 5.5092, "loss": 1.2561, "step": 2104, "text_contrastive_loss": 0.7336, "train_positive_log_prob": -81.477, "train_positive_token_accuracy": 0.0708, "train_positive_token_prob": 0.0298 }, { "contrastive_loss": 0.3605, "epoch": 4.751693002257336, "grad_norm": 12.139284133911133, "learning_rate": 6.356046109735614e-08, "lm_loss": 5.3043, "loss": 1.3008, "step": 2105, "text_contrastive_loss": 0.8197, "train_positive_log_prob": -78.9196, "train_positive_token_accuracy": 0.0869, "train_positive_token_prob": 0.0326 }, { "contrastive_loss": 0.3031, "epoch": 4.753950338600451, "grad_norm": 12.628264427185059, "learning_rate": 6.241246862550398e-08, "lm_loss": 5.3751, "loss": 1.2693, "step": 2106, "text_contrastive_loss": 0.8572, "train_positive_log_prob": -78.2553, "train_positive_token_accuracy": 0.0775, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3356, "epoch": 4.756207674943567, "grad_norm": 13.342379570007324, "learning_rate": 6.127487292937328e-08, "lm_loss": 5.4735, "loss": 1.3297, "step": 2107, "text_contrastive_loss": 0.8935, "train_positive_log_prob": -79.8123, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.2696, "epoch": 4.758465011286682, "grad_norm": 10.582463264465332, "learning_rate": 6.014767640432905e-08, "lm_loss": 5.4762, "loss": 1.164, "step": 2108, "text_contrastive_loss": 0.6935, "train_positive_log_prob": -82.9131, "train_positive_token_accuracy": 0.0872, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.4206, "epoch": 4.760722347629796, "grad_norm": 12.734881401062012, "learning_rate": 5.903088142384106e-08, "lm_loss": 5.3532, "loss": 1.3542, "step": 2109, "text_contrastive_loss": 0.7965, "train_positive_log_prob": -81.2604, "train_positive_token_accuracy": 0.0808, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3928, "epoch": 4.762979683972912, "grad_norm": 14.01072883605957, "learning_rate": 5.7924490339474335e-08, "lm_loss": 5.5281, "loss": 1.3354, "step": 2110, "text_contrastive_loss": 0.7796, "train_positive_log_prob": -81.6016, "train_positive_token_accuracy": 0.0791, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3374, "epoch": 4.765237020316027, "grad_norm": 12.548912048339844, "learning_rate": 5.682850548089036e-08, "lm_loss": 5.4102, "loss": 1.2604, "step": 2111, "text_contrastive_loss": 0.7639, "train_positive_log_prob": -79.5031, "train_positive_token_accuracy": 0.0752, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3398, "epoch": 4.767494356659142, "grad_norm": 13.495406150817871, "learning_rate": 5.574292915583646e-08, "lm_loss": 5.3557, "loss": 1.2511, "step": 2112, "text_contrastive_loss": 0.7515, "train_positive_log_prob": -80.2072, "train_positive_token_accuracy": 0.0845, "train_positive_token_prob": 0.0327 }, { "contrastive_loss": 0.3384, "epoch": 4.769751693002258, "grad_norm": 11.788864135742188, "learning_rate": 5.46677636501447e-08, "lm_loss": 5.4145, "loss": 1.227, "step": 2113, "text_contrastive_loss": 0.6943, "train_positive_log_prob": -79.0759, "train_positive_token_accuracy": 0.0826, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.317, "epoch": 4.772009029345372, "grad_norm": 11.977039337158203, "learning_rate": 5.3603011227725265e-08, "lm_loss": 5.3856, "loss": 1.2106, "step": 2114, "text_contrastive_loss": 0.71, "train_positive_log_prob": -80.6362, "train_positive_token_accuracy": 0.0759, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.4162, "epoch": 4.774266365688487, "grad_norm": 15.057369232177734, "learning_rate": 5.2548674130561974e-08, "lm_loss": 5.3857, "loss": 1.4213, "step": 2115, "text_contrastive_loss": 0.9331, "train_positive_log_prob": -80.8506, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.3748, "epoch": 4.776523702031603, "grad_norm": 13.112998962402344, "learning_rate": 5.1504754578707294e-08, "lm_loss": 5.4176, "loss": 1.2763, "step": 2116, "text_contrastive_loss": 0.7194, "train_positive_log_prob": -81.0951, "train_positive_token_accuracy": 0.0814, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3005, "epoch": 4.778781038374718, "grad_norm": 12.008501052856445, "learning_rate": 5.047125477027959e-08, "lm_loss": 5.3666, "loss": 1.2459, "step": 2117, "text_contrastive_loss": 0.8174, "train_positive_log_prob": -80.3735, "train_positive_token_accuracy": 0.0816, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3083, "epoch": 4.781038374717833, "grad_norm": 13.533900260925293, "learning_rate": 4.944817688145642e-08, "lm_loss": 5.4471, "loss": 1.2277, "step": 2118, "text_contrastive_loss": 0.7494, "train_positive_log_prob": -80.7105, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.2717, "epoch": 4.783295711060948, "grad_norm": 13.106285095214844, "learning_rate": 4.843552306646904e-08, "lm_loss": 5.4763, "loss": 1.2196, "step": 2119, "text_contrastive_loss": 0.8005, "train_positive_log_prob": -78.9802, "train_positive_token_accuracy": 0.0764, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3135, "epoch": 4.785553047404063, "grad_norm": 12.778962135314941, "learning_rate": 4.743329545760122e-08, "lm_loss": 5.5184, "loss": 1.3231, "step": 2120, "text_contrastive_loss": 0.9156, "train_positive_log_prob": -80.8988, "train_positive_token_accuracy": 0.0677, "train_positive_token_prob": 0.029 }, { "contrastive_loss": 0.2959, "epoch": 4.787810383747178, "grad_norm": 11.397628784179688, "learning_rate": 4.644149616518212e-08, "lm_loss": 5.4751, "loss": 1.2509, "step": 2121, "text_contrastive_loss": 0.8151, "train_positive_log_prob": -83.5852, "train_positive_token_accuracy": 0.0798, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.339, "epoch": 4.790067720090294, "grad_norm": 13.37972640991211, "learning_rate": 4.5460127277582863e-08, "lm_loss": 5.4814, "loss": 1.2819, "step": 2122, "text_contrastive_loss": 0.7893, "train_positive_log_prob": -82.4566, "train_positive_token_accuracy": 0.0766, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.2208, "epoch": 4.792325056433409, "grad_norm": 11.886345863342285, "learning_rate": 4.448919086121217e-08, "lm_loss": 5.4168, "loss": 1.1237, "step": 2123, "text_contrastive_loss": 0.7226, "train_positive_log_prob": -79.4093, "train_positive_token_accuracy": 0.0855, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.4379, "epoch": 4.794582392776523, "grad_norm": 14.661003112792969, "learning_rate": 4.352868896051077e-08, "lm_loss": 5.2979, "loss": 1.417, "step": 2124, "text_contrastive_loss": 0.8986, "train_positive_log_prob": -76.606, "train_positive_token_accuracy": 0.0821, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3752, "epoch": 4.796839729119639, "grad_norm": 12.711482048034668, "learning_rate": 4.2578623597949174e-08, "lm_loss": 5.4417, "loss": 1.3594, "step": 2125, "text_contrastive_loss": 0.88, "train_positive_log_prob": -80.8598, "train_positive_token_accuracy": 0.0807, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.2863, "epoch": 4.799097065462754, "grad_norm": 11.192163467407227, "learning_rate": 4.163899677402161e-08, "lm_loss": 5.468, "loss": 1.2364, "step": 2126, "text_contrastive_loss": 0.8066, "train_positive_log_prob": -81.3618, "train_positive_token_accuracy": 0.0801, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3255, "epoch": 4.801354401805869, "grad_norm": 12.773097038269043, "learning_rate": 4.0709810467243204e-08, "lm_loss": 5.429, "loss": 1.2461, "step": 2127, "text_contrastive_loss": 0.7556, "train_positive_log_prob": -78.9775, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.343, "epoch": 4.803611738148984, "grad_norm": 14.528846740722656, "learning_rate": 3.979106663414389e-08, "lm_loss": 5.523, "loss": 1.282, "step": 2128, "text_contrastive_loss": 0.7735, "train_positive_log_prob": -80.3841, "train_positive_token_accuracy": 0.0773, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3483, "epoch": 4.805869074492099, "grad_norm": 12.990553855895996, "learning_rate": 3.8882767209266756e-08, "lm_loss": 5.3118, "loss": 1.299, "step": 2129, "text_contrastive_loss": 0.8389, "train_positive_log_prob": -78.0564, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.3104, "epoch": 4.808126410835214, "grad_norm": 12.372488021850586, "learning_rate": 3.7984914105162474e-08, "lm_loss": 5.3832, "loss": 1.2436, "step": 2130, "text_contrastive_loss": 0.7897, "train_positive_log_prob": -78.5156, "train_positive_token_accuracy": 0.0798, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3918, "epoch": 4.81038374717833, "grad_norm": 13.191797256469727, "learning_rate": 3.709750921238486e-08, "lm_loss": 5.3872, "loss": 1.3841, "step": 2131, "text_contrastive_loss": 0.9073, "train_positive_log_prob": -79.9259, "train_positive_token_accuracy": 0.0798, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.2995, "epoch": 4.812641083521445, "grad_norm": 12.146777153015137, "learning_rate": 3.622055439948813e-08, "lm_loss": 5.4303, "loss": 1.2288, "step": 2132, "text_contrastive_loss": 0.7725, "train_positive_log_prob": -80.8785, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.2753, "epoch": 4.8148984198645595, "grad_norm": 12.120311737060547, "learning_rate": 3.5354051513022405e-08, "lm_loss": 5.5964, "loss": 1.2464, "step": 2133, "text_contrastive_loss": 0.8229, "train_positive_log_prob": -81.5879, "train_positive_token_accuracy": 0.0734, "train_positive_token_prob": 0.0297 }, { "contrastive_loss": 0.4458, "epoch": 4.817155756207675, "grad_norm": 14.222434043884277, "learning_rate": 3.449800237753043e-08, "lm_loss": 5.5048, "loss": 1.4594, "step": 2134, "text_contrastive_loss": 0.9263, "train_positive_log_prob": -81.2377, "train_positive_token_accuracy": 0.0752, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.306, "epoch": 4.81941309255079, "grad_norm": 12.048348426818848, "learning_rate": 3.365240879554144e-08, "lm_loss": 5.3986, "loss": 1.2466, "step": 2135, "text_contrastive_loss": 0.8014, "train_positive_log_prob": -80.4526, "train_positive_token_accuracy": 0.0862, "train_positive_token_prob": 0.0327 }, { "contrastive_loss": 0.3533, "epoch": 4.821670428893905, "grad_norm": 12.44783878326416, "learning_rate": 3.281727254757061e-08, "lm_loss": 5.3986, "loss": 1.2914, "step": 2136, "text_contrastive_loss": 0.7965, "train_positive_log_prob": -81.5695, "train_positive_token_accuracy": 0.0778, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.2698, "epoch": 4.82392776523702, "grad_norm": 10.991819381713867, "learning_rate": 3.1992595392112966e-08, "lm_loss": 5.4381, "loss": 1.1362, "step": 2137, "text_contrastive_loss": 0.6451, "train_positive_log_prob": -78.2802, "train_positive_token_accuracy": 0.0746, "train_positive_token_prob": 0.03 }, { "contrastive_loss": 0.2979, "epoch": 4.826185101580135, "grad_norm": 11.42900562286377, "learning_rate": 3.117837906564114e-08, "lm_loss": 5.4505, "loss": 1.2333, "step": 2138, "text_contrastive_loss": 0.7807, "train_positive_log_prob": -80.6938, "train_positive_token_accuracy": 0.0781, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.201, "epoch": 4.8284424379232505, "grad_norm": 10.826726913452148, "learning_rate": 3.0374625282599826e-08, "lm_loss": 5.4096, "loss": 1.0695, "step": 2139, "text_contrastive_loss": 0.6552, "train_positive_log_prob": -80.4656, "train_positive_token_accuracy": 0.0861, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.4181, "epoch": 4.830699774266366, "grad_norm": 15.124938011169434, "learning_rate": 2.9581335735404672e-08, "lm_loss": 5.4617, "loss": 1.3917, "step": 2140, "text_contrastive_loss": 0.855, "train_positive_log_prob": -80.624, "train_positive_token_accuracy": 0.0836, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3782, "epoch": 4.832957110609481, "grad_norm": 14.185858726501465, "learning_rate": 2.8798512094436738e-08, "lm_loss": 5.3913, "loss": 1.3164, "step": 2141, "text_contrastive_loss": 0.7982, "train_positive_log_prob": -78.5765, "train_positive_token_accuracy": 0.0866, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3813, "epoch": 4.835214446952596, "grad_norm": 12.532388687133789, "learning_rate": 2.802615600804026e-08, "lm_loss": 5.3925, "loss": 1.3371, "step": 2142, "text_contrastive_loss": 0.8331, "train_positive_log_prob": -80.0637, "train_positive_token_accuracy": 0.0839, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.3296, "epoch": 4.837471783295711, "grad_norm": 12.764655113220215, "learning_rate": 2.7264269102517117e-08, "lm_loss": 5.5455, "loss": 1.2569, "step": 2143, "text_contrastive_loss": 0.7455, "train_positive_log_prob": -80.5822, "train_positive_token_accuracy": 0.0761, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3623, "epoch": 4.839729119638826, "grad_norm": 12.576608657836914, "learning_rate": 2.6512852982127357e-08, "lm_loss": 5.329, "loss": 1.3258, "step": 2144, "text_contrastive_loss": 0.8611, "train_positive_log_prob": -79.4155, "train_positive_token_accuracy": 0.0847, "train_positive_token_prob": 0.0325 }, { "contrastive_loss": 0.3862, "epoch": 4.841986455981941, "grad_norm": 13.27994441986084, "learning_rate": 2.577190922908035e-08, "lm_loss": 5.4388, "loss": 1.4206, "step": 2145, "text_contrastive_loss": 0.981, "train_positive_log_prob": -81.9209, "train_positive_token_accuracy": 0.082, "train_positive_token_prob": 0.0321 }, { "contrastive_loss": 0.3339, "epoch": 4.844243792325057, "grad_norm": 11.295732498168945, "learning_rate": 2.5041439403537537e-08, "lm_loss": 5.3908, "loss": 1.2231, "step": 2146, "text_contrastive_loss": 0.7002, "train_positive_log_prob": -80.1039, "train_positive_token_accuracy": 0.0812, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.325, "epoch": 4.846501128668171, "grad_norm": 13.89465618133545, "learning_rate": 2.4321445043603565e-08, "lm_loss": 5.3316, "loss": 1.2789, "step": 2147, "text_contrastive_loss": 0.8415, "train_positive_log_prob": -76.8932, "train_positive_token_accuracy": 0.0821, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.3286, "epoch": 4.8487584650112865, "grad_norm": 11.77916145324707, "learning_rate": 2.3611927665326275e-08, "lm_loss": 5.3282, "loss": 1.2164, "step": 2148, "text_contrastive_loss": 0.7099, "train_positive_log_prob": -79.9631, "train_positive_token_accuracy": 0.0807, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3907, "epoch": 4.851015801354402, "grad_norm": 13.596877098083496, "learning_rate": 2.291288876269393e-08, "lm_loss": 5.4148, "loss": 1.3509, "step": 2149, "text_contrastive_loss": 0.8375, "train_positive_log_prob": -79.6366, "train_positive_token_accuracy": 0.0794, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.3773, "epoch": 4.853273137697517, "grad_norm": 12.371967315673828, "learning_rate": 2.222432980762912e-08, "lm_loss": 5.4539, "loss": 1.3755, "step": 2150, "text_contrastive_loss": 0.9055, "train_positive_log_prob": -80.511, "train_positive_token_accuracy": 0.0725, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.361, "epoch": 4.855530474040632, "grad_norm": 14.236940383911133, "learning_rate": 2.1546252249988186e-08, "lm_loss": 5.4339, "loss": 1.2696, "step": 2151, "text_contrastive_loss": 0.7305, "train_positive_log_prob": -80.81, "train_positive_token_accuracy": 0.0841, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.272, "epoch": 4.857787810383747, "grad_norm": 11.388404846191406, "learning_rate": 2.087865751755791e-08, "lm_loss": 5.504, "loss": 1.1417, "step": 2152, "text_contrastive_loss": 0.6385, "train_positive_log_prob": -81.1409, "train_positive_token_accuracy": 0.0692, "train_positive_token_prob": 0.0301 }, { "contrastive_loss": 0.3098, "epoch": 4.860045146726862, "grad_norm": 12.139877319335938, "learning_rate": 2.0221547016051614e-08, "lm_loss": 5.3685, "loss": 1.1453, "step": 2153, "text_contrastive_loss": 0.5971, "train_positive_log_prob": -79.4589, "train_positive_token_accuracy": 0.08, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3832, "epoch": 4.8623024830699775, "grad_norm": 13.00784683227539, "learning_rate": 1.957492212910639e-08, "lm_loss": 5.4133, "loss": 1.3761, "step": 2154, "text_contrastive_loss": 0.9032, "train_positive_log_prob": -80.165, "train_positive_token_accuracy": 0.0757, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3381, "epoch": 4.864559819413093, "grad_norm": 11.445502281188965, "learning_rate": 1.8938784218281435e-08, "lm_loss": 5.3975, "loss": 1.2394, "step": 2155, "text_contrastive_loss": 0.7231, "train_positive_log_prob": -80.9187, "train_positive_token_accuracy": 0.0743, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3416, "epoch": 4.866817155756207, "grad_norm": 12.394635200500488, "learning_rate": 1.8313134623051955e-08, "lm_loss": 5.476, "loss": 1.2625, "step": 2156, "text_contrastive_loss": 0.7466, "train_positive_log_prob": -80.7737, "train_positive_token_accuracy": 0.0802, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3451, "epoch": 4.8690744920993225, "grad_norm": 12.728734970092773, "learning_rate": 1.7697974660811357e-08, "lm_loss": 5.3634, "loss": 1.2057, "step": 2157, "text_contrastive_loss": 0.6484, "train_positive_log_prob": -80.9139, "train_positive_token_accuracy": 0.0798, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3577, "epoch": 4.871331828442438, "grad_norm": 13.499052047729492, "learning_rate": 1.7093305626864065e-08, "lm_loss": 5.3418, "loss": 1.2358, "step": 2158, "text_contrastive_loss": 0.6877, "train_positive_log_prob": -78.5233, "train_positive_token_accuracy": 0.0832, "train_positive_token_prob": 0.0327 }, { "contrastive_loss": 0.3278, "epoch": 4.873589164785553, "grad_norm": 12.652538299560547, "learning_rate": 1.6499128794423836e-08, "lm_loss": 5.3845, "loss": 1.2968, "step": 2159, "text_contrastive_loss": 0.861, "train_positive_log_prob": -80.3219, "train_positive_token_accuracy": 0.077, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3964, "epoch": 4.8758465011286685, "grad_norm": 13.595474243164062, "learning_rate": 1.5915445414613208e-08, "lm_loss": 5.3284, "loss": 1.331, "step": 2160, "text_contrastive_loss": 0.8034, "train_positive_log_prob": -78.7615, "train_positive_token_accuracy": 0.0854, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3774, "epoch": 4.878103837471784, "grad_norm": 13.105132102966309, "learning_rate": 1.5342256716459058e-08, "lm_loss": 5.3624, "loss": 1.3081, "step": 2161, "text_contrastive_loss": 0.7889, "train_positive_log_prob": -80.5641, "train_positive_token_accuracy": 0.0793, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.2354, "epoch": 4.880361173814898, "grad_norm": 12.309341430664062, "learning_rate": 1.4779563906888172e-08, "lm_loss": 5.4188, "loss": 1.192, "step": 2162, "text_contrastive_loss": 0.8294, "train_positive_log_prob": -80.8815, "train_positive_token_accuracy": 0.0717, "train_positive_token_prob": 0.0294 }, { "contrastive_loss": 0.2926, "epoch": 4.8826185101580135, "grad_norm": 11.640186309814453, "learning_rate": 1.4227368170728894e-08, "lm_loss": 5.3122, "loss": 1.1701, "step": 2163, "text_contrastive_loss": 0.6926, "train_positive_log_prob": -76.9255, "train_positive_token_accuracy": 0.0782, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.4086, "epoch": 4.884875846501129, "grad_norm": 15.052828788757324, "learning_rate": 1.3685670670706697e-08, "lm_loss": 5.4157, "loss": 1.3768, "step": 2164, "text_contrastive_loss": 0.8532, "train_positive_log_prob": -79.8648, "train_positive_token_accuracy": 0.0723, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.3648, "epoch": 4.887133182844244, "grad_norm": 11.98923110961914, "learning_rate": 1.3154472547440289e-08, "lm_loss": 5.405, "loss": 1.3541, "step": 2165, "text_contrastive_loss": 0.8975, "train_positive_log_prob": -79.3221, "train_positive_token_accuracy": 0.0868, "train_positive_token_prob": 0.0331 }, { "contrastive_loss": 0.3488, "epoch": 4.889390519187359, "grad_norm": 12.5957612991333, "learning_rate": 1.2633774919441622e-08, "lm_loss": 5.3882, "loss": 1.2936, "step": 2166, "text_contrastive_loss": 0.812, "train_positive_log_prob": -79.3674, "train_positive_token_accuracy": 0.0711, "train_positive_token_prob": 0.0303 }, { "contrastive_loss": 0.3448, "epoch": 4.891647855530474, "grad_norm": 11.962646484375, "learning_rate": 1.2123578883110887e-08, "lm_loss": 5.4075, "loss": 1.2624, "step": 2167, "text_contrastive_loss": 0.7537, "train_positive_log_prob": -80.3766, "train_positive_token_accuracy": 0.0765, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3378, "epoch": 4.893905191873589, "grad_norm": 11.774933815002441, "learning_rate": 1.1623885512737076e-08, "lm_loss": 5.3664, "loss": 1.2064, "step": 2168, "text_contrastive_loss": 0.6639, "train_positive_log_prob": -80.263, "train_positive_token_accuracy": 0.0846, "train_positive_token_prob": 0.0322 }, { "contrastive_loss": 0.2715, "epoch": 4.8961625282167045, "grad_norm": 11.338656425476074, "learning_rate": 1.1134695860493539e-08, "lm_loss": 5.2685, "loss": 1.1253, "step": 2169, "text_contrastive_loss": 0.6538, "train_positive_log_prob": -77.6493, "train_positive_token_accuracy": 0.0819, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.284, "epoch": 4.89841986455982, "grad_norm": 11.437028884887695, "learning_rate": 1.0656010956437979e-08, "lm_loss": 5.4169, "loss": 1.2548, "step": 2170, "text_contrastive_loss": 0.8582, "train_positive_log_prob": -80.4479, "train_positive_token_accuracy": 0.0786, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.3261, "epoch": 4.900677200902934, "grad_norm": 11.590265274047852, "learning_rate": 1.018783180850691e-08, "lm_loss": 5.4278, "loss": 1.1799, "step": 2171, "text_contrastive_loss": 0.6221, "train_positive_log_prob": -79.2501, "train_positive_token_accuracy": 0.0811, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3522, "epoch": 4.9029345372460496, "grad_norm": 13.533720970153809, "learning_rate": 9.73015940251676e-09, "lm_loss": 5.388, "loss": 1.2409, "step": 2172, "text_contrastive_loss": 0.6997, "train_positive_log_prob": -79.0731, "train_positive_token_accuracy": 0.0745, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3091, "epoch": 4.905191873589165, "grad_norm": 12.223340034484863, "learning_rate": 9.282994702159986e-09, "lm_loss": 5.395, "loss": 1.2321, "step": 2173, "text_contrastive_loss": 0.767, "train_positive_log_prob": -79.5565, "train_positive_token_accuracy": 0.0821, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3397, "epoch": 4.90744920993228, "grad_norm": 11.706966400146484, "learning_rate": 8.846338649005082e-09, "lm_loss": 5.4793, "loss": 1.2444, "step": 2174, "text_contrastive_loss": 0.7136, "train_positive_log_prob": -80.9685, "train_positive_token_accuracy": 0.079, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.313, "epoch": 4.909706546275395, "grad_norm": 12.395425796508789, "learning_rate": 8.42019216249046e-09, "lm_loss": 5.3893, "loss": 1.1799, "step": 2175, "text_contrastive_loss": 0.656, "train_positive_log_prob": -78.2505, "train_positive_token_accuracy": 0.0729, "train_positive_token_prob": 0.0312 }, { "contrastive_loss": 0.292, "epoch": 4.91196388261851, "grad_norm": 12.1083345413208, "learning_rate": 8.004556139927788e-09, "lm_loss": 5.3979, "loss": 1.1718, "step": 2176, "text_contrastive_loss": 0.6799, "train_positive_log_prob": -78.2929, "train_positive_token_accuracy": 0.0743, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3209, "epoch": 4.914221218961625, "grad_norm": 12.850380897521973, "learning_rate": 7.599431456495888e-09, "lm_loss": 5.3555, "loss": 1.2501, "step": 2177, "text_contrastive_loss": 0.7874, "train_positive_log_prob": -78.1134, "train_positive_token_accuracy": 0.0806, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.2447, "epoch": 4.9164785553047405, "grad_norm": 10.093464851379395, "learning_rate": 7.2048189652412784e-09, "lm_loss": 5.4659, "loss": 1.159, "step": 2178, "text_contrastive_loss": 0.7354, "train_positive_log_prob": -81.4749, "train_positive_token_accuracy": 0.0852, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.307, "epoch": 4.918735891647856, "grad_norm": 12.094605445861816, "learning_rate": 6.820719497074857e-09, "lm_loss": 5.4588, "loss": 1.2629, "step": 2179, "text_contrastive_loss": 0.8201, "train_positive_log_prob": -80.2418, "train_positive_token_accuracy": 0.0779, "train_positive_token_prob": 0.0313 }, { "contrastive_loss": 0.3124, "epoch": 4.92099322799097, "grad_norm": 11.375383377075195, "learning_rate": 6.447133860771893e-09, "lm_loss": 5.3883, "loss": 1.2174, "step": 2180, "text_contrastive_loss": 0.7324, "train_positive_log_prob": -79.6664, "train_positive_token_accuracy": 0.0803, "train_positive_token_prob": 0.032 }, { "contrastive_loss": 0.3418, "epoch": 4.923250564334086, "grad_norm": 12.987213134765625, "learning_rate": 6.084062842968696e-09, "lm_loss": 5.3302, "loss": 1.278, "step": 2181, "text_contrastive_loss": 0.8064, "train_positive_log_prob": -79.5401, "train_positive_token_accuracy": 0.0812, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.316, "epoch": 4.925507900677201, "grad_norm": 13.518106460571289, "learning_rate": 5.731507208160958e-09, "lm_loss": 5.55, "loss": 1.216, "step": 2182, "text_contrastive_loss": 0.6899, "train_positive_log_prob": -81.4133, "train_positive_token_accuracy": 0.0775, "train_positive_token_prob": 0.0307 }, { "contrastive_loss": 0.3942, "epoch": 4.927765237020316, "grad_norm": 14.615264892578125, "learning_rate": 5.389467698704298e-09, "lm_loss": 5.3942, "loss": 1.3891, "step": 2183, "text_contrastive_loss": 0.911, "train_positive_log_prob": -78.4682, "train_positive_token_accuracy": 0.083, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.3662, "epoch": 4.9300225733634315, "grad_norm": 11.848650932312012, "learning_rate": 5.057945034810385e-09, "lm_loss": 5.3746, "loss": 1.2872, "step": 2184, "text_contrastive_loss": 0.7672, "train_positive_log_prob": -79.4624, "train_positive_token_accuracy": 0.0808, "train_positive_token_prob": 0.0325 }, { "contrastive_loss": 0.2615, "epoch": 4.932279909706546, "grad_norm": 11.342528343200684, "learning_rate": 4.736939914545824e-09, "lm_loss": 5.363, "loss": 1.1611, "step": 2185, "text_contrastive_loss": 0.7268, "train_positive_log_prob": -76.8871, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3441, "epoch": 4.934537246049661, "grad_norm": 14.007142066955566, "learning_rate": 4.4264530138310445e-09, "lm_loss": 5.4225, "loss": 1.2712, "step": 2186, "text_contrastive_loss": 0.7696, "train_positive_log_prob": -80.5107, "train_positive_token_accuracy": 0.0813, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3191, "epoch": 4.936794582392777, "grad_norm": 11.95272445678711, "learning_rate": 4.1264849864403044e-09, "lm_loss": 5.4757, "loss": 1.2197, "step": 2187, "text_contrastive_loss": 0.7061, "train_positive_log_prob": -79.8722, "train_positive_token_accuracy": 0.0755, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.3955, "epoch": 4.939051918735892, "grad_norm": 13.96945858001709, "learning_rate": 3.837036463997246e-09, "lm_loss": 5.4896, "loss": 1.3315, "step": 2188, "text_contrastive_loss": 0.774, "train_positive_log_prob": -83.0937, "train_positive_token_accuracy": 0.0823, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3248, "epoch": 4.941309255079007, "grad_norm": 12.298246383666992, "learning_rate": 3.558108055976006e-09, "lm_loss": 5.3952, "loss": 1.2783, "step": 2189, "text_contrastive_loss": 0.8278, "train_positive_log_prob": -78.2913, "train_positive_token_accuracy": 0.0781, "train_positive_token_prob": 0.0314 }, { "contrastive_loss": 0.2874, "epoch": 4.943566591422122, "grad_norm": 11.63664436340332, "learning_rate": 3.289700349698999e-09, "lm_loss": 5.4742, "loss": 1.1871, "step": 2190, "text_contrastive_loss": 0.7045, "train_positive_log_prob": -84.9146, "train_positive_token_accuracy": 0.0846, "train_positive_token_prob": 0.0329 }, { "contrastive_loss": 0.2688, "epoch": 4.945823927765237, "grad_norm": 12.425889015197754, "learning_rate": 3.0318139103363564e-09, "lm_loss": 5.352, "loss": 1.1751, "step": 2191, "text_contrastive_loss": 0.7422, "train_positive_log_prob": -77.8727, "train_positive_token_accuracy": 0.0821, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.3718, "epoch": 4.948081264108352, "grad_norm": 14.650064468383789, "learning_rate": 2.7844492809031567e-09, "lm_loss": 5.4311, "loss": 1.2509, "step": 2192, "text_contrastive_loss": 0.672, "train_positive_log_prob": -81.0791, "train_positive_token_accuracy": 0.0788, "train_positive_token_prob": 0.0319 }, { "contrastive_loss": 0.3667, "epoch": 4.950338600451468, "grad_norm": 16.653989791870117, "learning_rate": 2.547606982260531e-09, "lm_loss": 5.3663, "loss": 1.2801, "step": 2193, "text_contrastive_loss": 0.7535, "train_positive_log_prob": -82.4041, "train_positive_token_accuracy": 0.0768, "train_positive_token_prob": 0.0311 }, { "contrastive_loss": 0.3982, "epoch": 4.952595936794582, "grad_norm": 14.31432819366455, "learning_rate": 2.3212875131117805e-09, "lm_loss": 5.3636, "loss": 1.3578, "step": 2194, "text_contrastive_loss": 0.8466, "train_positive_log_prob": -77.1246, "train_positive_token_accuracy": 0.0753, "train_positive_token_prob": 0.031 }, { "contrastive_loss": 0.3434, "epoch": 4.954853273137697, "grad_norm": 11.791407585144043, "learning_rate": 2.1054913500051512e-09, "lm_loss": 5.3388, "loss": 1.2172, "step": 2195, "text_contrastive_loss": 0.6799, "train_positive_log_prob": -80.1854, "train_positive_token_accuracy": 0.0861, "train_positive_token_prob": 0.0327 }, { "contrastive_loss": 0.4087, "epoch": 4.957110609480813, "grad_norm": 14.255420684814453, "learning_rate": 1.9002189473288356e-09, "lm_loss": 5.4553, "loss": 1.3133, "step": 2196, "text_contrastive_loss": 0.7181, "train_positive_log_prob": -81.9416, "train_positive_token_accuracy": 0.0866, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.3213, "epoch": 4.959367945823928, "grad_norm": 10.890216827392578, "learning_rate": 1.7054707373126423e-09, "lm_loss": 5.441, "loss": 1.2117, "step": 2197, "text_contrastive_loss": 0.6927, "train_positive_log_prob": -79.7222, "train_positive_token_accuracy": 0.0744, "train_positive_token_prob": 0.0302 }, { "contrastive_loss": 0.2907, "epoch": 4.961625282167043, "grad_norm": 12.555000305175781, "learning_rate": 1.5212471300252163e-09, "lm_loss": 5.3561, "loss": 1.197, "step": 2198, "text_contrastive_loss": 0.7414, "train_positive_log_prob": -77.8124, "train_positive_token_accuracy": 0.0801, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.3132, "epoch": 4.963882618510158, "grad_norm": 11.874371528625488, "learning_rate": 1.347548513375707e-09, "lm_loss": 5.4174, "loss": 1.2188, "step": 2199, "text_contrastive_loss": 0.7279, "train_positive_log_prob": -78.0867, "train_positive_token_accuracy": 0.0748, "train_positive_token_prob": 0.0304 }, { "contrastive_loss": 0.3551, "epoch": 4.966139954853273, "grad_norm": 12.687294006347656, "learning_rate": 1.1843752531104368e-09, "lm_loss": 5.3605, "loss": 1.2945, "step": 2200, "text_contrastive_loss": 0.8069, "train_positive_log_prob": -81.1182, "train_positive_token_accuracy": 0.0787, "train_positive_token_prob": 0.0317 }, { "contrastive_loss": 0.425, "epoch": 4.968397291196388, "grad_norm": 15.35137939453125, "learning_rate": 1.0317276928134557e-09, "lm_loss": 5.3153, "loss": 1.3585, "step": 2201, "text_contrastive_loss": 0.804, "train_positive_log_prob": -78.1956, "train_positive_token_accuracy": 0.086, "train_positive_token_prob": 0.0323 }, { "contrastive_loss": 0.4068, "epoch": 4.970654627539504, "grad_norm": 13.613174438476562, "learning_rate": 8.896061539048762e-10, "lm_loss": 5.3597, "loss": 1.3727, "step": 2202, "text_contrastive_loss": 0.8598, "train_positive_log_prob": -78.7634, "train_positive_token_accuracy": 0.0811, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.2783, "epoch": 4.972911963882618, "grad_norm": 12.867480278015137, "learning_rate": 7.580109356419841e-10, "lm_loss": 5.4139, "loss": 1.1883, "step": 2203, "text_contrastive_loss": 0.7372, "train_positive_log_prob": -79.3215, "train_positive_token_accuracy": 0.0845, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.253, "epoch": 4.975169300225733, "grad_norm": 11.267635345458984, "learning_rate": 6.369423151164622e-10, "lm_loss": 5.4207, "loss": 1.186, "step": 2204, "text_contrastive_loss": 0.782, "train_positive_log_prob": -79.7659, "train_positive_token_accuracy": 0.0812, "train_positive_token_prob": 0.0305 }, { "contrastive_loss": 0.3076, "epoch": 4.977426636568849, "grad_norm": 12.308904647827148, "learning_rate": 5.264005472549461e-10, "lm_loss": 5.409, "loss": 1.2107, "step": 2205, "text_contrastive_loss": 0.7244, "train_positive_log_prob": -80.7564, "train_positive_token_accuracy": 0.078, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.238, "epoch": 4.979683972911964, "grad_norm": 12.098566055297852, "learning_rate": 4.2638586481846823e-10, "lm_loss": 5.44, "loss": 1.0985, "step": 2206, "text_contrastive_loss": 0.6332, "train_positive_log_prob": -79.9642, "train_positive_token_accuracy": 0.0776, "train_positive_token_prob": 0.0308 }, { "contrastive_loss": 0.3076, "epoch": 4.981941309255079, "grad_norm": 12.042253494262695, "learning_rate": 3.368984784024587e-10, "lm_loss": 5.3821, "loss": 1.208, "step": 2207, "text_contrastive_loss": 0.7244, "train_positive_log_prob": -79.5945, "train_positive_token_accuracy": 0.0729, "train_positive_token_prob": 0.0306 }, { "contrastive_loss": 0.3644, "epoch": 4.984198645598195, "grad_norm": 13.446281433105469, "learning_rate": 2.5793857643396924e-10, "lm_loss": 5.3932, "loss": 1.3001, "step": 2208, "text_contrastive_loss": 0.7927, "train_positive_log_prob": -79.8464, "train_positive_token_accuracy": 0.0718, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.2575, "epoch": 4.986455981941309, "grad_norm": 11.859148979187012, "learning_rate": 1.89506325175004e-10, "lm_loss": 5.3855, "loss": 1.1785, "step": 2209, "text_contrastive_loss": 0.7648, "train_positive_log_prob": -77.9696, "train_positive_token_accuracy": 0.0783, "train_positive_token_prob": 0.0324 }, { "contrastive_loss": 0.2738, "epoch": 4.988713318284424, "grad_norm": 11.753767967224121, "learning_rate": 1.316018687191889e-10, "lm_loss": 5.4317, "loss": 1.2152, "step": 2210, "text_contrastive_loss": 0.7965, "train_positive_log_prob": -81.3885, "train_positive_token_accuracy": 0.0795, "train_positive_token_prob": 0.0309 }, { "contrastive_loss": 0.3206, "epoch": 4.99097065462754, "grad_norm": 11.845379829406738, "learning_rate": 8.422532899121649e-11, "lm_loss": 5.4001, "loss": 1.2306, "step": 2211, "text_contrastive_loss": 0.74, "train_positive_log_prob": -78.0693, "train_positive_token_accuracy": 0.0792, "train_positive_token_prob": 0.0315 }, { "contrastive_loss": 0.327, "epoch": 4.993227990970655, "grad_norm": 13.591782569885254, "learning_rate": 4.737680575017667e-11, "lm_loss": 5.3766, "loss": 1.2574, "step": 2212, "text_contrastive_loss": 0.7853, "train_positive_log_prob": -78.3168, "train_positive_token_accuracy": 0.0885, "train_positive_token_prob": 0.0318 }, { "contrastive_loss": 0.3259, "epoch": 4.995485327313769, "grad_norm": 12.919146537780762, "learning_rate": 2.1056376585115723e-11, "lm_loss": 5.4108, "loss": 1.2339, "step": 2213, "text_contrastive_loss": 0.7337, "train_positive_log_prob": -79.6642, "train_positive_token_accuracy": 0.086, "train_positive_token_prob": 0.0327 }, { "contrastive_loss": 0.3853, "epoch": 4.997742663656885, "grad_norm": 14.124292373657227, "learning_rate": 5.2640969172568225e-12, "lm_loss": 5.4305, "loss": 1.389, "step": 2214, "text_contrastive_loss": 0.9214, "train_positive_log_prob": -77.827, "train_positive_token_accuracy": 0.0796, "train_positive_token_prob": 0.0316 }, { "contrastive_loss": 0.2102, "epoch": 5.0, "grad_norm": 13.013922691345215, "learning_rate": 0.0, "lm_loss": 5.4698, "loss": 1.0002, "step": 2215, "text_contrastive_loss": 0.486, "train_positive_log_prob": -82.2887, "train_positive_token_accuracy": 0.0793, "train_positive_token_prob": 0.0311 }, { "epoch": 5.0, "step": 2215, "total_flos": 1.487319842422784e+17, "train_loss": 1.4400290369718391, "train_runtime": 7589.3376, "train_samples_per_second": 74.636, "train_steps_per_second": 0.292 } ], "logging_steps": 1, "max_steps": 2215, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.487319842422784e+17, "train_batch_size": 256, "trial_name": null, "trial_params": null }