|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997327870312639, |
|
"eval_steps": 500, |
|
"global_step": 1403, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007125679166295537, |
|
"grad_norm": 35.624961853027344, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -3.107421875, |
|
"logits/rejected": -3.0234375, |
|
"logps/chosen": -106.375, |
|
"logps/rejected": -64.125, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0035628395831477687, |
|
"grad_norm": 26.24993324279785, |
|
"learning_rate": 2.8368794326241133e-08, |
|
"logits/chosen": -3.1044921875, |
|
"logits/rejected": -3.08642578125, |
|
"logps/chosen": -95.46875, |
|
"logps/rejected": -64.515625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.1875, |
|
"rewards/chosen": -0.0012693405151367188, |
|
"rewards/margins": -0.0015630722045898438, |
|
"rewards/rejected": 0.0002932548522949219, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0071256791662955375, |
|
"grad_norm": 24.48309326171875, |
|
"learning_rate": 6.382978723404254e-08, |
|
"logits/chosen": -3.10546875, |
|
"logits/rejected": -3.072265625, |
|
"logps/chosen": -88.23750305175781, |
|
"logps/rejected": -55.287498474121094, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": 0.00015716553025413305, |
|
"rewards/margins": 0.002190017607063055, |
|
"rewards/rejected": -0.0020355223678052425, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010688518749443307, |
|
"grad_norm": 54.63581848144531, |
|
"learning_rate": 9.929078014184397e-08, |
|
"logits/chosen": -3.080078125, |
|
"logits/rejected": -3.0648436546325684, |
|
"logps/chosen": -98.9437484741211, |
|
"logps/rejected": -59.84375, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0001586914004292339, |
|
"rewards/margins": 0.00680465716868639, |
|
"rewards/rejected": -0.006960677914321423, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.014251358332591075, |
|
"grad_norm": 50.01255416870117, |
|
"learning_rate": 1.3475177304964538e-07, |
|
"logits/chosen": -3.099609375, |
|
"logits/rejected": -3.0687499046325684, |
|
"logps/chosen": -101.0562515258789, |
|
"logps/rejected": -56.58124923706055, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.006333542056381702, |
|
"rewards/margins": 0.01415863074362278, |
|
"rewards/rejected": -0.00781860388815403, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.017814197915738843, |
|
"grad_norm": 142.6243896484375, |
|
"learning_rate": 1.702127659574468e-07, |
|
"logits/chosen": -3.076171875, |
|
"logits/rejected": -3.0640625953674316, |
|
"logps/chosen": -111.58125305175781, |
|
"logps/rejected": -76.98750305175781, |
|
"loss": 0.6806, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.01683807373046875, |
|
"rewards/margins": 0.02606506273150444, |
|
"rewards/rejected": -0.009222030639648438, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.021377037498886614, |
|
"grad_norm": 16.588232040405273, |
|
"learning_rate": 2.0567375886524822e-07, |
|
"logits/chosen": -3.083203077316284, |
|
"logits/rejected": -3.0679688453674316, |
|
"logps/chosen": -106.0250015258789, |
|
"logps/rejected": -67.875, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.05385131761431694, |
|
"rewards/margins": 0.06231040880084038, |
|
"rewards/rejected": -0.008445357903838158, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.024939877082034382, |
|
"grad_norm": 23.556861877441406, |
|
"learning_rate": 2.411347517730496e-07, |
|
"logits/chosen": -3.0796875953674316, |
|
"logits/rejected": -3.08203125, |
|
"logps/chosen": -88.76249694824219, |
|
"logps/rejected": -53.837501525878906, |
|
"loss": 0.6479, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.09301147609949112, |
|
"rewards/margins": 0.10019302368164062, |
|
"rewards/rejected": -0.00728950509801507, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.02850271666518215, |
|
"grad_norm": 29.775815963745117, |
|
"learning_rate": 2.7659574468085106e-07, |
|
"logits/chosen": -3.0914063453674316, |
|
"logits/rejected": -3.057421922683716, |
|
"logps/chosen": -102.15625, |
|
"logps/rejected": -71.5250015258789, |
|
"loss": 0.6043, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.202159121632576, |
|
"rewards/margins": 0.22055740654468536, |
|
"rewards/rejected": -0.01839141920208931, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03206555624832992, |
|
"grad_norm": 25.135190963745117, |
|
"learning_rate": 3.1205673758865245e-07, |
|
"logits/chosen": -3.071093797683716, |
|
"logits/rejected": -3.0601563453674316, |
|
"logps/chosen": -87.21875, |
|
"logps/rejected": -47.756248474121094, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.25634080171585083, |
|
"rewards/margins": 0.26132506132125854, |
|
"rewards/rejected": -0.005035400390625, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.035628395831477686, |
|
"grad_norm": 13.338802337646484, |
|
"learning_rate": 3.475177304964539e-07, |
|
"logits/chosen": -3.0531249046325684, |
|
"logits/rejected": -3.065234422683716, |
|
"logps/chosen": -107.1875, |
|
"logps/rejected": -79.0374984741211, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.207794189453125, |
|
"rewards/margins": 0.29575881361961365, |
|
"rewards/rejected": -0.08815918117761612, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03919123541462546, |
|
"grad_norm": 12.088021278381348, |
|
"learning_rate": 3.829787234042553e-07, |
|
"logits/chosen": -3.0746092796325684, |
|
"logits/rejected": -3.073437452316284, |
|
"logps/chosen": -91.6187515258789, |
|
"logps/rejected": -63.34375, |
|
"loss": 0.5622, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.36282652616500854, |
|
"rewards/margins": 0.417471319437027, |
|
"rewards/rejected": -0.054642487317323685, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04275407499777323, |
|
"grad_norm": 13.686232566833496, |
|
"learning_rate": 4.184397163120567e-07, |
|
"logits/chosen": -3.083984375, |
|
"logits/rejected": -3.079296827316284, |
|
"logps/chosen": -101.8062515258789, |
|
"logps/rejected": -72.42500305175781, |
|
"loss": 0.5372, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.42637938261032104, |
|
"rewards/margins": 0.6403244137763977, |
|
"rewards/rejected": -0.2142478972673416, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04631691458092099, |
|
"grad_norm": 10.232641220092773, |
|
"learning_rate": 4.5390070921985813e-07, |
|
"logits/chosen": -3.065624952316284, |
|
"logits/rejected": -3.0667967796325684, |
|
"logps/chosen": -86.7750015258789, |
|
"logps/rejected": -56.353126525878906, |
|
"loss": 0.5248, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.6796913146972656, |
|
"rewards/margins": 0.741424560546875, |
|
"rewards/rejected": -0.061974335461854935, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.049879754164068764, |
|
"grad_norm": 17.83266830444336, |
|
"learning_rate": 4.893617021276595e-07, |
|
"logits/chosen": -3.049999952316284, |
|
"logits/rejected": -3.05078125, |
|
"logps/chosen": -101.2874984741211, |
|
"logps/rejected": -77.9749984741211, |
|
"loss": 0.5423, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.3456260561943054, |
|
"rewards/margins": 0.6742362976074219, |
|
"rewards/rejected": -0.32913780212402344, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05344259374721653, |
|
"grad_norm": 23.34439468383789, |
|
"learning_rate": 5.248226950354609e-07, |
|
"logits/chosen": -3.060546875, |
|
"logits/rejected": -3.0562500953674316, |
|
"logps/chosen": -86.29374694824219, |
|
"logps/rejected": -62.92499923706055, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.6199722290039062, |
|
"rewards/margins": 0.7961105108261108, |
|
"rewards/rejected": -0.176055908203125, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0570054333303643, |
|
"grad_norm": 13.863036155700684, |
|
"learning_rate": 5.602836879432624e-07, |
|
"logits/chosen": -3.065624952316284, |
|
"logits/rejected": -3.0374999046325684, |
|
"logps/chosen": -101.5625, |
|
"logps/rejected": -79.7750015258789, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.655413806438446, |
|
"rewards/margins": 0.987408459186554, |
|
"rewards/rejected": -0.33138352632522583, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06056827291351207, |
|
"grad_norm": 14.414058685302734, |
|
"learning_rate": 5.957446808510638e-07, |
|
"logits/chosen": -3.0542969703674316, |
|
"logits/rejected": -3.07421875, |
|
"logps/chosen": -93.625, |
|
"logps/rejected": -69.44999694824219, |
|
"loss": 0.5144, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.5085555911064148, |
|
"rewards/margins": 0.8773147463798523, |
|
"rewards/rejected": -0.3694648742675781, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.06413111249665984, |
|
"grad_norm": 12.498698234558105, |
|
"learning_rate": 6.312056737588652e-07, |
|
"logits/chosen": -3.056640625, |
|
"logits/rejected": -3.055859327316284, |
|
"logps/chosen": -115.61250305175781, |
|
"logps/rejected": -93.48750305175781, |
|
"loss": 0.519, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.4992126524448395, |
|
"rewards/margins": 0.902966320514679, |
|
"rewards/rejected": -0.4039718508720398, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0676939520798076, |
|
"grad_norm": 11.825833320617676, |
|
"learning_rate": 6.666666666666666e-07, |
|
"logits/chosen": -3.048046827316284, |
|
"logits/rejected": -3.072265625, |
|
"logps/chosen": -88.01249694824219, |
|
"logps/rejected": -62.79375076293945, |
|
"loss": 0.4863, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 0.911865234375, |
|
"rewards/margins": 1.028173804283142, |
|
"rewards/rejected": -0.11660919338464737, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.07125679166295537, |
|
"grad_norm": 17.15355682373047, |
|
"learning_rate": 7.021276595744681e-07, |
|
"logits/chosen": -3.020703077316284, |
|
"logits/rejected": -3.037890672683716, |
|
"logps/chosen": -92.09375, |
|
"logps/rejected": -78.25, |
|
"loss": 0.4907, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 0.714630126953125, |
|
"rewards/margins": 0.987384021282196, |
|
"rewards/rejected": -0.2730239927768707, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07481963124610315, |
|
"grad_norm": 14.750085830688477, |
|
"learning_rate": 7.375886524822694e-07, |
|
"logits/chosen": -3.033203125, |
|
"logits/rejected": -3.0386719703674316, |
|
"logps/chosen": -98.8062515258789, |
|
"logps/rejected": -73.3062515258789, |
|
"loss": 0.4708, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.8927696347236633, |
|
"rewards/margins": 1.1517212390899658, |
|
"rewards/rejected": -0.25947266817092896, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.07838247082925091, |
|
"grad_norm": 9.73326301574707, |
|
"learning_rate": 7.730496453900709e-07, |
|
"logits/chosen": -3.0648436546325684, |
|
"logits/rejected": -3.0335936546325684, |
|
"logps/chosen": -92.2874984741211, |
|
"logps/rejected": -64.23124694824219, |
|
"loss": 0.4229, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 1.2247527837753296, |
|
"rewards/margins": 1.42816162109375, |
|
"rewards/rejected": -0.20256957411766052, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08194531041239868, |
|
"grad_norm": 8.861647605895996, |
|
"learning_rate": 8.085106382978723e-07, |
|
"logits/chosen": -3.0152344703674316, |
|
"logits/rejected": -3.016796827316284, |
|
"logps/chosen": -84.01875305175781, |
|
"logps/rejected": -62.01250076293945, |
|
"loss": 0.4404, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 1.268524169921875, |
|
"rewards/margins": 1.392974853515625, |
|
"rewards/rejected": -0.12417755275964737, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.08550814999554646, |
|
"grad_norm": 11.753287315368652, |
|
"learning_rate": 8.439716312056737e-07, |
|
"logits/chosen": -3.021484375, |
|
"logits/rejected": -3.0257811546325684, |
|
"logps/chosen": -70.6187515258789, |
|
"logps/rejected": -54.650001525878906, |
|
"loss": 0.457, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 1.0282318592071533, |
|
"rewards/margins": 1.1394774913787842, |
|
"rewards/rejected": -0.11103515326976776, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08907098957869422, |
|
"grad_norm": 16.672988891601562, |
|
"learning_rate": 8.794326241134752e-07, |
|
"logits/chosen": -2.983593702316284, |
|
"logits/rejected": -3.01171875, |
|
"logps/chosen": -84.30000305175781, |
|
"logps/rejected": -61.98125076293945, |
|
"loss": 0.41, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.290094017982483, |
|
"rewards/margins": 1.4578125476837158, |
|
"rewards/rejected": -0.16876526176929474, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.09263382916184199, |
|
"grad_norm": 10.004197120666504, |
|
"learning_rate": 9.148936170212766e-07, |
|
"logits/chosen": -2.984375, |
|
"logits/rejected": -3.0121092796325684, |
|
"logps/chosen": -82.8375015258789, |
|
"logps/rejected": -61.41875076293945, |
|
"loss": 0.4163, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 1.3681640625, |
|
"rewards/margins": 1.491455078125, |
|
"rewards/rejected": -0.12366028130054474, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09619666874498976, |
|
"grad_norm": 12.494372367858887, |
|
"learning_rate": 9.50354609929078e-07, |
|
"logits/chosen": -2.975781202316284, |
|
"logits/rejected": -2.9828124046325684, |
|
"logps/chosen": -90.48750305175781, |
|
"logps/rejected": -74.5562515258789, |
|
"loss": 0.3907, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 1.3906981945037842, |
|
"rewards/margins": 1.6629638671875, |
|
"rewards/rejected": -0.2710815370082855, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.09975950832813753, |
|
"grad_norm": 13.188983917236328, |
|
"learning_rate": 9.858156028368794e-07, |
|
"logits/chosen": -2.9781250953674316, |
|
"logits/rejected": -2.955859422683716, |
|
"logps/chosen": -76.2437515258789, |
|
"logps/rejected": -61.16875076293945, |
|
"loss": 0.3928, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.412744164466858, |
|
"rewards/margins": 1.584985375404358, |
|
"rewards/rejected": -0.17142944037914276, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.10332234791128529, |
|
"grad_norm": 12.645596504211426, |
|
"learning_rate": 9.999860568295915e-07, |
|
"logits/chosen": -2.948046922683716, |
|
"logits/rejected": -2.9703125953674316, |
|
"logps/chosen": -78.86250305175781, |
|
"logps/rejected": -66.6875, |
|
"loss": 0.4173, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.195831298828125, |
|
"rewards/margins": 1.5179870128631592, |
|
"rewards/rejected": -0.32008057832717896, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.10688518749443306, |
|
"grad_norm": 11.21827220916748, |
|
"learning_rate": 9.999008513821418e-07, |
|
"logits/chosen": -2.9410157203674316, |
|
"logits/rejected": -2.9488282203674316, |
|
"logps/chosen": -77.67500305175781, |
|
"logps/rejected": -57.64374923706055, |
|
"loss": 0.3616, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.391027808189392, |
|
"rewards/margins": 1.7245604991912842, |
|
"rewards/rejected": -0.3327087461948395, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11044802707758083, |
|
"grad_norm": 19.32581901550293, |
|
"learning_rate": 9.997381998772935e-07, |
|
"logits/chosen": -2.928515672683716, |
|
"logits/rejected": -2.944531202316284, |
|
"logps/chosen": -94.01249694824219, |
|
"logps/rejected": -76.98750305175781, |
|
"loss": 0.3437, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.4373290538787842, |
|
"rewards/margins": 1.874505639076233, |
|
"rewards/rejected": -0.4372314512729645, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.1140108666607286, |
|
"grad_norm": 9.437000274658203, |
|
"learning_rate": 9.99498127513479e-07, |
|
"logits/chosen": -2.9027342796325684, |
|
"logits/rejected": -2.920703172683716, |
|
"logps/chosen": -74.16874694824219, |
|
"logps/rejected": -60.243751525878906, |
|
"loss": 0.3419, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.5391356945037842, |
|
"rewards/margins": 2.053997755050659, |
|
"rewards/rejected": -0.51385498046875, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11757370624387636, |
|
"grad_norm": 7.837158679962158, |
|
"learning_rate": 9.991806714833894e-07, |
|
"logits/chosen": -2.9039063453674316, |
|
"logits/rejected": -2.9156250953674316, |
|
"logps/chosen": -87.0875015258789, |
|
"logps/rejected": -70.16874694824219, |
|
"loss": 0.3555, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.413110375404358, |
|
"rewards/margins": 1.951440453529358, |
|
"rewards/rejected": -0.537158191204071, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.12113654582702414, |
|
"grad_norm": 11.675161361694336, |
|
"learning_rate": 9.987858809682132e-07, |
|
"logits/chosen": -2.8902344703674316, |
|
"logits/rejected": -2.910937547683716, |
|
"logps/chosen": -80.60624694824219, |
|
"logps/rejected": -64.75, |
|
"loss": 0.3047, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.405981421470642, |
|
"rewards/margins": 2.0376954078674316, |
|
"rewards/rejected": -0.629956066608429, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1246993854101719, |
|
"grad_norm": 10.536681175231934, |
|
"learning_rate": 9.983138171300162e-07, |
|
"logits/chosen": -2.8675780296325684, |
|
"logits/rejected": -2.88671875, |
|
"logps/chosen": -80.84375, |
|
"logps/rejected": -67.55000305175781, |
|
"loss": 0.3357, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.3307921886444092, |
|
"rewards/margins": 1.8141601085662842, |
|
"rewards/rejected": -0.4823974668979645, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.12826222499331968, |
|
"grad_norm": 18.031932830810547, |
|
"learning_rate": 9.977645531022672e-07, |
|
"logits/chosen": -2.8734374046325684, |
|
"logits/rejected": -2.896484375, |
|
"logps/chosen": -76.4375, |
|
"logps/rejected": -72.40625, |
|
"loss": 0.3215, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 1.909423828125, |
|
"rewards/margins": 2.198779344558716, |
|
"rewards/rejected": -0.28810423612594604, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.13182506457646745, |
|
"grad_norm": 11.270240783691406, |
|
"learning_rate": 9.971381739785065e-07, |
|
"logits/chosen": -2.859375, |
|
"logits/rejected": -2.883593797683716, |
|
"logps/chosen": -90.1500015258789, |
|
"logps/rejected": -74.66874694824219, |
|
"loss": 0.3281, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 1.711999535560608, |
|
"rewards/margins": 2.1112303733825684, |
|
"rewards/rejected": -0.39727783203125, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.1353879041596152, |
|
"grad_norm": 7.635077476501465, |
|
"learning_rate": 9.964347767991644e-07, |
|
"logits/chosen": -2.8558592796325684, |
|
"logits/rejected": -2.862499952316284, |
|
"logps/chosen": -95.88749694824219, |
|
"logps/rejected": -84.53125, |
|
"loss": 0.2675, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.230224609375, |
|
"rewards/margins": 2.361132860183716, |
|
"rewards/rejected": -1.1302001476287842, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.13895074374276298, |
|
"grad_norm": 9.399760246276855, |
|
"learning_rate": 9.956544705365262e-07, |
|
"logits/chosen": -2.8539061546325684, |
|
"logits/rejected": -2.860546827316284, |
|
"logps/chosen": -85.8375015258789, |
|
"logps/rejected": -74.95625305175781, |
|
"loss": 0.2563, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.636621117591858, |
|
"rewards/margins": 2.4004883766174316, |
|
"rewards/rejected": -0.7652435302734375, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.14251358332591074, |
|
"grad_norm": 20.745431900024414, |
|
"learning_rate": 9.947973760778508e-07, |
|
"logits/chosen": -2.830859422683716, |
|
"logits/rejected": -2.856250047683716, |
|
"logps/chosen": -73.9000015258789, |
|
"logps/rejected": -63.41875076293945, |
|
"loss": 0.2483, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.80950927734375, |
|
"rewards/margins": 2.4383788108825684, |
|
"rewards/rejected": -0.6286865472793579, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1460764229090585, |
|
"grad_norm": 17.357345581054688, |
|
"learning_rate": 9.938636262066423e-07, |
|
"logits/chosen": -2.821093797683716, |
|
"logits/rejected": -2.8414063453674316, |
|
"logps/chosen": -87.64375305175781, |
|
"logps/rejected": -77.25, |
|
"loss": 0.2311, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.82574462890625, |
|
"rewards/margins": 2.6576170921325684, |
|
"rewards/rejected": -0.8322509527206421, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.1496392624922063, |
|
"grad_norm": 12.413117408752441, |
|
"learning_rate": 9.928533655820778e-07, |
|
"logits/chosen": -2.8140625953674316, |
|
"logits/rejected": -2.82421875, |
|
"logps/chosen": -86.48124694824219, |
|
"logps/rejected": -79.54374694824219, |
|
"loss": 0.3201, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.5425536632537842, |
|
"rewards/margins": 2.359692335128784, |
|
"rewards/rejected": -0.8163818120956421, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.15320210207535406, |
|
"grad_norm": 17.220603942871094, |
|
"learning_rate": 9.917667507165988e-07, |
|
"logits/chosen": -2.8363280296325684, |
|
"logits/rejected": -2.8285155296325684, |
|
"logps/chosen": -77.04374694824219, |
|
"logps/rejected": -70.4375, |
|
"loss": 0.2564, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.592675805091858, |
|
"rewards/margins": 2.5828003883361816, |
|
"rewards/rejected": -0.9895995855331421, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.15676494165850183, |
|
"grad_norm": 8.446002960205078, |
|
"learning_rate": 9.90603949951661e-07, |
|
"logits/chosen": -2.8246092796325684, |
|
"logits/rejected": -2.837109327316284, |
|
"logps/chosen": -91.78125, |
|
"logps/rejected": -82.59375, |
|
"loss": 0.2734, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.702734351158142, |
|
"rewards/margins": 2.5569825172424316, |
|
"rewards/rejected": -0.854077160358429, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.1603277812416496, |
|
"grad_norm": 7.998837471008301, |
|
"learning_rate": 9.89365143431656e-07, |
|
"logits/chosen": -2.815624952316284, |
|
"logits/rejected": -2.842578172683716, |
|
"logps/chosen": -77.2125015258789, |
|
"logps/rejected": -77.8125, |
|
"loss": 0.1894, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.030078172683716, |
|
"rewards/margins": 2.8941407203674316, |
|
"rewards/rejected": -0.8644775152206421, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.16389062082479736, |
|
"grad_norm": 9.701436996459961, |
|
"learning_rate": 9.880505230760025e-07, |
|
"logits/chosen": -2.787890672683716, |
|
"logits/rejected": -2.826171875, |
|
"logps/chosen": -73.625, |
|
"logps/rejected": -74.35624694824219, |
|
"loss": 0.2395, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 2.212085008621216, |
|
"rewards/margins": 2.756884813308716, |
|
"rewards/rejected": -0.543957531452179, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.16745346040794512, |
|
"grad_norm": 8.417997360229492, |
|
"learning_rate": 9.866602925494141e-07, |
|
"logits/chosen": -2.7718749046325684, |
|
"logits/rejected": -2.817187547683716, |
|
"logps/chosen": -90.19999694824219, |
|
"logps/rejected": -81.1500015258789, |
|
"loss": 0.2562, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 1.513342261314392, |
|
"rewards/margins": 2.689257860183716, |
|
"rewards/rejected": -1.1742675304412842, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.1710162999910929, |
|
"grad_norm": 7.670560359954834, |
|
"learning_rate": 9.851946672303459e-07, |
|
"logits/chosen": -2.793750047683716, |
|
"logits/rejected": -2.788281202316284, |
|
"logps/chosen": -96.0374984741211, |
|
"logps/rejected": -86.01249694824219, |
|
"loss": 0.2326, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.3016846179962158, |
|
"rewards/margins": 3.0846190452575684, |
|
"rewards/rejected": -1.782128930091858, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.17457913957424068, |
|
"grad_norm": 12.10120964050293, |
|
"learning_rate": 9.836538741776283e-07, |
|
"logits/chosen": -2.791015625, |
|
"logits/rejected": -2.802734375, |
|
"logps/chosen": -89.48124694824219, |
|
"logps/rejected": -85.63749694824219, |
|
"loss": 0.2696, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.33062744140625, |
|
"rewards/margins": 2.7452392578125, |
|
"rewards/rejected": -1.411718726158142, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.17814197915738844, |
|
"grad_norm": 9.357841491699219, |
|
"learning_rate": 9.8203815209529e-07, |
|
"logits/chosen": -2.78125, |
|
"logits/rejected": -2.8042969703674316, |
|
"logps/chosen": -73.15625, |
|
"logps/rejected": -73.1312484741211, |
|
"loss": 0.1795, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.027270555496216, |
|
"rewards/margins": 2.994921922683716, |
|
"rewards/rejected": -0.967480480670929, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1817048187405362, |
|
"grad_norm": 8.387929916381836, |
|
"learning_rate": 9.80347751295577e-07, |
|
"logits/chosen": -2.7914061546325684, |
|
"logits/rejected": -2.8046875, |
|
"logps/chosen": -96.3187484741211, |
|
"logps/rejected": -98.40625, |
|
"loss": 0.1988, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 2.1563477516174316, |
|
"rewards/margins": 3.224609375, |
|
"rewards/rejected": -1.0703246593475342, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.18526765832368397, |
|
"grad_norm": 7.9508137702941895, |
|
"learning_rate": 9.78582933660175e-07, |
|
"logits/chosen": -2.7789063453674316, |
|
"logits/rejected": -2.7972655296325684, |
|
"logps/chosen": -85.13749694824219, |
|
"logps/rejected": -84.64375305175781, |
|
"loss": 0.2766, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 2.044140577316284, |
|
"rewards/margins": 2.802441358566284, |
|
"rewards/rejected": -0.75970458984375, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.18883049790683173, |
|
"grad_norm": 6.185698509216309, |
|
"learning_rate": 9.767439725996362e-07, |
|
"logits/chosen": -2.753124952316284, |
|
"logits/rejected": -2.770312547683716, |
|
"logps/chosen": -89.39375305175781, |
|
"logps/rejected": -89.0562515258789, |
|
"loss": 0.2549, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.800146460533142, |
|
"rewards/margins": 2.896484375, |
|
"rewards/rejected": -1.096582055091858, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.19239333748997953, |
|
"grad_norm": 9.08353328704834, |
|
"learning_rate": 9.748311530110229e-07, |
|
"logits/chosen": -2.748828172683716, |
|
"logits/rejected": -2.7718749046325684, |
|
"logps/chosen": -100.5625, |
|
"logps/rejected": -97.46875, |
|
"loss": 0.2605, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.925512671470642, |
|
"rewards/margins": 3.23291015625, |
|
"rewards/rejected": -1.30859375, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.1959561770731273, |
|
"grad_norm": 4.137091159820557, |
|
"learning_rate": 9.728447712337691e-07, |
|
"logits/chosen": -2.744921922683716, |
|
"logits/rejected": -2.759765625, |
|
"logps/chosen": -87.5999984741211, |
|
"logps/rejected": -91.3062515258789, |
|
"loss": 0.2278, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.05633544921875, |
|
"rewards/margins": 3.37890625, |
|
"rewards/rejected": -1.320715308189392, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.19951901665627506, |
|
"grad_norm": 12.414250373840332, |
|
"learning_rate": 9.707851350037725e-07, |
|
"logits/chosen": -2.729296922683716, |
|
"logits/rejected": -2.7542967796325684, |
|
"logps/chosen": -77.55000305175781, |
|
"logps/rejected": -77.8125, |
|
"loss": 0.1781, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 2.095141649246216, |
|
"rewards/margins": 3.4671874046325684, |
|
"rewards/rejected": -1.3744628429412842, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.20308185623942282, |
|
"grad_norm": 7.794823169708252, |
|
"learning_rate": 9.686525634057183e-07, |
|
"logits/chosen": -2.733203172683716, |
|
"logits/rejected": -2.7464842796325684, |
|
"logps/chosen": -99.1500015258789, |
|
"logps/rejected": -100.1500015258789, |
|
"loss": 0.231, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 2.1944947242736816, |
|
"rewards/margins": 3.321044921875, |
|
"rewards/rejected": -1.124609351158142, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.20664469582257058, |
|
"grad_norm": 8.649138450622559, |
|
"learning_rate": 9.664473868236452e-07, |
|
"logits/chosen": -2.755859375, |
|
"logits/rejected": -2.76953125, |
|
"logps/chosen": -80.39375305175781, |
|
"logps/rejected": -76.4312515258789, |
|
"loss": 0.1813, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 2.514404296875, |
|
"rewards/margins": 3.5414061546325684, |
|
"rewards/rejected": -1.02783203125, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.21020753540571835, |
|
"grad_norm": 12.671677589416504, |
|
"learning_rate": 9.641699468897624e-07, |
|
"logits/chosen": -2.7093749046325684, |
|
"logits/rejected": -2.739453077316284, |
|
"logps/chosen": -60.45000076293945, |
|
"logps/rejected": -56.525001525878906, |
|
"loss": 0.2041, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 2.3620848655700684, |
|
"rewards/margins": 3.497265577316284, |
|
"rewards/rejected": -1.1365234851837158, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.2137703749888661, |
|
"grad_norm": 4.332060813903809, |
|
"learning_rate": 9.618205964315222e-07, |
|
"logits/chosen": -2.727734327316284, |
|
"logits/rejected": -2.757031202316284, |
|
"logps/chosen": -98.5374984741211, |
|
"logps/rejected": -100.4937515258789, |
|
"loss": 0.2254, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.587915062904358, |
|
"rewards/margins": 3.080639600753784, |
|
"rewards/rejected": -1.492285132408142, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2173332145720139, |
|
"grad_norm": 15.351773262023926, |
|
"learning_rate": 9.593996994169595e-07, |
|
"logits/chosen": -2.7203125953674316, |
|
"logits/rejected": -2.7249999046325684, |
|
"logps/chosen": -75.66874694824219, |
|
"logps/rejected": -77.15625, |
|
"loss": 0.2185, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 2.589599609375, |
|
"rewards/margins": 3.6607422828674316, |
|
"rewards/rejected": -1.0699951648712158, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.22089605415516167, |
|
"grad_norm": 26.818561553955078, |
|
"learning_rate": 9.569076308983043e-07, |
|
"logits/chosen": -2.696484327316284, |
|
"logits/rejected": -2.7085938453674316, |
|
"logps/chosen": -75.07499694824219, |
|
"logps/rejected": -86.9749984741211, |
|
"loss": 0.2727, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 2.1810059547424316, |
|
"rewards/margins": 3.570849657058716, |
|
"rewards/rejected": -1.389892578125, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.22445889373830943, |
|
"grad_norm": 8.661190032958984, |
|
"learning_rate": 9.54344776953878e-07, |
|
"logits/chosen": -2.6617188453674316, |
|
"logits/rejected": -2.6832032203674316, |
|
"logps/chosen": -79.5875015258789, |
|
"logps/rejected": -76.4124984741211, |
|
"loss": 0.214, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 1.545873999595642, |
|
"rewards/margins": 3.3358397483825684, |
|
"rewards/rejected": -1.7917969226837158, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.2280217333214572, |
|
"grad_norm": 10.709281921386719, |
|
"learning_rate": 9.517115346282807e-07, |
|
"logits/chosen": -2.677734375, |
|
"logits/rejected": -2.713671922683716, |
|
"logps/chosen": -81.0687484741211, |
|
"logps/rejected": -86.5, |
|
"loss": 0.3241, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 2.3992676734924316, |
|
"rewards/margins": 3.3475098609924316, |
|
"rewards/rejected": -0.9508301019668579, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.23158457290460496, |
|
"grad_norm": 8.042264938354492, |
|
"learning_rate": 9.490083118708802e-07, |
|
"logits/chosen": -2.666015625, |
|
"logits/rejected": -2.6875, |
|
"logps/chosen": -82.4937515258789, |
|
"logps/rejected": -84.82499694824219, |
|
"loss": 0.2036, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 2.216723680496216, |
|
"rewards/margins": 3.622363328933716, |
|
"rewards/rejected": -1.403662085533142, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.23514741248775273, |
|
"grad_norm": 5.525402545928955, |
|
"learning_rate": 9.462355274726115e-07, |
|
"logits/chosen": -2.670703172683716, |
|
"logits/rejected": -2.70703125, |
|
"logps/chosen": -77.34375, |
|
"logps/rejected": -76.76875305175781, |
|
"loss": 0.1855, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.365652561187744, |
|
"rewards/margins": 3.461718797683716, |
|
"rewards/rejected": -1.0956542491912842, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.23871025207090052, |
|
"grad_norm": 10.05048942565918, |
|
"learning_rate": 9.433936110010956e-07, |
|
"logits/chosen": -2.667187452316284, |
|
"logits/rejected": -2.6871094703674316, |
|
"logps/chosen": -78.17500305175781, |
|
"logps/rejected": -76.9937515258789, |
|
"loss": 0.1874, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 2.0904572010040283, |
|
"rewards/margins": 3.5014405250549316, |
|
"rewards/rejected": -1.4128906726837158, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.24227309165404828, |
|
"grad_norm": 9.706530570983887, |
|
"learning_rate": 9.404830027340911e-07, |
|
"logits/chosen": -2.6640625, |
|
"logits/rejected": -2.694531202316284, |
|
"logps/chosen": -69.0562515258789, |
|
"logps/rejected": -75.125, |
|
"loss": 0.2029, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.834741234779358, |
|
"rewards/margins": 3.663867235183716, |
|
"rewards/rejected": -1.8297851085662842, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.24583593123719605, |
|
"grad_norm": 6.624788284301758, |
|
"learning_rate": 9.375041535912838e-07, |
|
"logits/chosen": -2.639453172683716, |
|
"logits/rejected": -2.6953125, |
|
"logps/chosen": -92.89375305175781, |
|
"logps/rejected": -91.64375305175781, |
|
"loss": 0.1853, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.5345947742462158, |
|
"rewards/margins": 3.5376954078674316, |
|
"rewards/rejected": -2.003124952316284, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.2493987708203438, |
|
"grad_norm": 41.15028762817383, |
|
"learning_rate": 9.344575250644295e-07, |
|
"logits/chosen": -2.6402344703674316, |
|
"logits/rejected": -2.6488280296325684, |
|
"logps/chosen": -79.6812515258789, |
|
"logps/rejected": -83.7249984741211, |
|
"loss": 0.2387, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.3847413063049316, |
|
"rewards/margins": 4.047461032867432, |
|
"rewards/rejected": -1.664794921875, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2529616104034916, |
|
"grad_norm": 10.601265907287598, |
|
"learning_rate": 9.313435891458587e-07, |
|
"logits/chosen": -2.651562452316284, |
|
"logits/rejected": -2.67578125, |
|
"logps/chosen": -81.4000015258789, |
|
"logps/rejected": -91.5875015258789, |
|
"loss": 0.1739, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 2.215380907058716, |
|
"rewards/margins": 3.997180223464966, |
|
"rewards/rejected": -1.7802734375, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.25652444998663937, |
|
"grad_norm": 22.471799850463867, |
|
"learning_rate": 9.281628282553535e-07, |
|
"logits/chosen": -2.627734422683716, |
|
"logits/rejected": -2.673828125, |
|
"logps/chosen": -83.95625305175781, |
|
"logps/rejected": -93.0, |
|
"loss": 0.2203, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 2.1271729469299316, |
|
"rewards/margins": 3.6353516578674316, |
|
"rewards/rejected": -1.506982445716858, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2600872895697871, |
|
"grad_norm": 203.5772705078125, |
|
"learning_rate": 9.249157351654104e-07, |
|
"logits/chosen": -2.643359422683716, |
|
"logits/rejected": -2.676562547683716, |
|
"logps/chosen": -89.5687484741211, |
|
"logps/rejected": -84.54374694824219, |
|
"loss": 0.212, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.6217529773712158, |
|
"rewards/margins": 3.0572266578674316, |
|
"rewards/rejected": -1.4359023571014404, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.2636501291529349, |
|
"grad_norm": 9.114107131958008, |
|
"learning_rate": 9.216028129248985e-07, |
|
"logits/chosen": -2.63671875, |
|
"logits/rejected": -2.673828125, |
|
"logps/chosen": -92.1500015258789, |
|
"logps/rejected": -92.40625, |
|
"loss": 0.1802, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.858483910560608, |
|
"rewards/margins": 3.7662110328674316, |
|
"rewards/rejected": -1.9098632335662842, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.26721296873608263, |
|
"grad_norm": 8.871646881103516, |
|
"learning_rate": 9.182245747811248e-07, |
|
"logits/chosen": -2.6390624046325684, |
|
"logits/rejected": -2.655078172683716, |
|
"logps/chosen": -91.5, |
|
"logps/rejected": -87.88749694824219, |
|
"loss": 0.2021, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 2.110546827316284, |
|
"rewards/margins": 3.6029295921325684, |
|
"rewards/rejected": -1.4890625476837158, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.2707758083192304, |
|
"grad_norm": 25.285552978515625, |
|
"learning_rate": 9.147815441003221e-07, |
|
"logits/chosen": -2.653125047683716, |
|
"logits/rejected": -2.666796922683716, |
|
"logps/chosen": -91.6875, |
|
"logps/rejected": -100.0687484741211, |
|
"loss": 0.2094, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 2.166583299636841, |
|
"rewards/margins": 3.796191453933716, |
|
"rewards/rejected": -1.6293213367462158, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2743386479023782, |
|
"grad_norm": 11.9544038772583, |
|
"learning_rate": 9.112742542865664e-07, |
|
"logits/chosen": -2.623046875, |
|
"logits/rejected": -2.647656202316284, |
|
"logps/chosen": -69.10624694824219, |
|
"logps/rejected": -73.8125, |
|
"loss": 0.1568, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 2.197265625, |
|
"rewards/margins": 4.13671875, |
|
"rewards/rejected": -1.93994140625, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.27790148748552596, |
|
"grad_norm": 7.545533657073975, |
|
"learning_rate": 9.077032486991407e-07, |
|
"logits/chosen": -2.6390624046325684, |
|
"logits/rejected": -2.6527342796325684, |
|
"logps/chosen": -76.2125015258789, |
|
"logps/rejected": -78.25, |
|
"loss": 0.164, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 2.2819581031799316, |
|
"rewards/margins": 4.1171875, |
|
"rewards/rejected": -1.8315918445587158, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.28146432706867375, |
|
"grad_norm": 17.746479034423828, |
|
"learning_rate": 9.040690805683566e-07, |
|
"logits/chosen": -2.6285157203674316, |
|
"logits/rejected": -2.654296875, |
|
"logps/chosen": -91.58125305175781, |
|
"logps/rejected": -96.57499694824219, |
|
"loss": 0.1974, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 2.3270201683044434, |
|
"rewards/margins": 3.623730421066284, |
|
"rewards/rejected": -1.2992675304412842, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.2850271666518215, |
|
"grad_norm": 6.393121719360352, |
|
"learning_rate": 9.003723129098458e-07, |
|
"logits/chosen": -2.5835938453674316, |
|
"logits/rejected": -2.6171875, |
|
"logps/chosen": -67.17500305175781, |
|
"logps/rejected": -64.5374984741211, |
|
"loss": 0.1381, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 2.3965821266174316, |
|
"rewards/margins": 3.896484375, |
|
"rewards/rejected": -1.499121069908142, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2885900062349693, |
|
"grad_norm": 9.168136596679688, |
|
"learning_rate": 8.966135184373361e-07, |
|
"logits/chosen": -2.59375, |
|
"logits/rejected": -2.611328125, |
|
"logps/chosen": -91.40625, |
|
"logps/rejected": -89.26249694824219, |
|
"loss": 0.1728, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.112548828125, |
|
"rewards/margins": 3.8931641578674316, |
|
"rewards/rejected": -1.783105492591858, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.292152845818117, |
|
"grad_norm": 7.881724834442139, |
|
"learning_rate": 8.927932794739257e-07, |
|
"logits/chosen": -2.578906297683716, |
|
"logits/rejected": -2.610156297683716, |
|
"logps/chosen": -74.96875, |
|
"logps/rejected": -79.33125305175781, |
|
"loss": 0.1594, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.987982153892517, |
|
"rewards/margins": 3.7542967796325684, |
|
"rewards/rejected": -1.769140601158142, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.2957156854012648, |
|
"grad_norm": 17.415807723999023, |
|
"learning_rate": 8.889121878618675e-07, |
|
"logits/chosen": -2.5550780296325684, |
|
"logits/rejected": -2.594921827316284, |
|
"logps/chosen": -76.9124984741211, |
|
"logps/rejected": -78.9375, |
|
"loss": 0.1577, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.731329321861267, |
|
"rewards/margins": 3.586132764816284, |
|
"rewards/rejected": -1.855126976966858, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.2992785249844126, |
|
"grad_norm": 19.104272842407227, |
|
"learning_rate": 8.849708448708789e-07, |
|
"logits/chosen": -2.5941405296325684, |
|
"logits/rejected": -2.607421875, |
|
"logps/chosen": -85.20625305175781, |
|
"logps/rejected": -90.34375, |
|
"loss": 0.1708, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.9668457508087158, |
|
"rewards/margins": 4.154101371765137, |
|
"rewards/rejected": -2.1869139671325684, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.30284136456756033, |
|
"grad_norm": 9.433489799499512, |
|
"learning_rate": 8.809698611049922e-07, |
|
"logits/chosen": -2.5746092796325684, |
|
"logits/rejected": -2.6011719703674316, |
|
"logps/chosen": -89.78125, |
|
"logps/rejected": -102.30000305175781, |
|
"loss": 0.1482, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.6952025890350342, |
|
"rewards/margins": 4.217577934265137, |
|
"rewards/rejected": -2.5201172828674316, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.3064042041507081, |
|
"grad_norm": 9.763529777526855, |
|
"learning_rate": 8.769098564079573e-07, |
|
"logits/chosen": -2.582812547683716, |
|
"logits/rejected": -2.6058592796325684, |
|
"logps/chosen": -77.29374694824219, |
|
"logps/rejected": -87.48750305175781, |
|
"loss": 0.1348, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.206249952316284, |
|
"rewards/margins": 4.6728515625, |
|
"rewards/rejected": -2.46875, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.30996704373385586, |
|
"grad_norm": 110.477294921875, |
|
"learning_rate": 8.727914597672146e-07, |
|
"logits/chosen": -2.569140672683716, |
|
"logits/rejected": -2.6070313453674316, |
|
"logps/chosen": -98.33125305175781, |
|
"logps/rejected": -109.48124694824219, |
|
"loss": 0.1594, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.8028564453125, |
|
"rewards/margins": 3.9544920921325684, |
|
"rewards/rejected": -2.153027296066284, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.31352988331700365, |
|
"grad_norm": 14.672298431396484, |
|
"learning_rate": 8.686153092164492e-07, |
|
"logits/chosen": -2.5316405296325684, |
|
"logits/rejected": -2.575000047683716, |
|
"logps/chosen": -76.9124984741211, |
|
"logps/rejected": -81.6937484741211, |
|
"loss": 0.1387, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.836523413658142, |
|
"rewards/margins": 3.8916015625, |
|
"rewards/rejected": -2.058398485183716, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.31709272290015145, |
|
"grad_norm": 8.469518661499023, |
|
"learning_rate": 8.643820517367467e-07, |
|
"logits/chosen": -2.522656202316284, |
|
"logits/rejected": -2.548828125, |
|
"logps/chosen": -94.51875305175781, |
|
"logps/rejected": -93.4312515258789, |
|
"loss": 0.2304, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.2852294445037842, |
|
"rewards/margins": 3.875537157058716, |
|
"rewards/rejected": -2.592578172683716, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.3206555624832992, |
|
"grad_norm": 17.38793182373047, |
|
"learning_rate": 8.600923431563589e-07, |
|
"logits/chosen": -2.5218749046325684, |
|
"logits/rejected": -2.551953077316284, |
|
"logps/chosen": -97.5625, |
|
"logps/rejected": -101.07499694824219, |
|
"loss": 0.2786, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.6149657964706421, |
|
"rewards/margins": 4.080859184265137, |
|
"rewards/rejected": -3.467578172683716, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.324218402066447, |
|
"grad_norm": 10.183024406433105, |
|
"learning_rate": 8.557468480491035e-07, |
|
"logits/chosen": -2.5523438453674316, |
|
"logits/rejected": -2.5445313453674316, |
|
"logps/chosen": -107.8812484741211, |
|
"logps/rejected": -117.9437484741211, |
|
"loss": 0.2774, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 0.05524902418255806, |
|
"rewards/margins": 4.509814262390137, |
|
"rewards/rejected": -4.458398342132568, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.3277812416495947, |
|
"grad_norm": 5.875920295715332, |
|
"learning_rate": 8.513462396314041e-07, |
|
"logits/chosen": -2.5562500953674316, |
|
"logits/rejected": -2.567578077316284, |
|
"logps/chosen": -102.76875305175781, |
|
"logps/rejected": -110.07499694824219, |
|
"loss": 0.3396, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.13802489638328552, |
|
"rewards/margins": 4.290234565734863, |
|
"rewards/rejected": -4.151757717132568, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3313440812327425, |
|
"grad_norm": 13.03176212310791, |
|
"learning_rate": 8.46891199657995e-07, |
|
"logits/chosen": -2.516796827316284, |
|
"logits/rejected": -2.5433592796325684, |
|
"logps/chosen": -80.4000015258789, |
|
"logps/rejected": -85.76249694824219, |
|
"loss": 0.176, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.7038635015487671, |
|
"rewards/margins": 3.87890625, |
|
"rewards/rejected": -3.173828125, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.33490692081589024, |
|
"grad_norm": 5.999340057373047, |
|
"learning_rate": 8.423824183163015e-07, |
|
"logits/chosen": -2.5425782203674316, |
|
"logits/rejected": -2.55859375, |
|
"logps/chosen": -86.125, |
|
"logps/rejected": -90.82499694824219, |
|
"loss": 0.1629, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.2559814453125, |
|
"rewards/margins": 4.011328220367432, |
|
"rewards/rejected": -2.756640672683716, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.33846976039903803, |
|
"grad_norm": 10.494653701782227, |
|
"learning_rate": 8.37820594119514e-07, |
|
"logits/chosen": -2.5570311546325684, |
|
"logits/rejected": -2.5503907203674316, |
|
"logps/chosen": -94.70625305175781, |
|
"logps/rejected": -101.88749694824219, |
|
"loss": 0.3166, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 0.986376941204071, |
|
"rewards/margins": 3.810375928878784, |
|
"rewards/rejected": -2.823193311691284, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.3420325999821858, |
|
"grad_norm": 6.390571594238281, |
|
"learning_rate": 8.332064337983725e-07, |
|
"logits/chosen": -2.508593797683716, |
|
"logits/rejected": -2.536328077316284, |
|
"logps/chosen": -82.83125305175781, |
|
"logps/rejected": -84.9124984741211, |
|
"loss": 0.1201, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.835424780845642, |
|
"rewards/margins": 4.11328125, |
|
"rewards/rejected": -2.278515577316284, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.34559543956533356, |
|
"grad_norm": 62.394775390625, |
|
"learning_rate": 8.285406521916776e-07, |
|
"logits/chosen": -2.54296875, |
|
"logits/rejected": -2.5542969703674316, |
|
"logps/chosen": -86.91874694824219, |
|
"logps/rejected": -94.3499984741211, |
|
"loss": 0.1883, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.5449950695037842, |
|
"rewards/margins": 4.182275295257568, |
|
"rewards/rejected": -2.636767625808716, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.34915827914848135, |
|
"grad_norm": 9.866166114807129, |
|
"learning_rate": 8.23823972135546e-07, |
|
"logits/chosen": -2.473437547683716, |
|
"logits/rejected": -2.501171827316284, |
|
"logps/chosen": -71.8499984741211, |
|
"logps/rejected": -76.29374694824219, |
|
"loss": 0.1806, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.8723846673965454, |
|
"rewards/margins": 3.8558592796325684, |
|
"rewards/rejected": -1.983862280845642, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3527211187316291, |
|
"grad_norm": 8.677438735961914, |
|
"learning_rate": 8.190571243514265e-07, |
|
"logits/chosen": -2.542187452316284, |
|
"logits/rejected": -2.580859422683716, |
|
"logps/chosen": -94.5062484741211, |
|
"logps/rejected": -103.3375015258789, |
|
"loss": 0.1849, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.524304211139679, |
|
"rewards/margins": 3.6502928733825684, |
|
"rewards/rejected": -3.1285157203674316, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.3562839583147769, |
|
"grad_norm": 7.261411666870117, |
|
"learning_rate": 8.142408473328944e-07, |
|
"logits/chosen": -2.5062499046325684, |
|
"logits/rejected": -2.521484375, |
|
"logps/chosen": -70.9312515258789, |
|
"logps/rejected": -89.0250015258789, |
|
"loss": 0.1543, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.715905785560608, |
|
"rewards/margins": 4.405468940734863, |
|
"rewards/rejected": -2.6905274391174316, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3598467978979246, |
|
"grad_norm": 10.193512916564941, |
|
"learning_rate": 8.093758872312423e-07, |
|
"logits/chosen": -2.5394530296325684, |
|
"logits/rejected": -2.5746092796325684, |
|
"logps/chosen": -95.79374694824219, |
|
"logps/rejected": -104.95625305175781, |
|
"loss": 0.1999, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.8937820196151733, |
|
"rewards/margins": 4.2265625, |
|
"rewards/rejected": -3.332812547683716, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.3634096374810724, |
|
"grad_norm": 9.057995796203613, |
|
"learning_rate": 8.044629977398845e-07, |
|
"logits/chosen": -2.521484375, |
|
"logits/rejected": -2.5492186546325684, |
|
"logps/chosen": -84.61250305175781, |
|
"logps/rejected": -100.2125015258789, |
|
"loss": 0.226, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.5130493640899658, |
|
"rewards/margins": 4.856640815734863, |
|
"rewards/rejected": -3.340136766433716, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3669724770642202, |
|
"grad_norm": 6.788172245025635, |
|
"learning_rate": 7.995029399775912e-07, |
|
"logits/chosen": -2.4839844703674316, |
|
"logits/rejected": -2.5132813453674316, |
|
"logps/chosen": -75.07499694824219, |
|
"logps/rejected": -85.2437515258789, |
|
"loss": 0.1204, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 1.325585961341858, |
|
"rewards/margins": 4.249218940734863, |
|
"rewards/rejected": -2.926562547683716, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.37053531664736794, |
|
"grad_norm": 64.83377838134766, |
|
"learning_rate": 7.944964823705759e-07, |
|
"logits/chosen": -2.4761719703674316, |
|
"logits/rejected": -2.510546922683716, |
|
"logps/chosen": -85.46875, |
|
"logps/rejected": -98.6312484741211, |
|
"loss": 0.14, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.808642566204071, |
|
"rewards/margins": 3.9560546875, |
|
"rewards/rejected": -3.145312547683716, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.37409815623051573, |
|
"grad_norm": 10.828370094299316, |
|
"learning_rate": 7.894444005334471e-07, |
|
"logits/chosen": -2.483593702316284, |
|
"logits/rejected": -2.5093750953674316, |
|
"logps/chosen": -82.57499694824219, |
|
"logps/rejected": -83.0999984741211, |
|
"loss": 0.3036, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.2034575939178467, |
|
"rewards/margins": 3.845410108566284, |
|
"rewards/rejected": -2.64306640625, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.37766099581366347, |
|
"grad_norm": 6.788658618927002, |
|
"learning_rate": 7.843474771490485e-07, |
|
"logits/chosen": -2.498046875, |
|
"logits/rejected": -2.516796827316284, |
|
"logps/chosen": -83.8125, |
|
"logps/rejected": -92.9375, |
|
"loss": 0.1235, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 2.125561475753784, |
|
"rewards/margins": 4.599413871765137, |
|
"rewards/rejected": -2.474902391433716, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.38122383539681126, |
|
"grad_norm": 11.926194190979004, |
|
"learning_rate": 7.792065018472035e-07, |
|
"logits/chosen": -2.485156297683716, |
|
"logits/rejected": -2.4925780296325684, |
|
"logps/chosen": -75.98124694824219, |
|
"logps/rejected": -84.58125305175781, |
|
"loss": 0.2244, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.962408423423767, |
|
"rewards/margins": 4.793554782867432, |
|
"rewards/rejected": -2.8294920921325684, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.38478667497995905, |
|
"grad_norm": 6.597282886505127, |
|
"learning_rate": 7.740222710823836e-07, |
|
"logits/chosen": -2.505859375, |
|
"logits/rejected": -2.51953125, |
|
"logps/chosen": -87.7874984741211, |
|
"logps/rejected": -94.8187484741211, |
|
"loss": 0.1883, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.200537085533142, |
|
"rewards/margins": 4.257177829742432, |
|
"rewards/rejected": -3.0601563453674316, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3883495145631068, |
|
"grad_norm": 7.958057880401611, |
|
"learning_rate": 7.687955880103189e-07, |
|
"logits/chosen": -2.490234375, |
|
"logits/rejected": -2.503124952316284, |
|
"logps/chosen": -90.0062484741211, |
|
"logps/rejected": -96.61250305175781, |
|
"loss": 0.1619, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.556249976158142, |
|
"rewards/margins": 4.306445121765137, |
|
"rewards/rejected": -2.7484374046325684, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.3919123541462546, |
|
"grad_norm": 5.72054386138916, |
|
"learning_rate": 7.635272623635716e-07, |
|
"logits/chosen": -2.524609327316284, |
|
"logits/rejected": -2.544140577316284, |
|
"logps/chosen": -84.875, |
|
"logps/rejected": -97.2874984741211, |
|
"loss": 0.172, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.576513648033142, |
|
"rewards/margins": 4.614648342132568, |
|
"rewards/rejected": -3.0403809547424316, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3954751937294023, |
|
"grad_norm": 10.896967887878418, |
|
"learning_rate": 7.582181103260896e-07, |
|
"logits/chosen": -2.51171875, |
|
"logits/rejected": -2.5289063453674316, |
|
"logps/chosen": -97.76875305175781, |
|
"logps/rejected": -115.2562484741211, |
|
"loss": 0.154, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.833831787109375, |
|
"rewards/margins": 4.320898532867432, |
|
"rewards/rejected": -3.486132860183716, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.3990380333125501, |
|
"grad_norm": 4.492983341217041, |
|
"learning_rate": 7.528689544067612e-07, |
|
"logits/chosen": -2.516796827316284, |
|
"logits/rejected": -2.533203125, |
|
"logps/chosen": -95.07499694824219, |
|
"logps/rejected": -107.0999984741211, |
|
"loss": 0.2136, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.541247546672821, |
|
"rewards/margins": 3.962109327316284, |
|
"rewards/rejected": -3.4200196266174316, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.40260087289569785, |
|
"grad_norm": 7.211833477020264, |
|
"learning_rate": 7.474806233119889e-07, |
|
"logits/chosen": -2.5054688453674316, |
|
"logits/rejected": -2.557812452316284, |
|
"logps/chosen": -97.6624984741211, |
|
"logps/rejected": -107.6812515258789, |
|
"loss": 0.1502, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.6728760004043579, |
|
"rewards/margins": 3.773632764816284, |
|
"rewards/rejected": -3.101757764816284, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.40616371247884564, |
|
"grad_norm": 10.611228942871094, |
|
"learning_rate": 7.420539518173053e-07, |
|
"logits/chosen": -2.501171827316284, |
|
"logits/rejected": -2.5230469703674316, |
|
"logps/chosen": -84.89375305175781, |
|
"logps/rejected": -96.04374694824219, |
|
"loss": 0.2756, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 0.957080066204071, |
|
"rewards/margins": 4.474218845367432, |
|
"rewards/rejected": -3.5152344703674316, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.40972655206199343, |
|
"grad_norm": 8.464762687683105, |
|
"learning_rate": 7.365897806380457e-07, |
|
"logits/chosen": -2.4691405296325684, |
|
"logits/rejected": -2.490234375, |
|
"logps/chosen": -74.5875015258789, |
|
"logps/rejected": -92.9375, |
|
"loss": 0.1274, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.570068359375, |
|
"rewards/margins": 4.603906154632568, |
|
"rewards/rejected": -3.0337891578674316, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.41328939164514117, |
|
"grad_norm": 16.32123565673828, |
|
"learning_rate": 7.310889562991036e-07, |
|
"logits/chosen": -2.458203077316284, |
|
"logits/rejected": -2.479687452316284, |
|
"logps/chosen": -94.0999984741211, |
|
"logps/rejected": -104.98124694824219, |
|
"loss": 0.1985, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.3551514148712158, |
|
"rewards/margins": 4.083203315734863, |
|
"rewards/rejected": -2.726855516433716, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.41685223122828896, |
|
"grad_norm": 5.781502723693848, |
|
"learning_rate": 7.255523310037832e-07, |
|
"logits/chosen": -2.442187547683716, |
|
"logits/rejected": -2.4574217796325684, |
|
"logps/chosen": -79.4625015258789, |
|
"logps/rejected": -91.8687515258789, |
|
"loss": 0.1093, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.938916027545929, |
|
"rewards/margins": 4.7197265625, |
|
"rewards/rejected": -3.7837891578674316, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.4204150708114367, |
|
"grad_norm": 6.917849540710449, |
|
"learning_rate": 7.199807625017749e-07, |
|
"logits/chosen": -2.450390577316284, |
|
"logits/rejected": -2.4691405296325684, |
|
"logps/chosen": -93.23750305175781, |
|
"logps/rejected": -97.26249694824219, |
|
"loss": 0.1563, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.2737548351287842, |
|
"rewards/margins": 4.183203220367432, |
|
"rewards/rejected": -2.9102845191955566, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.4239779103945845, |
|
"grad_norm": 14.325077056884766, |
|
"learning_rate": 7.143751139562694e-07, |
|
"logits/chosen": -2.4664063453674316, |
|
"logits/rejected": -2.4683594703674316, |
|
"logps/chosen": -100.33125305175781, |
|
"logps/rejected": -115.8812484741211, |
|
"loss": 0.2218, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.18304443359375, |
|
"rewards/margins": 3.9588866233825684, |
|
"rewards/rejected": -3.7740235328674316, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.4275407499777322, |
|
"grad_norm": 8.037822723388672, |
|
"learning_rate": 7.08736253810235e-07, |
|
"logits/chosen": -2.401171922683716, |
|
"logits/rejected": -2.4214844703674316, |
|
"logps/chosen": -77.76249694824219, |
|
"logps/rejected": -88.64375305175781, |
|
"loss": 0.119, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.3977539539337158, |
|
"rewards/margins": 4.58984375, |
|
"rewards/rejected": -3.192578077316284, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.43110358956088, |
|
"grad_norm": 5.8383636474609375, |
|
"learning_rate": 7.030650556518742e-07, |
|
"logits/chosen": -2.444531202316284, |
|
"logits/rejected": -2.473828077316284, |
|
"logps/chosen": -93.4437484741211, |
|
"logps/rejected": -104.63749694824219, |
|
"loss": 0.1362, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.25640869140625, |
|
"rewards/margins": 4.8349609375, |
|
"rewards/rejected": -3.578125, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.4346664291440278, |
|
"grad_norm": 4.72075891494751, |
|
"learning_rate": 6.973623980792874e-07, |
|
"logits/chosen": -2.4136719703674316, |
|
"logits/rejected": -2.423828125, |
|
"logps/chosen": -90.25, |
|
"logps/rejected": -103.0562515258789, |
|
"loss": 0.2161, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.163671851158142, |
|
"rewards/margins": 4.592236518859863, |
|
"rewards/rejected": -3.4292969703674316, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.43822926872717555, |
|
"grad_norm": 7.089286804199219, |
|
"learning_rate": 6.916291645643557e-07, |
|
"logits/chosen": -2.4195313453674316, |
|
"logits/rejected": -2.457812547683716, |
|
"logps/chosen": -89.0999984741211, |
|
"logps/rejected": -115.44999694824219, |
|
"loss": 0.1713, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 0.977294921875, |
|
"rewards/margins": 4.731640815734863, |
|
"rewards/rejected": -3.75390625, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.44179210831032334, |
|
"grad_norm": 10.222452163696289, |
|
"learning_rate": 6.858662433158724e-07, |
|
"logits/chosen": -2.411328077316284, |
|
"logits/rejected": -2.451953172683716, |
|
"logps/chosen": -105.6187515258789, |
|
"logps/rejected": -113.46875, |
|
"loss": 0.1649, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 0.13162842392921448, |
|
"rewards/margins": 4.344336032867432, |
|
"rewards/rejected": -4.2119140625, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4453549478934711, |
|
"grad_norm": 4.796070575714111, |
|
"learning_rate": 6.800745271419382e-07, |
|
"logits/chosen": -2.382031202316284, |
|
"logits/rejected": -2.408984422683716, |
|
"logps/chosen": -75.1312484741211, |
|
"logps/rejected": -81.4375, |
|
"loss": 0.1686, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.6754150390625, |
|
"rewards/margins": 4.466406345367432, |
|
"rewards/rejected": -2.793652296066284, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.44891778747661887, |
|
"grad_norm": 12.59176254272461, |
|
"learning_rate": 6.742549133116458e-07, |
|
"logits/chosen": -2.393359422683716, |
|
"logits/rejected": -2.428906202316284, |
|
"logps/chosen": -79.39375305175781, |
|
"logps/rejected": -99.51875305175781, |
|
"loss": 0.2592, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.839855968952179, |
|
"rewards/margins": 4.597460746765137, |
|
"rewards/rejected": -3.752734422683716, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.45248062705976666, |
|
"grad_norm": 11.1537504196167, |
|
"learning_rate": 6.684083034160716e-07, |
|
"logits/chosen": -2.4027342796325684, |
|
"logits/rejected": -2.3941407203674316, |
|
"logps/chosen": -88.94999694824219, |
|
"logps/rejected": -94.60624694824219, |
|
"loss": 0.1405, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.701879858970642, |
|
"rewards/margins": 5.148046970367432, |
|
"rewards/rejected": -3.4483399391174316, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.4560434666429144, |
|
"grad_norm": 6.074214458465576, |
|
"learning_rate": 6.62535603228599e-07, |
|
"logits/chosen": -2.3843750953674316, |
|
"logits/rejected": -2.404296875, |
|
"logps/chosen": -79.69999694824219, |
|
"logps/rejected": -93.61250305175781, |
|
"loss": 0.1523, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.164160132408142, |
|
"rewards/margins": 4.486718654632568, |
|
"rewards/rejected": -3.321582078933716, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.4596063062260622, |
|
"grad_norm": 8.071775436401367, |
|
"learning_rate": 6.566377225645938e-07, |
|
"logits/chosen": -2.4292969703674316, |
|
"logits/rejected": -2.4781250953674316, |
|
"logps/chosen": -103.96875, |
|
"logps/rejected": -115.1937484741211, |
|
"loss": 0.1832, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.35205078125, |
|
"rewards/margins": 4.561913967132568, |
|
"rewards/rejected": -3.2134766578674316, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.4631691458092099, |
|
"grad_norm": 25.342777252197266, |
|
"learning_rate": 6.507155751404518e-07, |
|
"logits/chosen": -2.3851561546325684, |
|
"logits/rejected": -2.4195313453674316, |
|
"logps/chosen": -92.91874694824219, |
|
"logps/rejected": -109.58125305175781, |
|
"loss": 0.1515, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.623998999595642, |
|
"rewards/margins": 5.013281345367432, |
|
"rewards/rejected": -3.393749952316284, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4667319853923577, |
|
"grad_norm": 12.96956729888916, |
|
"learning_rate": 6.447700784320449e-07, |
|
"logits/chosen": -2.3804688453674316, |
|
"logits/rejected": -2.405468702316284, |
|
"logps/chosen": -76.1875, |
|
"logps/rejected": -93.0999984741211, |
|
"loss": 0.1773, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.213952660560608, |
|
"rewards/margins": 3.945117235183716, |
|
"rewards/rejected": -2.731640577316284, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.47029482497550545, |
|
"grad_norm": 9.139556884765625, |
|
"learning_rate": 6.38802153532582e-07, |
|
"logits/chosen": -2.3753905296325684, |
|
"logits/rejected": -2.393359422683716, |
|
"logps/chosen": -87.54374694824219, |
|
"logps/rejected": -91.7750015258789, |
|
"loss": 0.1862, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.614367663860321, |
|
"rewards/margins": 4.119140625, |
|
"rewards/rejected": -3.505664110183716, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.47385766455865325, |
|
"grad_norm": 7.9081339836120605, |
|
"learning_rate": 6.328127250099111e-07, |
|
"logits/chosen": -2.4085936546325684, |
|
"logits/rejected": -2.419921875, |
|
"logps/chosen": -92.58125305175781, |
|
"logps/rejected": -103.55000305175781, |
|
"loss": 0.3, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.018774390220642, |
|
"rewards/margins": 4.601855278015137, |
|
"rewards/rejected": -3.581372022628784, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.47742050414180104, |
|
"grad_norm": 8.504165649414062, |
|
"learning_rate": 6.268027207632821e-07, |
|
"logits/chosen": -2.376171827316284, |
|
"logits/rejected": -2.381640672683716, |
|
"logps/chosen": -81.94999694824219, |
|
"logps/rejected": -97.55000305175781, |
|
"loss": 0.1193, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.4761962890625, |
|
"rewards/margins": 4.615234375, |
|
"rewards/rejected": -3.1357421875, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4809833437249488, |
|
"grad_norm": 8.712873458862305, |
|
"learning_rate": 6.207730718795948e-07, |
|
"logits/chosen": -2.342968702316284, |
|
"logits/rejected": -2.3753905296325684, |
|
"logps/chosen": -79.13749694824219, |
|
"logps/rejected": -94.98750305175781, |
|
"loss": 0.1471, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.8577514886856079, |
|
"rewards/margins": 3.96484375, |
|
"rewards/rejected": -3.1075196266174316, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.48454618330809657, |
|
"grad_norm": 15.8992338180542, |
|
"learning_rate": 6.147247124891518e-07, |
|
"logits/chosen": -2.3609375953674316, |
|
"logits/rejected": -2.3746094703674316, |
|
"logps/chosen": -82.59375, |
|
"logps/rejected": -91.4124984741211, |
|
"loss": 0.1209, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.4687988758087158, |
|
"rewards/margins": 4.640625, |
|
"rewards/rejected": -3.1724610328674316, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.4881090228912443, |
|
"grad_norm": 7.288065433502197, |
|
"learning_rate": 6.086585796209404e-07, |
|
"logits/chosen": -2.3714842796325684, |
|
"logits/rejected": -2.3773436546325684, |
|
"logps/chosen": -77.9124984741211, |
|
"logps/rejected": -95.8125, |
|
"loss": 0.1402, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.0320556163787842, |
|
"rewards/margins": 4.360156059265137, |
|
"rewards/rejected": -3.329296827316284, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.4916718624743921, |
|
"grad_norm": 75.57015991210938, |
|
"learning_rate": 6.025756130574652e-07, |
|
"logits/chosen": -2.380859375, |
|
"logits/rejected": -2.3902344703674316, |
|
"logps/chosen": -91.91874694824219, |
|
"logps/rejected": -106.76875305175781, |
|
"loss": 0.1206, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.6959717273712158, |
|
"rewards/margins": 4.78515625, |
|
"rewards/rejected": -3.089062452316284, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.49523470205753983, |
|
"grad_norm": 9.346633911132812, |
|
"learning_rate": 5.96476755189155e-07, |
|
"logits/chosen": -2.3636717796325684, |
|
"logits/rejected": -2.3515625, |
|
"logps/chosen": -86.60624694824219, |
|
"logps/rejected": -92.7249984741211, |
|
"loss": 0.2749, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.4318358898162842, |
|
"rewards/margins": 4.559765815734863, |
|
"rewards/rejected": -3.130078077316284, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.4987975416406876, |
|
"grad_norm": 5.443614482879639, |
|
"learning_rate": 5.903629508683649e-07, |
|
"logits/chosen": -2.348437547683716, |
|
"logits/rejected": -2.364453077316284, |
|
"logps/chosen": -75.1624984741211, |
|
"logps/rejected": -92.7874984741211, |
|
"loss": 0.1063, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.4652831554412842, |
|
"rewards/margins": 4.849023342132568, |
|
"rewards/rejected": -3.383984327316284, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5023603812238354, |
|
"grad_norm": 7.599747657775879, |
|
"learning_rate": 5.842351472629959e-07, |
|
"logits/chosen": -2.34765625, |
|
"logits/rejected": -2.385546922683716, |
|
"logps/chosen": -88.2437515258789, |
|
"logps/rejected": -101.01249694824219, |
|
"loss": 0.139, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.15228271484375, |
|
"rewards/margins": 4.364062309265137, |
|
"rewards/rejected": -3.2127928733825684, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.5059232208069832, |
|
"grad_norm": 7.442669868469238, |
|
"learning_rate": 5.780942937097584e-07, |
|
"logits/chosen": -2.3828125, |
|
"logits/rejected": -2.4105467796325684, |
|
"logps/chosen": -82.2562484741211, |
|
"logps/rejected": -106.9437484741211, |
|
"loss": 0.1851, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.423883080482483, |
|
"rewards/margins": 4.710058689117432, |
|
"rewards/rejected": -3.2855467796325684, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5094860603901309, |
|
"grad_norm": 4.387864589691162, |
|
"learning_rate": 5.719413415670976e-07, |
|
"logits/chosen": -2.3765625953674316, |
|
"logits/rejected": -2.382031202316284, |
|
"logps/chosen": -75.7750015258789, |
|
"logps/rejected": -90.01875305175781, |
|
"loss": 0.0862, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 1.4503662586212158, |
|
"rewards/margins": 5.015820503234863, |
|
"rewards/rejected": -3.560351610183716, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.5130488999732787, |
|
"grad_norm": 6.781715393066406, |
|
"learning_rate": 5.657772440678069e-07, |
|
"logits/chosen": -2.362499952316284, |
|
"logits/rejected": -2.3804688453674316, |
|
"logps/chosen": -92.4749984741211, |
|
"logps/rejected": -106.51875305175781, |
|
"loss": 0.1194, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.975146472454071, |
|
"rewards/margins": 4.7578125, |
|
"rewards/rejected": -3.7816405296325684, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5166117395564265, |
|
"grad_norm": 9.871294975280762, |
|
"learning_rate": 5.596029561713493e-07, |
|
"logits/chosen": -2.3695311546325684, |
|
"logits/rejected": -2.384765625, |
|
"logps/chosen": -99.88749694824219, |
|
"logps/rejected": -107.4312515258789, |
|
"loss": 0.1378, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.3436005115509033, |
|
"rewards/margins": 4.878515720367432, |
|
"rewards/rejected": -3.534374952316284, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5201745791395742, |
|
"grad_norm": 7.945125579833984, |
|
"learning_rate": 5.534194344159136e-07, |
|
"logits/chosen": -2.4078125953674316, |
|
"logits/rejected": -2.4156250953674316, |
|
"logps/chosen": -108.8187484741211, |
|
"logps/rejected": -127.9375, |
|
"loss": 0.1956, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.9227539300918579, |
|
"rewards/margins": 5.07421875, |
|
"rewards/rejected": -4.1484375, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5237374187227221, |
|
"grad_norm": 7.44891881942749, |
|
"learning_rate": 5.472276367702236e-07, |
|
"logits/chosen": -2.3570313453674316, |
|
"logits/rejected": -2.366015672683716, |
|
"logps/chosen": -94.91874694824219, |
|
"logps/rejected": -106.01249694824219, |
|
"loss": 0.1449, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.844006359577179, |
|
"rewards/margins": 4.509179592132568, |
|
"rewards/rejected": -3.6656250953674316, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.5273002583058698, |
|
"grad_norm": 18.9903621673584, |
|
"learning_rate": 5.410285224851281e-07, |
|
"logits/chosen": -2.328906297683716, |
|
"logits/rejected": -2.3597655296325684, |
|
"logps/chosen": -83.8187484741211, |
|
"logps/rejected": -95.0062484741211, |
|
"loss": 0.1806, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.957568347454071, |
|
"rewards/margins": 4.262499809265137, |
|
"rewards/rejected": -3.30810546875, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5308630978890175, |
|
"grad_norm": 9.177763938903809, |
|
"learning_rate": 5.348230519449901e-07, |
|
"logits/chosen": -2.382031202316284, |
|
"logits/rejected": -2.3726563453674316, |
|
"logps/chosen": -81.125, |
|
"logps/rejected": -100.1187515258789, |
|
"loss": 0.1606, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.4339721202850342, |
|
"rewards/margins": 4.755859375, |
|
"rewards/rejected": -3.322265625, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.5344259374721653, |
|
"grad_norm": 9.381056785583496, |
|
"learning_rate": 5.286121865189017e-07, |
|
"logits/chosen": -2.362499952316284, |
|
"logits/rejected": -2.3539061546325684, |
|
"logps/chosen": -89.9749984741211, |
|
"logps/rejected": -101.79374694824219, |
|
"loss": 0.1385, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.702539086341858, |
|
"rewards/margins": 4.629687309265137, |
|
"rewards/rejected": -2.930468797683716, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5379887770553131, |
|
"grad_norm": 22.414461135864258, |
|
"learning_rate": 5.223968884117458e-07, |
|
"logits/chosen": -2.3519530296325684, |
|
"logits/rejected": -2.3726563453674316, |
|
"logps/chosen": -98.01875305175781, |
|
"logps/rejected": -104.3375015258789, |
|
"loss": 0.1909, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.934985339641571, |
|
"rewards/margins": 4.401562690734863, |
|
"rewards/rejected": -3.46875, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.5415516166384609, |
|
"grad_norm": 14.190799713134766, |
|
"learning_rate": 5.161781205151293e-07, |
|
"logits/chosen": -2.3734374046325684, |
|
"logits/rejected": -2.405468702316284, |
|
"logps/chosen": -101.76249694824219, |
|
"logps/rejected": -122.0250015258789, |
|
"loss": 0.1682, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.6546630859375, |
|
"rewards/margins": 4.376562595367432, |
|
"rewards/rejected": -3.7222657203674316, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5451144562216086, |
|
"grad_norm": 6.251968860626221, |
|
"learning_rate": 5.099568462582087e-07, |
|
"logits/chosen": -2.319140672683716, |
|
"logits/rejected": -2.3343749046325684, |
|
"logps/chosen": -73.5999984741211, |
|
"logps/rejected": -96.88749694824219, |
|
"loss": 0.106, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 2.074597120285034, |
|
"rewards/margins": 5.300000190734863, |
|
"rewards/rejected": -3.227099657058716, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.5486772958047564, |
|
"grad_norm": 9.493823051452637, |
|
"learning_rate": 5.037340294584323e-07, |
|
"logits/chosen": -2.348437547683716, |
|
"logits/rejected": -2.3765625953674316, |
|
"logps/chosen": -95.94999694824219, |
|
"logps/rejected": -111.57499694824219, |
|
"loss": 0.1611, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 0.665844738483429, |
|
"rewards/margins": 4.508008003234863, |
|
"rewards/rejected": -3.8408203125, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5522401353879042, |
|
"grad_norm": 8.721504211425781, |
|
"learning_rate": 4.975106341722242e-07, |
|
"logits/chosen": -2.349609375, |
|
"logits/rejected": -2.3675780296325684, |
|
"logps/chosen": -81.91874694824219, |
|
"logps/rejected": -91.33125305175781, |
|
"loss": 0.2756, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.837109386920929, |
|
"rewards/margins": 4.758008003234863, |
|
"rewards/rejected": -3.91796875, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.5558029749710519, |
|
"grad_norm": 7.29752779006958, |
|
"learning_rate": 4.912876245456287e-07, |
|
"logits/chosen": -2.3472657203674316, |
|
"logits/rejected": -2.35546875, |
|
"logps/chosen": -81.8499984741211, |
|
"logps/rejected": -105.7249984741211, |
|
"loss": 0.1115, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.476263403892517, |
|
"rewards/margins": 5.165234565734863, |
|
"rewards/rejected": -3.6888670921325684, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5593658145541996, |
|
"grad_norm": 15.453743934631348, |
|
"learning_rate": 4.850659646649433e-07, |
|
"logits/chosen": -2.367968797683716, |
|
"logits/rejected": -2.3695311546325684, |
|
"logps/chosen": -90.0, |
|
"logps/rejected": -111.75, |
|
"loss": 0.1826, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.060827612876892, |
|
"rewards/margins": 4.824999809265137, |
|
"rewards/rejected": -3.762500047683716, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.5629286541373475, |
|
"grad_norm": 7.576887130737305, |
|
"learning_rate": 4.788466184073585e-07, |
|
"logits/chosen": -2.3140625953674316, |
|
"logits/rejected": -2.346484422683716, |
|
"logps/chosen": -82.10624694824219, |
|
"logps/rejected": -102.0250015258789, |
|
"loss": 0.2602, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.2728393077850342, |
|
"rewards/margins": 4.8544921875, |
|
"rewards/rejected": -3.58203125, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5664914937204952, |
|
"grad_norm": 14.275249481201172, |
|
"learning_rate": 4.7263054929163175e-07, |
|
"logits/chosen": -2.322265625, |
|
"logits/rejected": -2.338671922683716, |
|
"logps/chosen": -88.67500305175781, |
|
"logps/rejected": -101.13749694824219, |
|
"loss": 0.1402, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.216101050376892, |
|
"rewards/margins": 4.634179592132568, |
|
"rewards/rejected": -3.4203124046325684, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.570054333303643, |
|
"grad_norm": 5.2038044929504395, |
|
"learning_rate": 4.664187203288167e-07, |
|
"logits/chosen": -2.330078125, |
|
"logits/rejected": -2.3597655296325684, |
|
"logps/chosen": -90.9000015258789, |
|
"logps/rejected": -111.6500015258789, |
|
"loss": 0.1215, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 2.1827635765075684, |
|
"rewards/margins": 5.364453315734863, |
|
"rewards/rejected": -3.179394483566284, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5736171728867908, |
|
"grad_norm": 12.148797035217285, |
|
"learning_rate": 4.6021209387307025e-07, |
|
"logits/chosen": -2.343945264816284, |
|
"logits/rejected": -2.346484422683716, |
|
"logps/chosen": -113.58125305175781, |
|
"logps/rejected": -122.98750305175781, |
|
"loss": 0.215, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.3671875, |
|
"rewards/margins": 4.382177829742432, |
|
"rewards/rejected": -4.015234470367432, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.5771800124699386, |
|
"grad_norm": 13.842655181884766, |
|
"learning_rate": 4.540116314725622e-07, |
|
"logits/chosen": -2.333203077316284, |
|
"logits/rejected": -2.3726563453674316, |
|
"logps/chosen": -101.0374984741211, |
|
"logps/rejected": -114.25, |
|
"loss": 0.2076, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.048675537109375, |
|
"rewards/margins": 4.542578220367432, |
|
"rewards/rejected": -3.491406202316284, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.5807428520530863, |
|
"grad_norm": 8.721251487731934, |
|
"learning_rate": 4.478182937205096e-07, |
|
"logits/chosen": -2.307421922683716, |
|
"logits/rejected": -2.313281297683716, |
|
"logps/chosen": -83.90625, |
|
"logps/rejected": -94.54374694824219, |
|
"loss": 0.3232, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.7117431163787842, |
|
"rewards/margins": 4.647753715515137, |
|
"rewards/rejected": -2.9359374046325684, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.584305691636234, |
|
"grad_norm": 6.957128047943115, |
|
"learning_rate": 4.4163304010635873e-07, |
|
"logits/chosen": -2.3324217796325684, |
|
"logits/rejected": -2.37109375, |
|
"logps/chosen": -92.45625305175781, |
|
"logps/rejected": -104.15625, |
|
"loss": 0.2184, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.1681396961212158, |
|
"rewards/margins": 4.538378715515137, |
|
"rewards/rejected": -3.3729491233825684, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5878685312193819, |
|
"grad_norm": 6.509469032287598, |
|
"learning_rate": 4.3545682886713785e-07, |
|
"logits/chosen": -2.346874952316284, |
|
"logits/rejected": -2.367968797683716, |
|
"logps/chosen": -97.5875015258789, |
|
"logps/rejected": -116.90625, |
|
"loss": 0.1391, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.130731225013733, |
|
"rewards/margins": 5.1904296875, |
|
"rewards/rejected": -4.060937404632568, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.5914313708025296, |
|
"grad_norm": 8.003087997436523, |
|
"learning_rate": 4.2929061683900547e-07, |
|
"logits/chosen": -2.3363280296325684, |
|
"logits/rejected": -2.3394532203674316, |
|
"logps/chosen": -93.26249694824219, |
|
"logps/rejected": -101.44999694824219, |
|
"loss": 0.1551, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.0355713367462158, |
|
"rewards/margins": 4.733593940734863, |
|
"rewards/rejected": -3.700488328933716, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.5949942103856773, |
|
"grad_norm": 9.408036231994629, |
|
"learning_rate": 4.2313535930901357e-07, |
|
"logits/chosen": -2.382031202316284, |
|
"logits/rejected": -2.3828125, |
|
"logps/chosen": -89.88749694824219, |
|
"logps/rejected": -120.4375, |
|
"loss": 0.1501, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.1078369617462158, |
|
"rewards/margins": 5.233984470367432, |
|
"rewards/rejected": -4.1220703125, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.5985570499688252, |
|
"grad_norm": 8.164095878601074, |
|
"learning_rate": 4.1699200986711235e-07, |
|
"logits/chosen": -2.3257813453674316, |
|
"logits/rejected": -2.3433594703674316, |
|
"logps/chosen": -97.4937515258789, |
|
"logps/rejected": -113.9000015258789, |
|
"loss": 0.1906, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.912426769733429, |
|
"rewards/margins": 4.489160060882568, |
|
"rewards/rejected": -3.57568359375, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6021198895519729, |
|
"grad_norm": 17.470115661621094, |
|
"learning_rate": 4.108615202584175e-07, |
|
"logits/chosen": -2.346874952316284, |
|
"logits/rejected": -2.357421875, |
|
"logps/chosen": -97.58125305175781, |
|
"logps/rejected": -116.94999694824219, |
|
"loss": 0.1324, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.9498535394668579, |
|
"rewards/margins": 4.808984279632568, |
|
"rewards/rejected": -3.857714891433716, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.6056827291351207, |
|
"grad_norm": 3.3351809978485107, |
|
"learning_rate": 4.047448402357622e-07, |
|
"logits/chosen": -2.279296875, |
|
"logits/rejected": -2.319140672683716, |
|
"logps/chosen": -70.6937484741211, |
|
"logps/rejected": -86.0250015258789, |
|
"loss": 0.2401, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.5939209461212158, |
|
"rewards/margins": 4.990332126617432, |
|
"rewards/rejected": -3.397656202316284, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6092455687182685, |
|
"grad_norm": 9.039243698120117, |
|
"learning_rate": 3.9864291741255997e-07, |
|
"logits/chosen": -2.325000047683716, |
|
"logits/rejected": -2.33984375, |
|
"logps/chosen": -96.15625, |
|
"logps/rejected": -119.4437484741211, |
|
"loss": 0.0906, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.2700684070587158, |
|
"rewards/margins": 5.166796684265137, |
|
"rewards/rejected": -3.899609327316284, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.6128084083014163, |
|
"grad_norm": 7.753138065338135, |
|
"learning_rate": 3.9255669711599703e-07, |
|
"logits/chosen": -2.283984422683716, |
|
"logits/rejected": -2.3335938453674316, |
|
"logps/chosen": -80.9375, |
|
"logps/rejected": -90.55000305175781, |
|
"loss": 0.2316, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.857067883014679, |
|
"rewards/margins": 4.355615139007568, |
|
"rewards/rejected": -3.503222703933716, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.616371247884564, |
|
"grad_norm": 12.482671737670898, |
|
"learning_rate": 3.8648712224057975e-07, |
|
"logits/chosen": -2.353515625, |
|
"logits/rejected": -2.335156202316284, |
|
"logps/chosen": -90.9937515258789, |
|
"logps/rejected": -116.0062484741211, |
|
"loss": 0.1113, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.6822509765625, |
|
"rewards/margins": 5.601953029632568, |
|
"rewards/rejected": -3.9214844703674316, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.6199340874677117, |
|
"grad_norm": 14.556914329528809, |
|
"learning_rate": 3.804351331020583e-07, |
|
"logits/chosen": -2.313671827316284, |
|
"logits/rejected": -2.323046922683716, |
|
"logps/chosen": -80.7874984741211, |
|
"logps/rejected": -97.20625305175781, |
|
"loss": 0.1081, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.885522484779358, |
|
"rewards/margins": 4.939453125, |
|
"rewards/rejected": -3.056835889816284, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6234969270508596, |
|
"grad_norm": 7.730465412139893, |
|
"learning_rate": 3.744016672917509e-07, |
|
"logits/chosen": -2.325390577316284, |
|
"logits/rejected": -2.3453125953674316, |
|
"logps/chosen": -88.4375, |
|
"logps/rejected": -104.19999694824219, |
|
"loss": 0.219, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.844067394733429, |
|
"rewards/margins": 4.600878715515137, |
|
"rewards/rejected": -3.7572264671325684, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.6270597666340073, |
|
"grad_norm": 11.791308403015137, |
|
"learning_rate": 3.6838765953128914e-07, |
|
"logits/chosen": -2.345703125, |
|
"logits/rejected": -2.3687500953674316, |
|
"logps/chosen": -86.75, |
|
"logps/rejected": -112.86250305175781, |
|
"loss": 0.1796, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.0369873046875, |
|
"rewards/margins": 5.278515815734863, |
|
"rewards/rejected": -4.239062309265137, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.630622606217155, |
|
"grad_norm": 14.919589042663574, |
|
"learning_rate": 3.623940415278086e-07, |
|
"logits/chosen": -2.2718749046325684, |
|
"logits/rejected": -2.275390625, |
|
"logps/chosen": -81.6343765258789, |
|
"logps/rejected": -96.8375015258789, |
|
"loss": 0.1462, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.508947730064392, |
|
"rewards/margins": 5.040625095367432, |
|
"rewards/rejected": -3.529296875, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.6341854458003029, |
|
"grad_norm": 18.789249420166016, |
|
"learning_rate": 3.564217418296055e-07, |
|
"logits/chosen": -2.305468797683716, |
|
"logits/rejected": -2.328125, |
|
"logps/chosen": -95.85624694824219, |
|
"logps/rejected": -111.40625, |
|
"loss": 0.1828, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.99578857421875, |
|
"rewards/margins": 4.781542778015137, |
|
"rewards/rejected": -3.78662109375, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6377482853834506, |
|
"grad_norm": 17.591827392578125, |
|
"learning_rate": 3.5047168568228394e-07, |
|
"logits/chosen": -2.323437452316284, |
|
"logits/rejected": -2.331249952316284, |
|
"logps/chosen": -89.94999694824219, |
|
"logps/rejected": -107.0687484741211, |
|
"loss": 0.2022, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.7130126953125, |
|
"rewards/margins": 4.793847560882568, |
|
"rewards/rejected": -3.081982374191284, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.6413111249665984, |
|
"grad_norm": 17.939722061157227, |
|
"learning_rate": 3.445447948854141e-07, |
|
"logits/chosen": -2.3042969703674316, |
|
"logits/rejected": -2.328906297683716, |
|
"logps/chosen": -101.6187515258789, |
|
"logps/rejected": -114.15625, |
|
"loss": 0.1749, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.1605224609375, |
|
"rewards/margins": 4.574999809265137, |
|
"rewards/rejected": -3.413281202316284, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6448739645497461, |
|
"grad_norm": 27.744525909423828, |
|
"learning_rate": 3.386419876497244e-07, |
|
"logits/chosen": -2.3695311546325684, |
|
"logits/rejected": -2.375, |
|
"logps/chosen": -110.9312515258789, |
|
"logps/rejected": -131.7375030517578, |
|
"loss": 0.1293, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 0.83831787109375, |
|
"rewards/margins": 4.964453220367432, |
|
"rewards/rejected": -4.127734184265137, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.648436804132894, |
|
"grad_norm": 11.689591407775879, |
|
"learning_rate": 3.327641784548494e-07, |
|
"logits/chosen": -2.330859422683716, |
|
"logits/rejected": -2.360546827316284, |
|
"logps/chosen": -97.2874984741211, |
|
"logps/rejected": -111.78125, |
|
"loss": 0.1031, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.701641857624054, |
|
"rewards/margins": 4.783984184265137, |
|
"rewards/rejected": -4.083203315734863, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6519996437160417, |
|
"grad_norm": 4.99397087097168, |
|
"learning_rate": 3.2691227790765674e-07, |
|
"logits/chosen": -2.323046922683716, |
|
"logits/rejected": -2.352734327316284, |
|
"logps/chosen": -81.75, |
|
"logps/rejected": -96.2874984741211, |
|
"loss": 0.0998, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.350073218345642, |
|
"rewards/margins": 5.151757717132568, |
|
"rewards/rejected": -3.7984375953674316, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.6555624832991894, |
|
"grad_norm": 12.093426704406738, |
|
"learning_rate": 3.210871926011724e-07, |
|
"logits/chosen": -2.319531202316284, |
|
"logits/rejected": -2.3267579078674316, |
|
"logps/chosen": -84.58125305175781, |
|
"logps/rejected": -104.80000305175781, |
|
"loss": 0.1376, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.0358489751815796, |
|
"rewards/margins": 4.966894626617432, |
|
"rewards/rejected": -3.9325194358825684, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6591253228823373, |
|
"grad_norm": 7.205654621124268, |
|
"learning_rate": 3.1528982497412983e-07, |
|
"logits/chosen": -2.3238282203674316, |
|
"logits/rejected": -2.3511719703674316, |
|
"logps/chosen": -103.3125, |
|
"logps/rejected": -119.3125, |
|
"loss": 0.1892, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.536755383014679, |
|
"rewards/margins": 4.951171875, |
|
"rewards/rejected": -4.412890434265137, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.662688162465485, |
|
"grad_norm": 14.197951316833496, |
|
"learning_rate": 3.095210731711603e-07, |
|
"logits/chosen": -2.317187547683716, |
|
"logits/rejected": -2.340625047683716, |
|
"logps/chosen": -87.2874984741211, |
|
"logps/rejected": -97.8375015258789, |
|
"loss": 0.1569, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.5652587413787842, |
|
"rewards/margins": 5.275976657867432, |
|
"rewards/rejected": -3.709277391433716, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6662510020486327, |
|
"grad_norm": 4.761596202850342, |
|
"learning_rate": 3.0378183090365086e-07, |
|
"logits/chosen": -2.3031249046325684, |
|
"logits/rejected": -2.323437452316284, |
|
"logps/chosen": -81.5999984741211, |
|
"logps/rejected": -97.79374694824219, |
|
"loss": 0.1236, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.888842761516571, |
|
"rewards/margins": 4.931250095367432, |
|
"rewards/rejected": -4.043749809265137, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.6698138416317805, |
|
"grad_norm": 12.251235008239746, |
|
"learning_rate": 2.9807298731128774e-07, |
|
"logits/chosen": -2.28515625, |
|
"logits/rejected": -2.3050780296325684, |
|
"logps/chosen": -90.19999694824219, |
|
"logps/rejected": -107.4625015258789, |
|
"loss": 0.1699, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.154022216796875, |
|
"rewards/margins": 4.8662109375, |
|
"rewards/rejected": -3.709179639816284, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6733766812149283, |
|
"grad_norm": 9.046751022338867, |
|
"learning_rate": 2.92395426824308e-07, |
|
"logits/chosen": -2.3316407203674316, |
|
"logits/rejected": -2.342968702316284, |
|
"logps/chosen": -86.04374694824219, |
|
"logps/rejected": -103.79374694824219, |
|
"loss": 0.1121, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.208886742591858, |
|
"rewards/margins": 5.004492282867432, |
|
"rewards/rejected": -3.7982420921325684, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.6769395207980761, |
|
"grad_norm": 6.2202277183532715, |
|
"learning_rate": 2.867500290264814e-07, |
|
"logits/chosen": -2.318359375, |
|
"logits/rejected": -2.328125, |
|
"logps/chosen": -92.0999984741211, |
|
"logps/rejected": -108.3499984741211, |
|
"loss": 0.1039, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.389562964439392, |
|
"rewards/margins": 5.326171875, |
|
"rewards/rejected": -3.936328172683716, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6805023603812238, |
|
"grad_norm": 9.26759147644043, |
|
"learning_rate": 2.8113766851884257e-07, |
|
"logits/chosen": -2.31640625, |
|
"logits/rejected": -2.325390577316284, |
|
"logps/chosen": -88.7437515258789, |
|
"logps/rejected": -104.32499694824219, |
|
"loss": 0.0947, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 1.828588843345642, |
|
"rewards/margins": 5.205273628234863, |
|
"rewards/rejected": -3.3765625953674316, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.6840651999643717, |
|
"grad_norm": 7.322399616241455, |
|
"learning_rate": 2.75559214784196e-07, |
|
"logits/chosen": -2.315624952316284, |
|
"logits/rejected": -2.329296827316284, |
|
"logps/chosen": -88.3687515258789, |
|
"logps/rejected": -102.66874694824219, |
|
"loss": 0.1189, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.0693359375, |
|
"rewards/margins": 5.214062690734863, |
|
"rewards/rejected": -4.143164157867432, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6876280395475194, |
|
"grad_norm": 7.353623390197754, |
|
"learning_rate": 2.700155320524119e-07, |
|
"logits/chosen": -2.282421827316284, |
|
"logits/rejected": -2.315234422683716, |
|
"logps/chosen": -76.9937515258789, |
|
"logps/rejected": -90.01249694824219, |
|
"loss": 0.1001, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.221459984779358, |
|
"rewards/margins": 4.799218654632568, |
|
"rewards/rejected": -3.5804686546325684, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.6911908791306671, |
|
"grad_norm": 16.381595611572266, |
|
"learning_rate": 2.6450747916653853e-07, |
|
"logits/chosen": -2.3167967796325684, |
|
"logits/rejected": -2.3304686546325684, |
|
"logps/chosen": -91.53125, |
|
"logps/rejected": -113.7874984741211, |
|
"loss": 0.1369, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.2250487804412842, |
|
"rewards/margins": 5.068749904632568, |
|
"rewards/rejected": -3.841992139816284, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.6947537187138149, |
|
"grad_norm": 6.328347206115723, |
|
"learning_rate": 2.5903590944974787e-07, |
|
"logits/chosen": -2.3199219703674316, |
|
"logits/rejected": -2.327343702316284, |
|
"logps/chosen": -98.11250305175781, |
|
"logps/rejected": -126.39375305175781, |
|
"loss": 0.1543, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.3526367247104645, |
|
"rewards/margins": 5.135546684265137, |
|
"rewards/rejected": -4.785742282867432, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.6983165582969627, |
|
"grad_norm": 4.730679988861084, |
|
"learning_rate": 2.5360167057313507e-07, |
|
"logits/chosen": -2.331249952316284, |
|
"logits/rejected": -2.350390672683716, |
|
"logps/chosen": -101.5625, |
|
"logps/rejected": -118.92500305175781, |
|
"loss": 0.1445, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.254968285560608, |
|
"rewards/margins": 5.006249904632568, |
|
"rewards/rejected": -3.749218702316284, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7018793978801104, |
|
"grad_norm": 27.285436630249023, |
|
"learning_rate": 2.4820560442439597e-07, |
|
"logits/chosen": -2.301953077316284, |
|
"logits/rejected": -2.31640625, |
|
"logps/chosen": -76.82499694824219, |
|
"logps/rejected": -97.3125, |
|
"loss": 0.1167, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.947167992591858, |
|
"rewards/margins": 5.776171684265137, |
|
"rewards/rejected": -3.8340821266174316, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.7054422374632582, |
|
"grad_norm": 9.996295928955078, |
|
"learning_rate": 2.428485469773997e-07, |
|
"logits/chosen": -2.302929639816284, |
|
"logits/rejected": -2.3179688453674316, |
|
"logps/chosen": -95.64375305175781, |
|
"logps/rejected": -113.75, |
|
"loss": 0.1502, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 0.806933581829071, |
|
"rewards/margins": 4.828125, |
|
"rewards/rejected": -4.019690036773682, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.709005077046406, |
|
"grad_norm": 10.70380973815918, |
|
"learning_rate": 2.3753132816267573e-07, |
|
"logits/chosen": -2.3402342796325684, |
|
"logits/rejected": -2.335156202316284, |
|
"logps/chosen": -100.13749694824219, |
|
"logps/rejected": -118.3375015258789, |
|
"loss": 0.131, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.5045897960662842, |
|
"rewards/margins": 5.556250095367432, |
|
"rewards/rejected": -4.048828125, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.7125679166295538, |
|
"grad_norm": 18.55549430847168, |
|
"learning_rate": 2.322547717388406e-07, |
|
"logits/chosen": -2.2847657203674316, |
|
"logits/rejected": -2.299609422683716, |
|
"logps/chosen": -86.5625, |
|
"logps/rejected": -107.65625, |
|
"loss": 0.1475, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.8027588129043579, |
|
"rewards/margins": 4.818457126617432, |
|
"rewards/rejected": -4.014843940734863, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7161307562127015, |
|
"grad_norm": 8.651657104492188, |
|
"learning_rate": 2.2701969516497738e-07, |
|
"logits/chosen": -2.267578125, |
|
"logits/rejected": -2.2945313453674316, |
|
"logps/chosen": -81.4625015258789, |
|
"logps/rejected": -97.1624984741211, |
|
"loss": 0.1165, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.997119128704071, |
|
"rewards/margins": 5.078125, |
|
"rewards/rejected": -4.081250190734863, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.7196935957958492, |
|
"grad_norm": 8.88605785369873, |
|
"learning_rate": 2.2182690947399303e-07, |
|
"logits/chosen": -2.3101563453674316, |
|
"logits/rejected": -2.299999952316284, |
|
"logps/chosen": -94.23750305175781, |
|
"logps/rejected": -113.11250305175781, |
|
"loss": 0.1974, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 0.884765625, |
|
"rewards/margins": 5.18798828125, |
|
"rewards/rejected": -4.304101467132568, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7232564353789971, |
|
"grad_norm": 355.3295593261719, |
|
"learning_rate": 2.1667721914697173e-07, |
|
"logits/chosen": -2.3101563453674316, |
|
"logits/rejected": -2.3218750953674316, |
|
"logps/chosen": -84.8187484741211, |
|
"logps/rejected": -97.64375305175781, |
|
"loss": 0.1277, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.81243896484375, |
|
"rewards/margins": 5.278515815734863, |
|
"rewards/rejected": -3.4693360328674316, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.7268192749621448, |
|
"grad_norm": 13.823278427124023, |
|
"learning_rate": 2.11571421988541e-07, |
|
"logits/chosen": -2.305468797683716, |
|
"logits/rejected": -2.315624952316284, |
|
"logps/chosen": -89.63749694824219, |
|
"logps/rejected": -110.94999694824219, |
|
"loss": 0.1137, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.272363305091858, |
|
"rewards/margins": 5.442187309265137, |
|
"rewards/rejected": -4.17041015625, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7303821145452926, |
|
"grad_norm": 8.384671211242676, |
|
"learning_rate": 2.065103090032743e-07, |
|
"logits/chosen": -2.3109374046325684, |
|
"logits/rejected": -2.3167967796325684, |
|
"logps/chosen": -91.98750305175781, |
|
"logps/rejected": -102.36250305175781, |
|
"loss": 0.1786, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 0.9146575927734375, |
|
"rewards/margins": 4.707129001617432, |
|
"rewards/rejected": -3.791015625, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.7339449541284404, |
|
"grad_norm": 17.72806167602539, |
|
"learning_rate": 2.014946642731468e-07, |
|
"logits/chosen": -2.2828125953674316, |
|
"logits/rejected": -2.270703077316284, |
|
"logps/chosen": -74.35624694824219, |
|
"logps/rejected": -95.42500305175781, |
|
"loss": 0.1665, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.3673095703125, |
|
"rewards/margins": 5.188672065734863, |
|
"rewards/rejected": -3.819140672683716, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7375077937115881, |
|
"grad_norm": 14.17636775970459, |
|
"learning_rate": 1.9652526483606196e-07, |
|
"logits/chosen": -2.2515625953674316, |
|
"logits/rejected": -2.276562452316284, |
|
"logps/chosen": -74.0625, |
|
"logps/rejected": -95.38749694824219, |
|
"loss": 0.1077, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.250244140625, |
|
"rewards/margins": 5.008203029632568, |
|
"rewards/rejected": -3.7601561546325684, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.7410706332947359, |
|
"grad_norm": 8.393796920776367, |
|
"learning_rate": 1.9160288056547196e-07, |
|
"logits/chosen": -2.256640672683716, |
|
"logits/rejected": -2.3023438453674316, |
|
"logps/chosen": -88.26875305175781, |
|
"logps/rejected": -103.7249984741211, |
|
"loss": 0.1336, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.1659667491912842, |
|
"rewards/margins": 4.723242282867432, |
|
"rewards/rejected": -3.5577149391174316, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.7446334728778837, |
|
"grad_norm": 4.969725131988525, |
|
"learning_rate": 1.867282740511056e-07, |
|
"logits/chosen": -2.2894530296325684, |
|
"logits/rejected": -2.305468797683716, |
|
"logps/chosen": -89.5875015258789, |
|
"logps/rejected": -112.2125015258789, |
|
"loss": 0.1522, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.8788086175918579, |
|
"rewards/margins": 5.113476753234863, |
|
"rewards/rejected": -4.235547065734863, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.7481963124610315, |
|
"grad_norm": 10.194605827331543, |
|
"learning_rate": 1.819022004808261e-07, |
|
"logits/chosen": -2.303906202316284, |
|
"logits/rejected": -2.3179688453674316, |
|
"logps/chosen": -95.14375305175781, |
|
"logps/rejected": -117.89375305175781, |
|
"loss": 0.1688, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.0526001453399658, |
|
"rewards/margins": 4.858056545257568, |
|
"rewards/rejected": -3.808300733566284, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7517591520441792, |
|
"grad_norm": 8.145842552185059, |
|
"learning_rate": 1.7712540752363607e-07, |
|
"logits/chosen": -2.301953077316284, |
|
"logits/rejected": -2.302734375, |
|
"logps/chosen": -81.76249694824219, |
|
"logps/rejected": -106.76249694824219, |
|
"loss": 0.2246, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.1788451671600342, |
|
"rewards/margins": 5.237695217132568, |
|
"rewards/rejected": -4.058789253234863, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.7553219916273269, |
|
"grad_norm": 6.336703777313232, |
|
"learning_rate": 1.7239863521384517e-07, |
|
"logits/chosen": -2.332812547683716, |
|
"logits/rejected": -2.321093797683716, |
|
"logps/chosen": -91.76875305175781, |
|
"logps/rejected": -111.5875015258789, |
|
"loss": 0.1455, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.3840820789337158, |
|
"rewards/margins": 4.992383003234863, |
|
"rewards/rejected": -3.6078124046325684, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.7588848312104748, |
|
"grad_norm": 8.184171676635742, |
|
"learning_rate": 1.677226158364225e-07, |
|
"logits/chosen": -2.2992186546325684, |
|
"logits/rejected": -2.315234422683716, |
|
"logps/chosen": -107.5374984741211, |
|
"logps/rejected": -118.79374694824219, |
|
"loss": 0.2229, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.201904296875, |
|
"rewards/margins": 4.680468559265137, |
|
"rewards/rejected": -3.4779295921325684, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.7624476707936225, |
|
"grad_norm": 9.234628677368164, |
|
"learning_rate": 1.6309807381354957e-07, |
|
"logits/chosen": -2.291210889816284, |
|
"logits/rejected": -2.314453125, |
|
"logps/chosen": -90.58125305175781, |
|
"logps/rejected": -106.86250305175781, |
|
"loss": 0.1176, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.29840087890625, |
|
"rewards/margins": 5.166796684265137, |
|
"rewards/rejected": -3.872265577316284, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.7660105103767703, |
|
"grad_norm": 5.269473075866699, |
|
"learning_rate": 1.5852572559238941e-07, |
|
"logits/chosen": -2.289843797683716, |
|
"logits/rejected": -2.32421875, |
|
"logps/chosen": -99.6312484741211, |
|
"logps/rejected": -115.7125015258789, |
|
"loss": 0.1284, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.0069701671600342, |
|
"rewards/margins": 4.695214748382568, |
|
"rewards/rejected": -3.688183546066284, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.7695733499599181, |
|
"grad_norm": 9.90135669708252, |
|
"learning_rate": 1.5400627953409394e-07, |
|
"logits/chosen": -2.3121094703674316, |
|
"logits/rejected": -2.309375047683716, |
|
"logps/chosen": -90.0062484741211, |
|
"logps/rejected": -111.0, |
|
"loss": 0.1327, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.312280297279358, |
|
"rewards/margins": 5.36328125, |
|
"rewards/rejected": -4.051562309265137, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7731361895430658, |
|
"grad_norm": 11.21921443939209, |
|
"learning_rate": 1.4954043580406155e-07, |
|
"logits/chosen": -2.294921875, |
|
"logits/rejected": -2.301562547683716, |
|
"logps/chosen": -97.3499984741211, |
|
"logps/rejected": -112.95625305175781, |
|
"loss": 0.2028, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.258544921875, |
|
"rewards/margins": 5.259570121765137, |
|
"rewards/rejected": -4.002831935882568, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.7766990291262136, |
|
"grad_norm": 17.780261993408203, |
|
"learning_rate": 1.4512888626346598e-07, |
|
"logits/chosen": -2.2953124046325684, |
|
"logits/rejected": -2.334765672683716, |
|
"logps/chosen": -91.4312515258789, |
|
"logps/rejected": -105.42500305175781, |
|
"loss": 0.1559, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.1874573230743408, |
|
"rewards/margins": 4.756054878234863, |
|
"rewards/rejected": -3.5699219703674316, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7802618687093613, |
|
"grad_norm": 7.047048091888428, |
|
"learning_rate": 1.407723143620716e-07, |
|
"logits/chosen": -2.3238282203674316, |
|
"logits/rejected": -2.334765672683716, |
|
"logps/chosen": -104.80000305175781, |
|
"logps/rejected": -128.02499389648438, |
|
"loss": 0.0925, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.6564819812774658, |
|
"rewards/margins": 5.543359279632568, |
|
"rewards/rejected": -3.887402296066284, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.7838247082925092, |
|
"grad_norm": 9.77812671661377, |
|
"learning_rate": 1.3647139503235045e-07, |
|
"logits/chosen": -2.262500047683716, |
|
"logits/rejected": -2.2998046875, |
|
"logps/chosen": -97.88749694824219, |
|
"logps/rejected": -114.2874984741211, |
|
"loss": 0.1671, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.3259338438510895, |
|
"rewards/margins": 4.857617378234863, |
|
"rewards/rejected": -4.532422065734863, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7873875478756569, |
|
"grad_norm": 8.321101188659668, |
|
"learning_rate": 1.3222679458492086e-07, |
|
"logits/chosen": -2.2890625, |
|
"logits/rejected": -2.3089842796325684, |
|
"logps/chosen": -109.0250015258789, |
|
"logps/rejected": -124.01249694824219, |
|
"loss": 0.1252, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.8846069574356079, |
|
"rewards/margins": 4.947070121765137, |
|
"rewards/rejected": -4.061327934265137, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.7909503874588046, |
|
"grad_norm": 73.0202865600586, |
|
"learning_rate": 1.2803917060531993e-07, |
|
"logits/chosen": -2.2777342796325684, |
|
"logits/rejected": -2.309375047683716, |
|
"logps/chosen": -99.2249984741211, |
|
"logps/rejected": -110.9000015258789, |
|
"loss": 0.1357, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 0.3677612245082855, |
|
"rewards/margins": 4.772656440734863, |
|
"rewards/rejected": -4.407422065734863, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.7945132270419525, |
|
"grad_norm": 4.360289096832275, |
|
"learning_rate": 1.2390917185212863e-07, |
|
"logits/chosen": -2.262500047683716, |
|
"logits/rejected": -2.278125047683716, |
|
"logps/chosen": -92.60624694824219, |
|
"logps/rejected": -107.5250015258789, |
|
"loss": 0.1152, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.065679907798767, |
|
"rewards/margins": 4.841601371765137, |
|
"rewards/rejected": -3.7777342796325684, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.7980760666251002, |
|
"grad_norm": 23.889127731323242, |
|
"learning_rate": 1.1983743815646508e-07, |
|
"logits/chosen": -2.251171827316284, |
|
"logits/rejected": -2.2945313453674316, |
|
"logps/chosen": -98.3187484741211, |
|
"logps/rejected": -109.01249694824219, |
|
"loss": 0.2434, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.17170409858226776, |
|
"rewards/margins": 4.185156345367432, |
|
"rewards/rejected": -4.01220703125, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.801638906208248, |
|
"grad_norm": 3.363276481628418, |
|
"learning_rate": 1.158246003228589e-07, |
|
"logits/chosen": -2.2861328125, |
|
"logits/rejected": -2.29296875, |
|
"logps/chosen": -93.25, |
|
"logps/rejected": -108.80000305175781, |
|
"loss": 0.1153, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.910595715045929, |
|
"rewards/margins": 5.132616996765137, |
|
"rewards/rejected": -4.221289157867432, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.8052017457913957, |
|
"grad_norm": 7.905906677246094, |
|
"learning_rate": 1.1187128003152579e-07, |
|
"logits/chosen": -2.283203125, |
|
"logits/rejected": -2.288281202316284, |
|
"logps/chosen": -85.39375305175781, |
|
"logps/rejected": -107.3499984741211, |
|
"loss": 0.1325, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.2729613780975342, |
|
"rewards/margins": 5.160742282867432, |
|
"rewards/rejected": -3.8871092796325684, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8087645853745435, |
|
"grad_norm": 7.837643146514893, |
|
"learning_rate": 1.0797808974205552e-07, |
|
"logits/chosen": -2.289843797683716, |
|
"logits/rejected": -2.287890672683716, |
|
"logps/chosen": -82.48750305175781, |
|
"logps/rejected": -99.38749694824219, |
|
"loss": 0.1171, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.919165015220642, |
|
"rewards/margins": 5.359765529632568, |
|
"rewards/rejected": -3.4407715797424316, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.8123274249576913, |
|
"grad_norm": 11.885445594787598, |
|
"learning_rate": 1.0414563259852682e-07, |
|
"logits/chosen": -2.298046827316284, |
|
"logits/rejected": -2.29296875, |
|
"logps/chosen": -97.96875, |
|
"logps/rejected": -120.92500305175781, |
|
"loss": 0.1138, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.620800793170929, |
|
"rewards/margins": 5.161328315734863, |
|
"rewards/rejected": -4.541406154632568, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.815890264540839, |
|
"grad_norm": 8.711338996887207, |
|
"learning_rate": 1.0037450233606782e-07, |
|
"logits/chosen": -2.262500047683716, |
|
"logits/rejected": -2.270703077316284, |
|
"logps/chosen": -84.22187805175781, |
|
"logps/rejected": -105.32499694824219, |
|
"loss": 0.1101, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.4210205078125, |
|
"rewards/margins": 5.448046684265137, |
|
"rewards/rejected": -4.029687404632568, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.8194531041239869, |
|
"grad_norm": 11.828937530517578, |
|
"learning_rate": 9.666528318887196e-08, |
|
"logits/chosen": -2.263867139816284, |
|
"logits/rejected": -2.305468797683716, |
|
"logps/chosen": -90.71875, |
|
"logps/rejected": -104.2750015258789, |
|
"loss": 0.151, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.256616234779358, |
|
"rewards/margins": 4.792187690734863, |
|
"rewards/rejected": -3.5337891578674316, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8230159437071346, |
|
"grad_norm": 6.786935329437256, |
|
"learning_rate": 9.301854979968715e-08, |
|
"logits/chosen": -2.2972655296325684, |
|
"logits/rejected": -2.315624952316284, |
|
"logps/chosen": -87.6500015258789, |
|
"logps/rejected": -104.4000015258789, |
|
"loss": 0.1213, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.2689208984375, |
|
"rewards/margins": 4.808203220367432, |
|
"rewards/rejected": -3.541015625, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.8265787832902823, |
|
"grad_norm": 6.224339962005615, |
|
"learning_rate": 8.943486713079068e-08, |
|
"logits/chosen": -2.317187547683716, |
|
"logits/rejected": -2.309765577316284, |
|
"logps/chosen": -92.5562515258789, |
|
"logps/rejected": -113.80000305175781, |
|
"loss": 0.1581, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.9830077886581421, |
|
"rewards/margins": 5.277148246765137, |
|
"rewards/rejected": -4.294531345367432, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8301416228734301, |
|
"grad_norm": 12.381536483764648, |
|
"learning_rate": 8.59147903764636e-08, |
|
"logits/chosen": -2.2671875953674316, |
|
"logits/rejected": -2.287890672683716, |
|
"logps/chosen": -96.29374694824219, |
|
"logps/rejected": -107.6500015258789, |
|
"loss": 0.1256, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.2315673828125, |
|
"rewards/margins": 4.880859375, |
|
"rewards/rejected": -3.6522459983825684, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.8337044624565779, |
|
"grad_norm": 6.709888458251953, |
|
"learning_rate": 8.245886487697778e-08, |
|
"logits/chosen": -2.3125, |
|
"logits/rejected": -2.3101563453674316, |
|
"logps/chosen": -92.6624984741211, |
|
"logps/rejected": -111.0875015258789, |
|
"loss": 0.147, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.50537109375, |
|
"rewards/margins": 5.387890815734863, |
|
"rewards/rejected": -3.8837890625, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8372673020397257, |
|
"grad_norm": 9.564090728759766, |
|
"learning_rate": 7.906762603411132e-08, |
|
"logits/chosen": -2.2457032203674316, |
|
"logits/rejected": -2.254687547683716, |
|
"logps/chosen": -71.33125305175781, |
|
"logps/rejected": -92.80000305175781, |
|
"loss": 0.1114, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.35357666015625, |
|
"rewards/margins": 4.905859470367432, |
|
"rewards/rejected": -3.548046827316284, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.8408301416228734, |
|
"grad_norm": 6.03350305557251, |
|
"learning_rate": 7.574159922820184e-08, |
|
"logits/chosen": -2.30859375, |
|
"logits/rejected": -2.325390577316284, |
|
"logps/chosen": -93.82499694824219, |
|
"logps/rejected": -116.7562484741211, |
|
"loss": 0.1185, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.304834008216858, |
|
"rewards/margins": 5.476758003234863, |
|
"rewards/rejected": -4.173047065734863, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8443929812060212, |
|
"grad_norm": 7.182165145874023, |
|
"learning_rate": 7.24812997367531e-08, |
|
"logits/chosen": -2.262890577316284, |
|
"logits/rejected": -2.2757811546325684, |
|
"logps/chosen": -87.3812484741211, |
|
"logps/rejected": -101.5250015258789, |
|
"loss": 0.091, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.500463843345642, |
|
"rewards/margins": 5.475976467132568, |
|
"rewards/rejected": -3.97216796875, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.847955820789169, |
|
"grad_norm": 9.104799270629883, |
|
"learning_rate": 6.928723265460734e-08, |
|
"logits/chosen": -2.255859375, |
|
"logits/rejected": -2.260546922683716, |
|
"logps/chosen": -88.0687484741211, |
|
"logps/rejected": -105.2874984741211, |
|
"loss": 0.1121, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.0933105945587158, |
|
"rewards/margins": 5.320508003234863, |
|
"rewards/rejected": -4.226758003234863, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.8515186603723167, |
|
"grad_norm": 17.943103790283203, |
|
"learning_rate": 6.615989281569373e-08, |
|
"logits/chosen": -2.3070311546325684, |
|
"logits/rejected": -2.317187547683716, |
|
"logps/chosen": -100.07499694824219, |
|
"logps/rejected": -117.2249984741211, |
|
"loss": 0.1486, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.299291968345642, |
|
"rewards/margins": 5.075781345367432, |
|
"rewards/rejected": -3.7769532203674316, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.8550814999554645, |
|
"grad_norm": 7.1349287033081055, |
|
"learning_rate": 6.309976471636808e-08, |
|
"logits/chosen": -2.3128905296325684, |
|
"logits/rejected": -2.315624952316284, |
|
"logps/chosen": -87.25, |
|
"logps/rejected": -102.5, |
|
"loss": 0.0928, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.4921143054962158, |
|
"rewards/margins": 5.408398628234863, |
|
"rewards/rejected": -3.91796875, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8586443395386123, |
|
"grad_norm": 10.989272117614746, |
|
"learning_rate": 6.010732244035266e-08, |
|
"logits/chosen": -2.272265672683716, |
|
"logits/rejected": -2.297656297683716, |
|
"logps/chosen": -77.6812515258789, |
|
"logps/rejected": -101.48750305175781, |
|
"loss": 0.1136, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.5633544921875, |
|
"rewards/margins": 5.681640625, |
|
"rewards/rejected": -4.119531154632568, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.86220717912176, |
|
"grad_norm": 26.202138900756836, |
|
"learning_rate": 5.7183029585289975e-08, |
|
"logits/chosen": -2.2964844703674316, |
|
"logits/rejected": -2.2992186546325684, |
|
"logps/chosen": -95.7874984741211, |
|
"logps/rejected": -115.6875, |
|
"loss": 0.1312, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.4753844738006592, |
|
"rewards/margins": 5.235547065734863, |
|
"rewards/rejected": -3.76171875, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.8657700187049078, |
|
"grad_norm": 10.135671615600586, |
|
"learning_rate": 5.432733919092147e-08, |
|
"logits/chosen": -2.2984375953674316, |
|
"logits/rejected": -2.2890625, |
|
"logps/chosen": -94.0625, |
|
"logps/rejected": -120.42500305175781, |
|
"loss": 0.1334, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.158136010169983, |
|
"rewards/margins": 5.209374904632568, |
|
"rewards/rejected": -4.052783012390137, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.8693328582880556, |
|
"grad_norm": 14.827692031860352, |
|
"learning_rate": 5.1540693668900346e-08, |
|
"logits/chosen": -2.2582030296325684, |
|
"logits/rejected": -2.268359422683716, |
|
"logps/chosen": -92.1937484741211, |
|
"logps/rejected": -112.4375, |
|
"loss": 0.1352, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.57037353515625, |
|
"rewards/margins": 5.102734565734863, |
|
"rewards/rejected": -3.5328125953674316, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8728956978712034, |
|
"grad_norm": 2.7754666805267334, |
|
"learning_rate": 4.882352473425255e-08, |
|
"logits/chosen": -2.2503905296325684, |
|
"logits/rejected": -2.2750000953674316, |
|
"logps/chosen": -82.34375, |
|
"logps/rejected": -100.625, |
|
"loss": 0.1322, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.242883324623108, |
|
"rewards/margins": 4.870898246765137, |
|
"rewards/rejected": -3.628710985183716, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.8764585374543511, |
|
"grad_norm": 8.408917427062988, |
|
"learning_rate": 4.6176253338494344e-08, |
|
"logits/chosen": -2.232421875, |
|
"logits/rejected": -2.251171827316284, |
|
"logps/chosen": -86.6875, |
|
"logps/rejected": -100.7750015258789, |
|
"loss": 0.1269, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.8370605707168579, |
|
"rewards/margins": 4.563672065734863, |
|
"rewards/rejected": -3.7256836891174316, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8800213770374989, |
|
"grad_norm": 4.768385887145996, |
|
"learning_rate": 4.3599289604416614e-08, |
|
"logits/chosen": -2.2777342796325684, |
|
"logits/rejected": -2.2796874046325684, |
|
"logps/chosen": -84.90625, |
|
"logps/rejected": -101.0374984741211, |
|
"loss": 0.1378, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.4487793445587158, |
|
"rewards/margins": 5.4912109375, |
|
"rewards/rejected": -4.040234565734863, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.8835842166206467, |
|
"grad_norm": 9.690278053283691, |
|
"learning_rate": 4.10930327625485e-08, |
|
"logits/chosen": -2.298828125, |
|
"logits/rejected": -2.307812452316284, |
|
"logps/chosen": -94.4937515258789, |
|
"logps/rejected": -111.41874694824219, |
|
"loss": 0.1103, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.762249767780304, |
|
"rewards/margins": 4.697265625, |
|
"rewards/rejected": -3.93701171875, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8871470562037944, |
|
"grad_norm": 8.591952323913574, |
|
"learning_rate": 3.865787108930646e-08, |
|
"logits/chosen": -2.2587890625, |
|
"logits/rejected": -2.2367186546325684, |
|
"logps/chosen": -97.4625015258789, |
|
"logps/rejected": -110.23750305175781, |
|
"loss": 0.2029, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.166015625, |
|
"rewards/margins": 5.281054496765137, |
|
"rewards/rejected": -4.11328125, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.8907098957869422, |
|
"grad_norm": 4.010148525238037, |
|
"learning_rate": 3.629418184684185e-08, |
|
"logits/chosen": -2.291015625, |
|
"logits/rejected": -2.291796922683716, |
|
"logps/chosen": -95.9625015258789, |
|
"logps/rejected": -115.5250015258789, |
|
"loss": 0.1164, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.5761597156524658, |
|
"rewards/margins": 5.4794921875, |
|
"rewards/rejected": -3.8990235328674316, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.89427273537009, |
|
"grad_norm": 12.358043670654297, |
|
"learning_rate": 3.400233122459473e-08, |
|
"logits/chosen": -2.2300782203674316, |
|
"logits/rejected": -2.2855467796325684, |
|
"logps/chosen": -99.0687484741211, |
|
"logps/rejected": -109.19999694824219, |
|
"loss": 0.163, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 0.37403565645217896, |
|
"rewards/margins": 4.477343559265137, |
|
"rewards/rejected": -4.104687690734863, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.8978355749532377, |
|
"grad_norm": 5.314749717712402, |
|
"learning_rate": 3.1782674282562094e-08, |
|
"logits/chosen": -2.2640624046325684, |
|
"logits/rejected": -2.268749952316284, |
|
"logps/chosen": -76.625, |
|
"logps/rejected": -94.67500305175781, |
|
"loss": 0.1039, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.71014404296875, |
|
"rewards/margins": 5.317773342132568, |
|
"rewards/rejected": -3.611035108566284, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9013984145363855, |
|
"grad_norm": 11.101000785827637, |
|
"learning_rate": 2.9635554896291326e-08, |
|
"logits/chosen": -2.26171875, |
|
"logits/rejected": -2.278125047683716, |
|
"logps/chosen": -98.7874984741211, |
|
"logps/rejected": -113.3125, |
|
"loss": 0.2612, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 0.4896484315395355, |
|
"rewards/margins": 4.54150390625, |
|
"rewards/rejected": -4.050000190734863, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.9049612541195333, |
|
"grad_norm": 7.049961090087891, |
|
"learning_rate": 2.7561305703606207e-08, |
|
"logits/chosen": -2.285937547683716, |
|
"logits/rejected": -2.295703172683716, |
|
"logps/chosen": -97.1937484741211, |
|
"logps/rejected": -112.5875015258789, |
|
"loss": 0.1048, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.5007812976837158, |
|
"rewards/margins": 5.321875095367432, |
|
"rewards/rejected": -3.821484327316284, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.9085240937026811, |
|
"grad_norm": 14.669742584228516, |
|
"learning_rate": 2.5560248053073164e-08, |
|
"logits/chosen": -2.2640624046325684, |
|
"logits/rejected": -2.280078172683716, |
|
"logps/chosen": -98.5, |
|
"logps/rejected": -121.6875, |
|
"loss": 0.1746, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.07766113430261612, |
|
"rewards/margins": 4.587890625, |
|
"rewards/rejected": -4.511034965515137, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.9120869332858288, |
|
"grad_norm": 5.834444522857666, |
|
"learning_rate": 2.3632691954217742e-08, |
|
"logits/chosen": -2.253124952316284, |
|
"logits/rejected": -2.270312547683716, |
|
"logps/chosen": -87.1312484741211, |
|
"logps/rejected": -107.2750015258789, |
|
"loss": 0.1106, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.564550757408142, |
|
"rewards/margins": 5.344336032867432, |
|
"rewards/rejected": -3.7816405296325684, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9156497728689765, |
|
"grad_norm": 3.6613357067108154, |
|
"learning_rate": 2.1778936029496376e-08, |
|
"logits/chosen": -2.3011717796325684, |
|
"logits/rejected": -2.299609422683716, |
|
"logps/chosen": -99.10624694824219, |
|
"logps/rejected": -116.0999984741211, |
|
"loss": 0.119, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.7597290277481079, |
|
"rewards/margins": 5.019140720367432, |
|
"rewards/rejected": -4.260546684265137, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.9192126124521244, |
|
"grad_norm": 10.60155963897705, |
|
"learning_rate": 1.999926746803332e-08, |
|
"logits/chosen": -2.265625, |
|
"logits/rejected": -2.250195264816284, |
|
"logps/chosen": -79.88749694824219, |
|
"logps/rejected": -101.5, |
|
"loss": 0.13, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.0681030750274658, |
|
"rewards/margins": 5.093359470367432, |
|
"rewards/rejected": -4.023046970367432, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.9227754520352721, |
|
"grad_norm": 3.0230162143707275, |
|
"learning_rate": 1.8293961981128592e-08, |
|
"logits/chosen": -2.306640625, |
|
"logits/rejected": -2.313671827316284, |
|
"logps/chosen": -107.9000015258789, |
|
"logps/rejected": -122.07499694824219, |
|
"loss": 0.2062, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.44135743379592896, |
|
"rewards/margins": 5.301171779632568, |
|
"rewards/rejected": -4.86376953125, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.9263382916184199, |
|
"grad_norm": 6.370534896850586, |
|
"learning_rate": 1.6663283759543678e-08, |
|
"logits/chosen": -2.247265577316284, |
|
"logits/rejected": -2.274218797683716, |
|
"logps/chosen": -96.35624694824219, |
|
"logps/rejected": -115.8687515258789, |
|
"loss": 0.1412, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.6146484613418579, |
|
"rewards/margins": 4.799218654632568, |
|
"rewards/rejected": -4.187402248382568, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9299011312015677, |
|
"grad_norm": 6.875704765319824, |
|
"learning_rate": 1.510748543257262e-08, |
|
"logits/chosen": -2.276171922683716, |
|
"logits/rejected": -2.276171922683716, |
|
"logps/chosen": -81.54374694824219, |
|
"logps/rejected": -98.6875, |
|
"loss": 0.0842, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.9011962413787842, |
|
"rewards/margins": 5.594336032867432, |
|
"rewards/rejected": -3.694140672683716, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.9334639707847154, |
|
"grad_norm": 5.4383721351623535, |
|
"learning_rate": 1.3626808028903757e-08, |
|
"logits/chosen": -2.278125047683716, |
|
"logits/rejected": -2.3148436546325684, |
|
"logps/chosen": -86.5625, |
|
"logps/rejected": -110.7750015258789, |
|
"loss": 0.0813, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 1.6395142078399658, |
|
"rewards/margins": 5.509130954742432, |
|
"rewards/rejected": -3.87109375, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9370268103678632, |
|
"grad_norm": 4.379004955291748, |
|
"learning_rate": 1.2221480939278938e-08, |
|
"logits/chosen": -2.288281202316284, |
|
"logits/rejected": -2.274218797683716, |
|
"logps/chosen": -97.1312484741211, |
|
"logps/rejected": -116.13749694824219, |
|
"loss": 0.1139, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.6423828601837158, |
|
"rewards/margins": 5.208300590515137, |
|
"rewards/rejected": -3.566210985183716, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.9405896499510109, |
|
"grad_norm": 9.59549617767334, |
|
"learning_rate": 1.0891721880955996e-08, |
|
"logits/chosen": -2.291015625, |
|
"logits/rejected": -2.3125, |
|
"logps/chosen": -92.05000305175781, |
|
"logps/rejected": -101.92500305175781, |
|
"loss": 0.0903, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.4332764148712158, |
|
"rewards/margins": 5.380273342132568, |
|
"rewards/rejected": -3.946484327316284, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9441524895341588, |
|
"grad_norm": 10.30654239654541, |
|
"learning_rate": 9.63773686397873e-09, |
|
"logits/chosen": -2.2796874046325684, |
|
"logits/rejected": -2.299609422683716, |
|
"logps/chosen": -98.1500015258789, |
|
"logps/rejected": -116.11250305175781, |
|
"loss": 0.1958, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.719470202922821, |
|
"rewards/margins": 4.872265815734863, |
|
"rewards/rejected": -4.152734279632568, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.9477153291173065, |
|
"grad_norm": 8.464811325073242, |
|
"learning_rate": 8.459720159261718e-09, |
|
"logits/chosen": -2.241992235183716, |
|
"logits/rejected": -2.255859375, |
|
"logps/chosen": -97.71875, |
|
"logps/rejected": -104.10624694824219, |
|
"loss": 0.1578, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.4008544981479645, |
|
"rewards/margins": 4.505468845367432, |
|
"rewards/rejected": -4.103125095367432, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.9512781687004542, |
|
"grad_norm": 12.606581687927246, |
|
"learning_rate": 7.35785426849328e-09, |
|
"logits/chosen": -2.2738280296325684, |
|
"logits/rejected": -2.298828125, |
|
"logps/chosen": -78.5687484741211, |
|
"logps/rejected": -97.07499694824219, |
|
"loss": 0.1097, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.5250976085662842, |
|
"rewards/margins": 5.138671875, |
|
"rewards/rejected": -3.612109422683716, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.9548410082836021, |
|
"grad_norm": 6.5607805252075195, |
|
"learning_rate": 6.3323098958615314e-09, |
|
"logits/chosen": -2.283984422683716, |
|
"logits/rejected": -2.2874999046325684, |
|
"logps/chosen": -87.58125305175781, |
|
"logps/rejected": -105.1500015258789, |
|
"loss": 0.1212, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 1.039770483970642, |
|
"rewards/margins": 5.138476371765137, |
|
"rewards/rejected": -4.1015625, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9584038478667498, |
|
"grad_norm": 84.18962097167969, |
|
"learning_rate": 5.38324592160877e-09, |
|
"logits/chosen": -2.268359422683716, |
|
"logits/rejected": -2.274609327316284, |
|
"logps/chosen": -103.46875, |
|
"logps/rejected": -119.51875305175781, |
|
"loss": 0.2778, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.715136706829071, |
|
"rewards/margins": 4.048925876617432, |
|
"rewards/rejected": -3.3335938453674316, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.9619666874498976, |
|
"grad_norm": 9.378565788269043, |
|
"learning_rate": 4.5108093774169356e-09, |
|
"logits/chosen": -2.267578125, |
|
"logits/rejected": -2.291210889816284, |
|
"logps/chosen": -104.89375305175781, |
|
"logps/rejected": -122.2750015258789, |
|
"loss": 0.2457, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 0.806683361530304, |
|
"rewards/margins": 5.057812690734863, |
|
"rewards/rejected": -4.248437404632568, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9655295270330453, |
|
"grad_norm": 11.539667129516602, |
|
"learning_rate": 3.7151354236293897e-09, |
|
"logits/chosen": -2.283984422683716, |
|
"logits/rejected": -2.319531202316284, |
|
"logps/chosen": -103.3499984741211, |
|
"logps/rejected": -116.25, |
|
"loss": 0.1492, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.752197265625, |
|
"rewards/margins": 4.77294921875, |
|
"rewards/rejected": -4.019140720367432, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.9690923666161931, |
|
"grad_norm": 29.576169967651367, |
|
"learning_rate": 2.9963473283112216e-09, |
|
"logits/chosen": -2.2437500953674316, |
|
"logits/rejected": -2.2601561546325684, |
|
"logps/chosen": -82.6187515258789, |
|
"logps/rejected": -96.5875015258789, |
|
"loss": 0.1919, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.0454590320587158, |
|
"rewards/margins": 5.02978515625, |
|
"rewards/rejected": -3.981738328933716, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9726552061993409, |
|
"grad_norm": 5.123119831085205, |
|
"learning_rate": 2.3545564481523005e-09, |
|
"logits/chosen": -2.278515577316284, |
|
"logits/rejected": -2.282421827316284, |
|
"logps/chosen": -84.13749694824219, |
|
"logps/rejected": -93.4375, |
|
"loss": 0.1302, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.513513207435608, |
|
"rewards/margins": 5.091796875, |
|
"rewards/rejected": -3.576367139816284, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.9762180457824886, |
|
"grad_norm": 12.932695388793945, |
|
"learning_rate": 1.7898622112156314e-09, |
|
"logits/chosen": -2.2816405296325684, |
|
"logits/rejected": -2.280468702316284, |
|
"logps/chosen": -85.46875, |
|
"logps/rejected": -110.5875015258789, |
|
"loss": 0.1621, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.1466248035430908, |
|
"rewards/margins": 5.542578220367432, |
|
"rewards/rejected": -4.399218559265137, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.9797808853656365, |
|
"grad_norm": 8.279297828674316, |
|
"learning_rate": 1.3023521015336768e-09, |
|
"logits/chosen": -2.303515672683716, |
|
"logits/rejected": -2.294140577316284, |
|
"logps/chosen": -109.96875, |
|
"logps/rejected": -120.4000015258789, |
|
"loss": 0.1537, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 0.3752075135707855, |
|
"rewards/margins": 4.730664253234863, |
|
"rewards/rejected": -4.355859279632568, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.9833437249487842, |
|
"grad_norm": 14.932358741760254, |
|
"learning_rate": 8.921016455548658e-10, |
|
"logits/chosen": -2.234375, |
|
"logits/rejected": -2.270312547683716, |
|
"logps/chosen": -90.1500015258789, |
|
"logps/rejected": -101.625, |
|
"loss": 0.1677, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.3759964108467102, |
|
"rewards/margins": 4.690625190734863, |
|
"rewards/rejected": -4.31494140625, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.9869065645319319, |
|
"grad_norm": 5.358438968658447, |
|
"learning_rate": 5.591744004432853e-10, |
|
"logits/chosen": -2.2476563453674316, |
|
"logits/rejected": -2.2632813453674316, |
|
"logps/chosen": -89.57499694824219, |
|
"logps/rejected": -108.0374984741211, |
|
"loss": 0.1115, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.094567894935608, |
|
"rewards/margins": 4.923828125, |
|
"rewards/rejected": -3.8304686546325684, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.9904694041150797, |
|
"grad_norm": 12.948234558105469, |
|
"learning_rate": 3.036219442317245e-10, |
|
"logits/chosen": -2.258984327316284, |
|
"logits/rejected": -2.28515625, |
|
"logps/chosen": -81.38749694824219, |
|
"logps/rejected": -102.125, |
|
"loss": 0.1064, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.3934326171875, |
|
"rewards/margins": 5.1748046875, |
|
"rewards/rejected": -3.782031297683716, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.9940322436982275, |
|
"grad_norm": 7.298853874206543, |
|
"learning_rate": 1.2548386783134413e-10, |
|
"logits/chosen": -2.2681641578674316, |
|
"logits/rejected": -2.2554688453674316, |
|
"logps/chosen": -89.92500305175781, |
|
"logps/rejected": -107.67500305175781, |
|
"loss": 0.2167, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.764086902141571, |
|
"rewards/margins": 5.166211128234863, |
|
"rewards/rejected": -4.402539253234863, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.9975950832813753, |
|
"grad_norm": 14.198148727416992, |
|
"learning_rate": 2.4787768897971405e-11, |
|
"logits/chosen": -2.305859327316284, |
|
"logits/rejected": -2.309765577316284, |
|
"logps/chosen": -111.25, |
|
"logps/rejected": -129.9499969482422, |
|
"loss": 0.1927, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.5697265863418579, |
|
"rewards/margins": 4.261132717132568, |
|
"rewards/rejected": -3.6939454078674316, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9997327870312639, |
|
"step": 1403, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2128710214231835, |
|
"train_runtime": 9706.3948, |
|
"train_samples_per_second": 4.626, |
|
"train_steps_per_second": 0.145 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1403, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|