{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100.0, "global_step": 239, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 0.0, "logits/generated": -3.130502223968506, "logits/oppo_generated": -3.1088104248046875, "logits/oppo_real": -3.130502223968506, "logits/real": -3.1088104248046875, "logps/generated": -99.40917205810547, "logps/oppo_gen": -99.40917205810547, "logps/oppo_real": -459.3097229003906, "logps/real": -459.3097229003906, "loss": 0.9762, "loss/gen": 3.6945278644561768, "loss/real": -2.7182817459106445, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 0.0, "logits/generated": -3.0933988094329834, "logits/oppo_generated": -2.919645309448242, "logits/oppo_real": -3.0933988094329834, "logits/real": -2.919645309448242, "logps/generated": -103.65153503417969, "logps/oppo_gen": -103.65153503417969, "logps/oppo_real": -392.1358642578125, "logps/real": -392.1358642578125, "loss": 0.9762, "loss/gen": 3.6945278644561768, "loss/real": -2.7182817459106445, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 2 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 0.0, "logits/generated": -2.6572537422180176, "logits/oppo_generated": -2.8074941635131836, "logits/oppo_real": -2.6572537422180176, "logits/real": -2.8074941635131836, "logps/generated": -72.88986206054688, "logps/oppo_gen": -72.88986206054688, "logps/oppo_real": -291.916748046875, "logps/real": -291.916748046875, "loss": 0.9762, "loss/gen": 3.6945278644561768, "loss/real": -2.7182817459106445, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 3 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 0.0, "logits/generated": -2.8966193199157715, "logits/oppo_generated": -2.768460273742676, "logits/oppo_real": -2.8966193199157715, "logits/real": -2.768460273742676, "logps/generated": -64.05287170410156, "logps/oppo_gen": -64.05287170410156, "logps/oppo_real": -376.8367919921875, "logps/real": -376.8367919921875, "loss": 0.9762, "loss/gen": 3.6945278644561768, "loss/real": -2.7182817459106445, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 4 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 0.0, "logits/generated": -2.889317512512207, "logits/oppo_generated": -2.708950996398926, "logits/oppo_real": -2.889317512512207, "logits/real": -2.708950996398926, "logps/generated": -48.29164123535156, "logps/oppo_gen": -48.29164123535156, "logps/oppo_real": -173.0751953125, "logps/real": -173.0751953125, "loss": 0.9762, "loss/gen": 3.6945278644561768, "loss/real": -2.7182817459106445, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 5 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 0.0, "logits/generated": -2.957958698272705, "logits/oppo_generated": -2.749436378479004, "logits/oppo_real": -2.957958698272705, "logits/real": -2.749436378479004, "logps/generated": -48.84138488769531, "logps/oppo_gen": -48.84138488769531, "logps/oppo_real": -139.2998046875, "logps/real": -139.2998046875, "loss": 0.9762, "loss/gen": 3.6945278644561768, "loss/real": -2.7182817459106445, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 6 }, { "epoch": 0.03, "grad_norm": 140.3248950538535, "learning_rate": 1.6666666666666667e-08, "logits/generated": -3.1195316314697266, "logits/oppo_generated": -2.9545342922210693, "logits/oppo_real": -3.1195316314697266, "logits/real": -2.9545342922210693, "logps/generated": -163.2059783935547, "logps/oppo_gen": -163.2059783935547, "logps/oppo_real": -432.88226318359375, "logps/real": -432.88226318359375, "loss": 0.9762, "loss/gen": 3.6945278644561768, "loss/real": -2.7182817459106445, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 7 }, { "epoch": 0.03, "grad_norm": 140.3248950538535, "learning_rate": 1.6666666666666667e-08, "logits/generated": -2.910332441329956, "logits/oppo_generated": -2.9416637420654297, "logits/oppo_real": -2.910332441329956, "logits/real": -2.9416637420654297, "logps/generated": -69.29386901855469, "logps/oppo_gen": -69.29386901855469, "logps/oppo_real": -311.59619140625, "logps/real": -311.59619140625, "loss": 0.9762, "loss/gen": 3.6945278644561768, "loss/real": -2.7182817459106445, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 8 }, { "epoch": 0.04, "grad_norm": 144.39084058121554, "learning_rate": 3.3333333333333334e-08, "logits/generated": -2.409976005554199, "logits/oppo_generated": -2.294548273086548, "logits/oppo_real": -2.409976005554199, "logits/real": -2.294548273086548, "logps/generated": -82.20011138916016, "logps/oppo_gen": -82.20011138916016, "logps/oppo_real": -381.1852111816406, "logps/real": -381.1852111816406, "loss": 0.9762, "loss/gen": 3.6945278644561768, "loss/real": -2.7182817459106445, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 9 }, { "epoch": 0.04, "grad_norm": 144.39084058121554, "learning_rate": 3.3333333333333334e-08, "logits/generated": -2.963313579559326, "logits/oppo_generated": -2.9239017963409424, "logits/oppo_real": -2.963313579559326, "logits/real": -2.9239017963409424, "logps/generated": -93.09856414794922, "logps/oppo_gen": -93.09856414794922, "logps/oppo_real": -233.10401916503906, "logps/real": -233.10401916503906, "loss": 0.9762, "loss/gen": 3.6945278644561768, "loss/real": -2.7182817459106445, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 10 }, { "epoch": 0.05, "grad_norm": 147.09211346550842, "learning_rate": 5e-08, "logits/generated": -2.857771396636963, "logits/oppo_generated": -2.837850570678711, "logits/oppo_real": -2.857771396636963, "logits/real": -2.837850570678711, "logps/generated": -59.46293640136719, "logps/oppo_gen": -59.46293640136719, "logps/oppo_real": -142.69805908203125, "logps/real": -142.69805908203125, "loss": 0.9762, "loss/gen": 3.6945278644561768, "loss/real": -2.7182817459106445, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 11 }, { "epoch": 0.05, "grad_norm": 147.36241597037218, "learning_rate": 6.666666666666667e-08, "logits/generated": -2.8778512477874756, "logits/oppo_generated": -2.7672762870788574, "logits/oppo_real": -2.8780808448791504, "logits/real": -2.766920328140259, "logps/generated": -70.60530090332031, "logps/oppo_gen": -70.58644104003906, "logps/oppo_real": -343.4704284667969, "logps/real": -343.4797058105469, "loss": 0.9737, "loss/gen": 3.693136215209961, "loss/real": -2.718029499053955, "rewards/accuracies": 0.5, "rewards/generated": -0.01885223388671875, "rewards/margins": 0.009566187858581543, "rewards/real": -0.009286046028137207, "step": 12 }, { "epoch": 0.05, "grad_norm": 141.20291665498627, "learning_rate": 8.333333333333333e-08, "logits/generated": -2.8214950561523438, "logits/oppo_generated": -2.8374581336975098, "logits/oppo_real": -2.822021961212158, "logits/real": -2.836732864379883, "logps/generated": -106.83735656738281, "logps/oppo_gen": -106.73956298828125, "logps/oppo_real": -280.41741943359375, "logps/real": -280.4453430175781, "loss": 0.9675, "loss/gen": 3.6873114109039307, "loss/real": -2.7175238132476807, "rewards/accuracies": 0.875, "rewards/generated": -0.09777355194091797, "rewards/margins": 0.06986618041992188, "rewards/real": -0.027907371520996094, "step": 13 }, { "epoch": 0.06, "grad_norm": 147.7225611683097, "learning_rate": 1e-07, "logits/generated": -2.7692794799804688, "logits/oppo_generated": -2.8255615234375, "logits/oppo_real": -2.771684169769287, "logits/real": -2.8233795166015625, "logps/generated": -86.35212707519531, "logps/oppo_gen": -85.86231994628906, "logps/oppo_real": -289.01318359375, "logps/real": -289.048095703125, "loss": 0.9484, "loss/gen": 3.6585421562194824, "loss/real": -2.717336654663086, "rewards/accuracies": 0.875, "rewards/generated": -0.48981738090515137, "rewards/margins": 0.4548964500427246, "rewards/real": -0.03492093086242676, "step": 14 }, { "epoch": 0.06, "grad_norm": 135.88560072492965, "learning_rate": 1.1666666666666667e-07, "logits/generated": -3.1533312797546387, "logits/oppo_generated": -2.7394165992736816, "logits/oppo_real": -3.1553921699523926, "logits/real": -2.7368688583374023, "logps/generated": -75.06793212890625, "logps/oppo_gen": -74.47514343261719, "logps/oppo_real": -366.370361328125, "logps/real": -366.428466796875, "loss": 0.9381, "loss/gen": 3.6510140895843506, "loss/real": -2.716707944869995, "rewards/accuracies": 1.0, "rewards/generated": -0.5927925109863281, "rewards/margins": 0.5347006320953369, "rewards/real": -0.05809187889099121, "step": 15 }, { "epoch": 0.07, "grad_norm": 138.72056275567078, "learning_rate": 1.3333333333333334e-07, "logits/generated": -2.1314597129821777, "logits/oppo_generated": -2.1468427181243896, "logits/oppo_real": -2.142064094543457, "logits/real": -2.137998580932617, "logps/generated": -79.51522064208984, "logps/oppo_gen": -78.08332824707031, "logps/oppo_real": -437.152587890625, "logps/real": -437.42681884765625, "loss": 0.8706, "loss/gen": 3.590456008911133, "loss/real": -2.710862874984741, "rewards/accuracies": 1.0, "rewards/generated": -1.4318904876708984, "rewards/margins": 1.1576709747314453, "rewards/real": -0.2742195129394531, "step": 16 }, { "epoch": 0.07, "grad_norm": 128.9259113793655, "learning_rate": 1.5e-07, "logits/generated": -2.9416465759277344, "logits/oppo_generated": -2.902646064758301, "logits/oppo_real": -2.953411817550659, "logits/real": -2.887700080871582, "logps/generated": -74.43273162841797, "logps/oppo_gen": -72.53976440429688, "logps/oppo_real": -310.7004089355469, "logps/real": -310.87109375, "loss": 0.8455, "loss/gen": 3.557424545288086, "loss/real": -2.7137060165405273, "rewards/accuracies": 1.0, "rewards/generated": -1.8929705619812012, "rewards/margins": 1.7222943305969238, "rewards/real": -0.17067623138427734, "step": 17 }, { "epoch": 0.08, "grad_norm": 131.5700784634371, "learning_rate": 1.6666666666666665e-07, "logits/generated": -2.94179630279541, "logits/oppo_generated": -2.947140693664551, "logits/oppo_real": -2.9634807109832764, "logits/real": -2.920558214187622, "logps/generated": -79.8861083984375, "logps/oppo_gen": -74.80116271972656, "logps/oppo_real": -309.46124267578125, "logps/real": -310.43719482421875, "loss": 0.6796, "loss/gen": 3.3385138511657715, "loss/real": -2.6920909881591797, "rewards/accuracies": 1.0, "rewards/generated": -5.084942817687988, "rewards/margins": 4.109025955200195, "rewards/real": -0.9759171009063721, "step": 18 }, { "epoch": 0.08, "grad_norm": 132.43793505119672, "learning_rate": 1.833333333333333e-07, "logits/generated": -2.4398093223571777, "logits/oppo_generated": -2.6668543815612793, "logits/oppo_real": -2.47564697265625, "logits/real": -2.6329777240753174, "logps/generated": -73.18605041503906, "logps/oppo_gen": -67.190673828125, "logps/oppo_real": -285.60797119140625, "logps/real": -287.226806640625, "loss": 0.5909, "loss/gen": 3.2783122062683105, "loss/real": -2.6751227378845215, "rewards/accuracies": 1.0, "rewards/generated": -5.995372772216797, "rewards/margins": 4.3765668869018555, "rewards/real": -1.6188058853149414, "step": 19 }, { "epoch": 0.08, "grad_norm": 122.69142409669472, "learning_rate": 2e-07, "logits/generated": -3.083611011505127, "logits/oppo_generated": -2.7376956939697266, "logits/oppo_real": -3.1153059005737305, "logits/real": -2.70223331451416, "logps/generated": -102.092529296875, "logps/oppo_gen": -93.65745544433594, "logps/oppo_real": -173.968994140625, "logps/real": -176.39892578125, "loss": 0.5272, "loss/gen": 3.124610185623169, "loss/real": -2.653409957885742, "rewards/accuracies": 1.0, "rewards/generated": -8.435081481933594, "rewards/margins": 6.005127429962158, "rewards/real": -2.4299545288085938, "step": 20 }, { "epoch": 0.09, "grad_norm": 107.67073560019686, "learning_rate": 2.1666666666666667e-07, "logits/generated": -2.8323276042938232, "logits/oppo_generated": -2.6699156761169434, "logits/oppo_real": -2.8930060863494873, "logits/real": -2.6024348735809326, "logps/generated": -59.84043884277344, "logps/oppo_gen": -50.189754486083984, "logps/oppo_real": -197.0562286376953, "logps/real": -199.4170379638672, "loss": 0.291, "loss/gen": 3.0484681129455566, "loss/real": -2.6573870182037354, "rewards/accuracies": 1.0, "rewards/generated": -9.650688171386719, "rewards/margins": 7.289878845214844, "rewards/real": -2.3608102798461914, "step": 21 }, { "epoch": 0.09, "grad_norm": 97.9560398453689, "learning_rate": 2.3333333333333333e-07, "logits/generated": -2.9114887714385986, "logits/oppo_generated": -2.8113152980804443, "logits/oppo_real": -2.997610330581665, "logits/real": -2.724991798400879, "logps/generated": -74.77532196044922, "logps/oppo_gen": -59.91856384277344, "logps/oppo_real": -175.6089324951172, "logps/real": -181.092529296875, "loss": 0.1564, "loss/gen": 2.7578284740448, "loss/real": -2.5756349563598633, "rewards/accuracies": 1.0, "rewards/generated": -14.856756210327148, "rewards/margins": 9.373159408569336, "rewards/real": -5.4835968017578125, "step": 22 }, { "epoch": 0.1, "grad_norm": 101.1866700089493, "learning_rate": 2.5e-07, "logits/generated": -2.738328218460083, "logits/oppo_generated": -2.712057113647461, "logits/oppo_real": -2.83805513381958, "logits/real": -2.6210412979125977, "logps/generated": -103.88157653808594, "logps/oppo_gen": -84.5518798828125, "logps/oppo_real": -331.96221923828125, "logps/real": -338.4169616699219, "loss": -0.0064, "loss/gen": 2.5180134773254395, "loss/real": -2.5506632328033447, "rewards/accuracies": 1.0, "rewards/generated": -19.329689025878906, "rewards/margins": 12.874977111816406, "rewards/real": -6.454712867736816, "step": 23 }, { "epoch": 0.1, "grad_norm": 91.5968525574842, "learning_rate": 2.6666666666666667e-07, "logits/generated": -2.2416625022888184, "logits/oppo_generated": -2.4313888549804688, "logits/oppo_real": -2.3368191719055176, "logits/real": -2.3420183658599854, "logps/generated": -93.81153106689453, "logps/oppo_gen": -70.7446060180664, "logps/oppo_real": -186.56976318359375, "logps/real": -194.1738739013672, "loss": -0.0762, "loss/gen": 2.358870029449463, "loss/real": -2.5205307006835938, "rewards/accuracies": 1.0, "rewards/generated": -23.066925048828125, "rewards/margins": 15.46281623840332, "rewards/real": -7.604110240936279, "step": 24 }, { "epoch": 0.1, "grad_norm": 91.5968525574842, "learning_rate": 2.6666666666666667e-07, "logits/generated": -2.808882474899292, "logits/oppo_generated": -2.8222999572753906, "logits/oppo_real": -2.956730842590332, "logits/real": -2.6913347244262695, "logps/generated": -77.1277847290039, "logps/oppo_gen": -55.461936950683594, "logps/oppo_real": -125.98847198486328, "logps/real": -135.24478149414062, "loss": -0.2171, "loss/gen": 2.427062511444092, "loss/real": -2.4810240268707275, "rewards/accuracies": 0.875, "rewards/generated": -21.665851593017578, "rewards/margins": 12.40954875946045, "rewards/real": -9.256302833557129, "step": 25 }, { "epoch": 0.11, "grad_norm": 90.71163423694614, "learning_rate": 2.833333333333333e-07, "logits/generated": -2.524838447570801, "logits/oppo_generated": -2.9076757431030273, "logits/oppo_real": -2.661245822906494, "logits/real": -2.7569193840026855, "logps/generated": -94.57086181640625, "logps/oppo_gen": -71.46342468261719, "logps/oppo_real": -293.69677734375, "logps/real": -298.1125183105469, "loss": -0.199, "loss/gen": 2.345475196838379, "loss/real": -2.6118550300598145, "rewards/accuracies": 1.0, "rewards/generated": -23.107433319091797, "rewards/margins": 18.69169807434082, "rewards/real": -4.415735244750977, "step": 26 }, { "epoch": 0.11, "grad_norm": 71.87999982609905, "learning_rate": 3e-07, "logits/generated": -2.6987175941467285, "logits/oppo_generated": -3.018123149871826, "logits/oppo_real": -2.837935447692871, "logits/real": -2.857689142227173, "logps/generated": -76.849853515625, "logps/oppo_gen": -51.06623458862305, "logps/oppo_real": -151.72972106933594, "logps/real": -170.76156616210938, "loss": -0.2983, "loss/gen": 2.2412900924682617, "loss/real": -2.2491354942321777, "rewards/accuracies": 0.75, "rewards/generated": -25.783626556396484, "rewards/margins": 6.751780033111572, "rewards/real": -19.031845092773438, "step": 27 }, { "epoch": 0.12, "grad_norm": 77.17411637512444, "learning_rate": 3.166666666666666e-07, "logits/generated": -2.4447317123413086, "logits/oppo_generated": -2.7700376510620117, "logits/oppo_real": -2.6328747272491455, "logits/real": -2.6101927757263184, "logps/generated": -109.12590026855469, "logps/oppo_gen": -72.09120178222656, "logps/oppo_real": -411.427978515625, "logps/real": -411.268310546875, "loss": -0.5509, "loss/gen": 1.7849677801132202, "loss/real": -2.807443141937256, "rewards/accuracies": 1.0, "rewards/generated": -37.03469467163086, "rewards/margins": 37.19430160522461, "rewards/real": 0.1596088409423828, "step": 28 }, { "epoch": 0.12, "grad_norm": 77.17411637512444, "learning_rate": 3.166666666666666e-07, "logits/generated": -2.7265658378601074, "logits/oppo_generated": -2.91198468208313, "logits/oppo_real": -2.9211230278015137, "logits/real": -2.7112436294555664, "logps/generated": -121.98545837402344, "logps/oppo_gen": -82.21741485595703, "logps/oppo_real": -301.3589172363281, "logps/real": -312.0211181640625, "loss": -0.7401, "loss/gen": 1.702211618423462, "loss/real": -2.461573600769043, "rewards/accuracies": 0.875, "rewards/generated": -39.768035888671875, "rewards/margins": 29.1058292388916, "rewards/real": -10.66220760345459, "step": 29 }, { "epoch": 0.13, "grad_norm": 85.01337498171243, "learning_rate": 3.333333333333333e-07, "logits/generated": -2.7819857597351074, "logits/oppo_generated": -2.4022648334503174, "logits/oppo_real": -2.97650146484375, "logits/real": -2.2471132278442383, "logps/generated": -130.29348754882812, "logps/oppo_gen": -99.30915832519531, "logps/oppo_real": -226.3162841796875, "logps/real": -240.45065307617188, "loss": -0.6496, "loss/gen": 2.034857749938965, "loss/real": -2.3712759017944336, "rewards/accuracies": 1.0, "rewards/generated": -30.984325408935547, "rewards/margins": 16.849956512451172, "rewards/real": -14.134370803833008, "step": 30 }, { "epoch": 0.13, "grad_norm": 58.31929890696561, "learning_rate": 3.5e-07, "logits/generated": -2.751274585723877, "logits/oppo_generated": -2.854034900665283, "logits/oppo_real": -2.9424033164978027, "logits/real": -2.689624309539795, "logps/generated": -90.87772369384766, "logps/oppo_gen": -54.3837890625, "logps/oppo_real": -252.91123962402344, "logps/real": -270.4813537597656, "loss": -0.6919, "loss/gen": 1.8102836608886719, "loss/real": -2.287971258163452, "rewards/accuracies": 0.875, "rewards/generated": -36.493934631347656, "rewards/margins": 18.923805236816406, "rewards/real": -17.570131301879883, "step": 31 }, { "epoch": 0.13, "grad_norm": 49.377771381874105, "learning_rate": 3.666666666666666e-07, "logits/generated": -2.703281879425049, "logits/oppo_generated": -2.9263906478881836, "logits/oppo_real": -2.9535346031188965, "logits/real": -2.689378261566162, "logps/generated": -125.86929321289062, "logps/oppo_gen": -78.93435668945312, "logps/oppo_real": -298.2490234375, "logps/real": -317.0907287597656, "loss": -0.7566, "loss/gen": 1.4581267833709717, "loss/real": -2.2758255004882812, "rewards/accuracies": 1.0, "rewards/generated": -46.9349365234375, "rewards/margins": 28.093202590942383, "rewards/real": -18.841733932495117, "step": 32 }, { "epoch": 0.14, "grad_norm": 47.132596888492415, "learning_rate": 3.8333333333333335e-07, "logits/generated": -2.8561768531799316, "logits/oppo_generated": -2.9521539211273193, "logits/oppo_real": -3.0699048042297363, "logits/real": -2.701744794845581, "logps/generated": -182.20703125, "logps/oppo_gen": -136.80690002441406, "logps/oppo_real": -344.64990234375, "logps/real": -365.87115478515625, "loss": -0.8056, "loss/gen": 1.5829627513885498, "loss/real": -2.2397522926330566, "rewards/accuracies": 1.0, "rewards/generated": -45.400123596191406, "rewards/margins": 24.178863525390625, "rewards/real": -21.22126007080078, "step": 33 }, { "epoch": 0.14, "grad_norm": 47.136771677116634, "learning_rate": 4e-07, "logits/generated": -2.74558162689209, "logits/oppo_generated": -2.8447458744049072, "logits/oppo_real": -2.998192548751831, "logits/real": -2.603461742401123, "logps/generated": -134.50888061523438, "logps/oppo_gen": -79.24800109863281, "logps/oppo_real": -401.9757385253906, "logps/real": -427.4682922363281, "loss": -0.995, "loss/gen": 1.2431423664093018, "loss/real": -2.120981454849243, "rewards/accuracies": 1.0, "rewards/generated": -55.260887145996094, "rewards/margins": 29.768321990966797, "rewards/real": -25.492568969726562, "step": 34 }, { "epoch": 0.15, "grad_norm": 47.136771677116634, "learning_rate": 4e-07, "logits/generated": -2.696960926055908, "logits/oppo_generated": -2.942030906677246, "logits/oppo_real": -2.9536867141723633, "logits/real": -2.6742172241210938, "logps/generated": -125.61725616455078, "logps/oppo_gen": -62.21235656738281, "logps/oppo_real": -296.8402404785156, "logps/real": -324.08892822265625, "loss": -5.4743, "loss/gen": 1.1838252544403076, "loss/real": -2.137930154800415, "rewards/accuracies": 0.875, "rewards/generated": -63.40489959716797, "rewards/margins": 36.15622329711914, "rewards/real": -27.248676300048828, "step": 35 }, { "epoch": 0.15, "grad_norm": 38.149193463480486, "learning_rate": 4.1666666666666667e-07, "logits/generated": -2.420623779296875, "logits/oppo_generated": -2.792217493057251, "logits/oppo_real": -2.680948257446289, "logits/real": -2.556100845336914, "logps/generated": -105.52731323242188, "logps/oppo_gen": -49.044715881347656, "logps/oppo_real": -183.3726348876953, "logps/real": -208.43609619140625, "loss": -0.9189, "loss/gen": 1.2789992094039917, "loss/real": -2.150240182876587, "rewards/accuracies": 0.75, "rewards/generated": -56.48259735107422, "rewards/margins": 31.41913604736328, "rewards/real": -25.063465118408203, "step": 36 }, { "epoch": 0.15, "grad_norm": 40.49399399669891, "learning_rate": 4.3333333333333335e-07, "logits/generated": -2.5155656337738037, "logits/oppo_generated": -2.5968940258026123, "logits/oppo_real": -2.84472393989563, "logits/real": -2.363577127456665, "logps/generated": -156.7322235107422, "logps/oppo_gen": -96.46727752685547, "logps/oppo_real": -441.2087097167969, "logps/real": -452.6773681640625, "loss": -0.993, "loss/gen": 1.218324899673462, "loss/real": -2.478170394897461, "rewards/accuracies": 1.0, "rewards/generated": -60.26493835449219, "rewards/margins": 48.7962532043457, "rewards/real": -11.4686861038208, "step": 37 }, { "epoch": 0.16, "grad_norm": 38.69477383912377, "learning_rate": 4.5e-07, "logits/generated": -2.845750331878662, "logits/oppo_generated": -3.097993850708008, "logits/oppo_real": -3.161780834197998, "logits/real": -2.804795742034912, "logps/generated": -169.83187866210938, "logps/oppo_gen": -86.33152770996094, "logps/oppo_real": -374.5130615234375, "logps/real": -400.9438171386719, "loss": -1.0833, "loss/gen": 0.779202401638031, "loss/real": -2.109189987182617, "rewards/accuracies": 0.875, "rewards/generated": -83.50035095214844, "rewards/margins": 57.069610595703125, "rewards/real": -26.430742263793945, "step": 38 }, { "epoch": 0.16, "grad_norm": 39.95823930895698, "learning_rate": 4.6666666666666666e-07, "logits/generated": -2.4254915714263916, "logits/oppo_generated": -2.648486614227295, "logits/oppo_real": -2.7488012313842773, "logits/real": -2.350640296936035, "logps/generated": -155.58016967773438, "logps/oppo_gen": -78.30477142333984, "logps/oppo_real": -363.86407470703125, "logps/real": -395.0137939453125, "loss": -1.1548, "loss/gen": 0.8230071067810059, "loss/real": -2.0378403663635254, "rewards/accuracies": 1.0, "rewards/generated": -77.275390625, "rewards/margins": 46.12569046020508, "rewards/real": -31.149703979492188, "step": 39 }, { "epoch": 0.17, "grad_norm": 40.89272509652924, "learning_rate": 4.833333333333333e-07, "logits/generated": -2.461397409439087, "logits/oppo_generated": -2.864193916320801, "logits/oppo_real": -2.7761850357055664, "logits/real": -2.5565099716186523, "logps/generated": -136.72689819335938, "logps/oppo_gen": -60.6450309753418, "logps/oppo_real": -320.1565856933594, "logps/real": -337.87396240234375, "loss": -1.1347, "loss/gen": 0.9023051857948303, "loss/real": -2.3106727600097656, "rewards/accuracies": 1.0, "rewards/generated": -76.08185577392578, "rewards/margins": 58.36448669433594, "rewards/real": -17.717369079589844, "step": 40 }, { "epoch": 0.17, "grad_norm": 89.36429678043967, "learning_rate": 5e-07, "logits/generated": -2.6618571281433105, "logits/oppo_generated": -2.812058210372925, "logits/oppo_real": -2.982236862182617, "logits/real": -2.515589714050293, "logps/generated": -162.60000610351562, "logps/oppo_gen": -90.06674194335938, "logps/oppo_real": -176.9713592529297, "logps/real": -211.77285766601562, "loss": -1.1724, "loss/gen": 0.8981304168701172, "loss/real": -1.989371657371521, "rewards/accuracies": 1.0, "rewards/generated": -72.53326416015625, "rewards/margins": 37.731773376464844, "rewards/real": -34.80148696899414, "step": 41 }, { "epoch": 0.18, "grad_norm": 89.36429678043967, "learning_rate": 5e-07, "logits/generated": -2.5393388271331787, "logits/oppo_generated": -2.9253015518188477, "logits/oppo_real": -2.9079301357269287, "logits/real": -2.55344557762146, "logps/generated": -130.3933868408203, "logps/oppo_gen": -54.79414367675781, "logps/oppo_real": -186.92176818847656, "logps/real": -235.45858764648438, "loss": -5.6809, "loss/gen": 0.8535439372062683, "loss/real": -1.694696307182312, "rewards/accuracies": 1.0, "rewards/generated": -75.5992431640625, "rewards/margins": 27.062450408935547, "rewards/real": -48.53679275512695, "step": 42 }, { "epoch": 0.18, "grad_norm": 43.34401895870049, "learning_rate": 4.996438746438746e-07, "logits/generated": -2.4617252349853516, "logits/oppo_generated": -2.9949498176574707, "logits/oppo_real": -2.9107003211975098, "logits/real": -2.6696996688842773, "logps/generated": -172.76368713378906, "logps/oppo_gen": -79.9820785522461, "logps/oppo_real": -404.1100158691406, "logps/real": -422.02642822265625, "loss": -1.2059, "loss/gen": 0.6165514588356018, "loss/real": -2.3567748069763184, "rewards/accuracies": 1.0, "rewards/generated": -92.7816162109375, "rewards/margins": 74.865234375, "rewards/real": -17.916383743286133, "step": 43 }, { "epoch": 0.18, "grad_norm": 49.41177428002358, "learning_rate": 4.992877492877492e-07, "logits/generated": -2.026392698287964, "logits/oppo_generated": -2.4440221786499023, "logits/oppo_real": -2.3998050689697266, "logits/real": -2.089980125427246, "logps/generated": -209.39190673828125, "logps/oppo_gen": -93.22187805175781, "logps/oppo_real": -290.8685302734375, "logps/real": -320.3968811035156, "loss": -1.3765, "loss/gen": 0.5650486350059509, "loss/real": -2.131740093231201, "rewards/accuracies": 1.0, "rewards/generated": -116.17002868652344, "rewards/margins": 86.64169311523438, "rewards/real": -29.528339385986328, "step": 44 }, { "epoch": 0.19, "grad_norm": 59.38357051631053, "learning_rate": 4.98931623931624e-07, "logits/generated": -2.3456368446350098, "logits/oppo_generated": -2.9232547283172607, "logits/oppo_real": -2.7114880084991455, "logits/real": -2.5829110145568848, "logps/generated": -168.85809326171875, "logps/oppo_gen": -64.50846862792969, "logps/oppo_real": -239.8323974609375, "logps/real": -297.2595520019531, "loss": -1.4436, "loss/gen": 0.5499280691146851, "loss/real": -1.58909273147583, "rewards/accuracies": 0.875, "rewards/generated": -104.34961700439453, "rewards/margins": 46.92247009277344, "rewards/real": -57.42715072631836, "step": 45 }, { "epoch": 0.19, "grad_norm": 44.12861838917575, "learning_rate": 4.985754985754986e-07, "logits/generated": -2.6069109439849854, "logits/oppo_generated": -2.741456985473633, "logits/oppo_real": -2.9938759803771973, "logits/real": -2.428788185119629, "logps/generated": -149.2159423828125, "logps/oppo_gen": -58.174400329589844, "logps/oppo_real": -258.21685791015625, "logps/real": -301.842041015625, "loss": -1.4547, "loss/gen": 0.676410973072052, "loss/real": -1.867649793624878, "rewards/accuracies": 1.0, "rewards/generated": -91.04153442382812, "rewards/margins": 47.41633605957031, "rewards/real": -43.62519836425781, "step": 46 }, { "epoch": 0.2, "grad_norm": 44.34332055817426, "learning_rate": 4.982193732193732e-07, "logits/generated": -2.587238311767578, "logits/oppo_generated": -2.814079761505127, "logits/oppo_real": -2.964923620223999, "logits/real": -2.480611801147461, "logps/generated": -175.05799865722656, "logps/oppo_gen": -78.5189208984375, "logps/oppo_real": -288.56396484375, "logps/real": -318.1793518066406, "loss": -1.5609, "loss/gen": 0.7453894019126892, "loss/real": -2.1940207481384277, "rewards/accuracies": 0.875, "rewards/generated": -96.53907775878906, "rewards/margins": 66.9237060546875, "rewards/real": -29.615373611450195, "step": 47 }, { "epoch": 0.2, "grad_norm": 41.77245636004139, "learning_rate": 4.978632478632478e-07, "logits/generated": -2.5828328132629395, "logits/oppo_generated": -2.7121076583862305, "logits/oppo_real": -2.932806968688965, "logits/real": -2.3821425437927246, "logps/generated": -170.45315551757812, "logps/oppo_gen": -72.10917663574219, "logps/oppo_real": -299.3392333984375, "logps/real": -351.05755615234375, "loss": -1.5561, "loss/gen": 0.6000806093215942, "loss/real": -1.664915680885315, "rewards/accuracies": 0.875, "rewards/generated": -98.34397888183594, "rewards/margins": 46.625675201416016, "rewards/real": -51.718299865722656, "step": 48 }, { "epoch": 0.21, "grad_norm": 40.92458049952987, "learning_rate": 4.975071225071225e-07, "logits/generated": -2.7721643447875977, "logits/oppo_generated": -2.814209461212158, "logits/oppo_real": -3.157527208328247, "logits/real": -2.545376777648926, "logps/generated": -190.73538208007812, "logps/oppo_gen": -80.24543762207031, "logps/oppo_real": -294.9969482421875, "logps/real": -325.6192626953125, "loss": -1.6521, "loss/gen": 0.5949017405509949, "loss/real": -2.104870319366455, "rewards/accuracies": 1.0, "rewards/generated": -110.48993682861328, "rewards/margins": 79.86763000488281, "rewards/real": -30.622314453125, "step": 49 }, { "epoch": 0.21, "grad_norm": 40.13348241970552, "learning_rate": 4.971509971509972e-07, "logits/generated": -2.4653735160827637, "logits/oppo_generated": -2.9343652725219727, "logits/oppo_real": -2.7617945671081543, "logits/real": -2.6601219177246094, "logps/generated": -198.1933135986328, "logps/oppo_gen": -82.74765014648438, "logps/oppo_real": -315.32562255859375, "logps/real": -342.7396240234375, "loss": -1.6584, "loss/gen": 0.4857123792171478, "loss/real": -2.1372337341308594, "rewards/accuracies": 1.0, "rewards/generated": -115.44567108154297, "rewards/margins": 88.03167724609375, "rewards/real": -27.413999557495117, "step": 50 }, { "epoch": 0.21, "grad_norm": 601.4569550267084, "learning_rate": 4.967948717948718e-07, "logits/generated": -2.5435636043548584, "logits/oppo_generated": -2.805569648742676, "logits/oppo_real": -2.7846250534057617, "logits/real": -2.5612943172454834, "logps/generated": -126.34326934814453, "logps/oppo_gen": -45.456573486328125, "logps/oppo_real": -161.39598083496094, "logps/real": -196.76950073242188, "loss": -2.1364, "loss/gen": 0.7522258758544922, "loss/real": -2.0092098712921143, "rewards/accuracies": 1.0, "rewards/generated": -80.88670349121094, "rewards/margins": 45.51318359375, "rewards/real": -35.3735237121582, "step": 51 }, { "epoch": 0.22, "grad_norm": 38.65978819953379, "learning_rate": 4.964387464387464e-07, "logits/generated": -2.464820384979248, "logits/oppo_generated": -2.7444612979888916, "logits/oppo_real": -2.7595162391662598, "logits/real": -2.45442271232605, "logps/generated": -139.13876342773438, "logps/oppo_gen": -50.193504333496094, "logps/oppo_real": -148.25294494628906, "logps/real": -181.2758026123047, "loss": -1.7596, "loss/gen": 0.6654144525527954, "loss/real": -2.03794002532959, "rewards/accuracies": 1.0, "rewards/generated": -88.94526672363281, "rewards/margins": 55.92240524291992, "rewards/real": -33.022857666015625, "step": 52 }, { "epoch": 0.22, "grad_norm": 167.4533750406363, "learning_rate": 4.96082621082621e-07, "logits/generated": -2.297238349914551, "logits/oppo_generated": -2.660369396209717, "logits/oppo_real": -2.6082496643066406, "logits/real": -2.3693835735321045, "logps/generated": -138.73458862304688, "logps/oppo_gen": -55.80210876464844, "logps/oppo_real": -201.49038696289062, "logps/real": -226.22634887695312, "loss": -2.0472, "loss/gen": 0.7817223072052002, "loss/real": -2.228806257247925, "rewards/accuracies": 0.875, "rewards/generated": -82.93248748779297, "rewards/margins": 58.19652557373047, "rewards/real": -24.735958099365234, "step": 53 }, { "epoch": 0.23, "grad_norm": 38.20762371262606, "learning_rate": 4.957264957264958e-07, "logits/generated": -2.6939735412597656, "logits/oppo_generated": -2.746832847595215, "logits/oppo_real": -2.973560333251953, "logits/real": -2.453509569168091, "logps/generated": -155.50794982910156, "logps/oppo_gen": -77.28608703613281, "logps/oppo_real": -547.3628540039062, "logps/real": -561.0299072265625, "loss": -1.7775, "loss/gen": 0.9930198192596436, "loss/real": -2.6198465824127197, "rewards/accuracies": 0.875, "rewards/generated": -78.22187042236328, "rewards/margins": 64.55480194091797, "rewards/real": -13.66706657409668, "step": 54 }, { "epoch": 0.23, "grad_norm": 111.84459258553808, "learning_rate": 4.953703703703703e-07, "logits/generated": -2.3411145210266113, "logits/oppo_generated": -2.664555072784424, "logits/oppo_real": -2.6400251388549805, "logits/real": -2.3643062114715576, "logps/generated": -196.82240295410156, "logps/oppo_gen": -78.57785034179688, "logps/oppo_real": -398.628662109375, "logps/real": -393.2767333984375, "loss": -2.0424, "loss/gen": 0.5210200548171997, "loss/real": -2.969128131866455, "rewards/accuracies": 1.0, "rewards/generated": -118.24455261230469, "rewards/margins": 123.59645080566406, "rewards/real": 5.351901054382324, "step": 55 }, { "epoch": 0.23, "grad_norm": 50.014668258578155, "learning_rate": 4.95014245014245e-07, "logits/generated": -2.841848373413086, "logits/oppo_generated": -2.638930320739746, "logits/oppo_real": -3.1015210151672363, "logits/real": -2.4049315452575684, "logps/generated": -181.6864471435547, "logps/oppo_gen": -84.6130599975586, "logps/oppo_real": -310.54534912109375, "logps/real": -329.8880615234375, "loss": -1.8582, "loss/gen": 0.7178683876991272, "loss/real": -2.3166608810424805, "rewards/accuracies": 0.875, "rewards/generated": -97.0733871459961, "rewards/margins": 77.73066711425781, "rewards/real": -19.342731475830078, "step": 56 }, { "epoch": 0.24, "grad_norm": 578.0217340204432, "learning_rate": 4.946581196581196e-07, "logits/generated": -2.5697083473205566, "logits/oppo_generated": -2.9305167198181152, "logits/oppo_real": -2.7986156940460205, "logits/real": -2.666802406311035, "logps/generated": -182.54356384277344, "logps/oppo_gen": -55.247596740722656, "logps/oppo_real": -159.6094970703125, "logps/real": -191.58706665039062, "loss": -3.1972, "loss/gen": 0.5683310031890869, "loss/real": -2.0497186183929443, "rewards/accuracies": 1.0, "rewards/generated": -127.29595947265625, "rewards/margins": 95.31836700439453, "rewards/real": -31.977588653564453, "step": 57 }, { "epoch": 0.24, "grad_norm": 51.64103394316142, "learning_rate": 4.943019943019943e-07, "logits/generated": -2.674006462097168, "logits/oppo_generated": -2.733177900314331, "logits/oppo_real": -3.0261659622192383, "logits/real": -2.440023899078369, "logps/generated": -159.27865600585938, "logps/oppo_gen": -77.4105453491211, "logps/oppo_real": -291.50042724609375, "logps/real": -305.1040954589844, "loss": -1.8105, "loss/gen": 0.7389234900474548, "loss/real": -2.533874988555908, "rewards/accuracies": 0.875, "rewards/generated": -81.86811065673828, "rewards/margins": 68.26446533203125, "rewards/real": -13.60364055633545, "step": 58 }, { "epoch": 0.25, "grad_norm": 93.81772033276816, "learning_rate": 4.93945868945869e-07, "logits/generated": -2.253323554992676, "logits/oppo_generated": -2.70068359375, "logits/oppo_real": -2.622352361679077, "logits/real": -2.379178047180176, "logps/generated": -198.3895263671875, "logps/oppo_gen": -66.53448486328125, "logps/oppo_real": -142.07913208007812, "logps/real": -186.49630737304688, "loss": -2.138, "loss/gen": 0.36330240964889526, "loss/real": -1.9270637035369873, "rewards/accuracies": 1.0, "rewards/generated": -131.85504150390625, "rewards/margins": 87.43788146972656, "rewards/real": -44.41715621948242, "step": 59 }, { "epoch": 0.25, "grad_norm": 39.40381974811817, "learning_rate": 4.935897435897436e-07, "logits/generated": -2.8230233192443848, "logits/oppo_generated": -3.0608558654785156, "logits/oppo_real": -3.0881457328796387, "logits/real": -2.815178394317627, "logps/generated": -176.8870849609375, "logps/oppo_gen": -78.30126953125, "logps/oppo_real": -296.7585144042969, "logps/real": -305.8564453125, "loss": -1.9511, "loss/gen": 0.5859768390655518, "loss/real": -2.5944645404815674, "rewards/accuracies": 1.0, "rewards/generated": -98.58580780029297, "rewards/margins": 89.48786926269531, "rewards/real": -9.097940444946289, "step": 60 }, { "epoch": 0.26, "grad_norm": 37.537286150739504, "learning_rate": 4.932336182336182e-07, "logits/generated": -2.67462158203125, "logits/oppo_generated": -2.904336929321289, "logits/oppo_real": -3.0007967948913574, "logits/real": -2.706181526184082, "logps/generated": -194.5768585205078, "logps/oppo_gen": -78.76142883300781, "logps/oppo_real": -321.17315673828125, "logps/real": -332.7289733886719, "loss": -2.0148, "loss/gen": 0.5784947276115417, "loss/real": -2.6833224296569824, "rewards/accuracies": 0.875, "rewards/generated": -115.8154296875, "rewards/margins": 104.25957489013672, "rewards/real": -11.5558500289917, "step": 61 }, { "epoch": 0.26, "grad_norm": 52.37389057595874, "learning_rate": 4.928774928774928e-07, "logits/generated": -2.8456006050109863, "logits/oppo_generated": -3.0246148109436035, "logits/oppo_real": -3.155604839324951, "logits/real": -2.7388291358947754, "logps/generated": -199.48080444335938, "logps/oppo_gen": -99.78816986083984, "logps/oppo_real": -357.6624755859375, "logps/real": -361.3135070800781, "loss": -2.1519, "loss/gen": 0.5312547087669373, "loss/real": -2.7395927906036377, "rewards/accuracies": 1.0, "rewards/generated": -99.692626953125, "rewards/margins": 96.04158020019531, "rewards/real": -3.65103816986084, "step": 62 }, { "epoch": 0.26, "grad_norm": 42.730668543561166, "learning_rate": 4.925213675213676e-07, "logits/generated": -2.5994668006896973, "logits/oppo_generated": -2.718918800354004, "logits/oppo_real": -2.8950438499450684, "logits/real": -2.5016493797302246, "logps/generated": -158.23098754882812, "logps/oppo_gen": -73.73533630371094, "logps/oppo_real": -276.2977294921875, "logps/real": -278.3821105957031, "loss": -2.1712, "loss/gen": 0.7339967489242554, "loss/real": -2.8307507038116455, "rewards/accuracies": 0.875, "rewards/generated": -84.49565124511719, "rewards/margins": 82.41130065917969, "rewards/real": -2.084348678588867, "step": 63 }, { "epoch": 0.27, "grad_norm": 42.60172940894316, "learning_rate": 4.921652421652421e-07, "logits/generated": -2.6288089752197266, "logits/oppo_generated": -2.7741386890411377, "logits/oppo_real": -2.8905487060546875, "logits/real": -2.5671515464782715, "logps/generated": -164.08560180664062, "logps/oppo_gen": -70.42605590820312, "logps/oppo_real": -291.8798522949219, "logps/real": -327.316650390625, "loss": -2.0118, "loss/gen": 0.6031943559646606, "loss/real": -2.0373241901397705, "rewards/accuracies": 1.0, "rewards/generated": -93.65955352783203, "rewards/margins": 58.222755432128906, "rewards/real": -35.436798095703125, "step": 64 }, { "epoch": 0.27, "grad_norm": 525.7316627805482, "learning_rate": 4.918091168091168e-07, "logits/generated": -2.4973931312561035, "logits/oppo_generated": -2.731257438659668, "logits/oppo_real": -2.804780960083008, "logits/real": -2.5444960594177246, "logps/generated": -230.39053344726562, "logps/oppo_gen": -143.67832946777344, "logps/oppo_real": -309.55450439453125, "logps/real": -315.0069274902344, "loss": -2.927, "loss/gen": 0.7850175499916077, "loss/real": -2.6631596088409424, "rewards/accuracies": 1.0, "rewards/generated": -86.71220397949219, "rewards/margins": 81.25978088378906, "rewards/real": -5.452421188354492, "step": 65 }, { "epoch": 0.28, "grad_norm": 80.44494186631624, "learning_rate": 4.914529914529914e-07, "logits/generated": -2.6201300621032715, "logits/oppo_generated": -2.710496664047241, "logits/oppo_real": -2.980191707611084, "logits/real": -2.4632492065429688, "logps/generated": -194.9330291748047, "logps/oppo_gen": -71.51214599609375, "logps/oppo_real": -284.34765625, "logps/real": -298.09637451171875, "loss": -2.3734, "loss/gen": 0.33017057180404663, "loss/real": -2.5317859649658203, "rewards/accuracies": 1.0, "rewards/generated": -123.42086791992188, "rewards/margins": 109.67212677001953, "rewards/real": -13.74874210357666, "step": 66 }, { "epoch": 0.28, "grad_norm": 45.73295767790172, "learning_rate": 4.910968660968661e-07, "logits/generated": -2.7911667823791504, "logits/oppo_generated": -3.0934062004089355, "logits/oppo_real": -3.077010154724121, "logits/real": -2.8539376258850098, "logps/generated": -222.52537536621094, "logps/oppo_gen": -109.1805419921875, "logps/oppo_real": -348.23834228515625, "logps/real": -337.4581298828125, "loss": -2.0979, "loss/gen": 0.41786307096481323, "loss/real": -3.0975918769836426, "rewards/accuracies": 1.0, "rewards/generated": -113.3448257446289, "rewards/margins": 124.12504577636719, "rewards/real": 10.780221939086914, "step": 67 }, { "epoch": 0.28, "grad_norm": 79.69396419859851, "learning_rate": 4.907407407407407e-07, "logits/generated": -2.657637596130371, "logits/oppo_generated": -2.838265895843506, "logits/oppo_real": -3.01387357711792, "logits/real": -2.6080217361450195, "logps/generated": -174.41976928710938, "logps/oppo_gen": -75.5096206665039, "logps/oppo_real": -242.11915588378906, "logps/real": -260.3476867675781, "loss": -2.2245, "loss/gen": 0.5530567765235901, "loss/real": -2.540099620819092, "rewards/accuracies": 0.875, "rewards/generated": -98.91015625, "rewards/margins": 80.68161010742188, "rewards/real": -18.228544235229492, "step": 68 }, { "epoch": 0.29, "grad_norm": 57.15850101499557, "learning_rate": 4.903846153846153e-07, "logits/generated": -2.718892812728882, "logits/oppo_generated": -2.786154270172119, "logits/oppo_real": -2.980445146560669, "logits/real": -2.5882253646850586, "logps/generated": -203.54293823242188, "logps/oppo_gen": -78.40753173828125, "logps/oppo_real": -188.29739379882812, "logps/real": -220.8904571533203, "loss": -2.1241, "loss/gen": 0.3356163501739502, "loss/real": -2.0496373176574707, "rewards/accuracies": 1.0, "rewards/generated": -125.13542175292969, "rewards/margins": 92.5423583984375, "rewards/real": -32.59306335449219, "step": 69 }, { "epoch": 0.29, "grad_norm": 55.560476534442856, "learning_rate": 4.9002849002849e-07, "logits/generated": -2.484227180480957, "logits/oppo_generated": -2.8353500366210938, "logits/oppo_real": -2.788581371307373, "logits/real": -2.584005832672119, "logps/generated": -167.95159912109375, "logps/oppo_gen": -74.27359008789062, "logps/oppo_real": -262.4258728027344, "logps/real": -275.72314453125, "loss": -2.2186, "loss/gen": 0.6950039863586426, "loss/real": -2.613152027130127, "rewards/accuracies": 0.875, "rewards/generated": -93.67799377441406, "rewards/margins": 80.38072967529297, "rewards/real": -13.297256469726562, "step": 70 }, { "epoch": 0.3, "grad_norm": 40.88330591021765, "learning_rate": 4.896723646723647e-07, "logits/generated": -2.44921612739563, "logits/oppo_generated": -2.8188014030456543, "logits/oppo_real": -2.757133960723877, "logits/real": -2.499187469482422, "logps/generated": -161.24481201171875, "logps/oppo_gen": -55.317054748535156, "logps/oppo_real": -178.10824584960938, "logps/real": -189.52215576171875, "loss": -2.1209, "loss/gen": 0.4848253130912781, "loss/real": -2.4801671504974365, "rewards/accuracies": 1.0, "rewards/generated": -105.9277572631836, "rewards/margins": 94.51385498046875, "rewards/real": -11.413912773132324, "step": 71 }, { "epoch": 0.3, "grad_norm": 35.69595968854091, "learning_rate": 4.893162393162393e-07, "logits/generated": -2.509648323059082, "logits/oppo_generated": -2.865746259689331, "logits/oppo_real": -2.85042142868042, "logits/real": -2.612628936767578, "logps/generated": -207.73446655273438, "logps/oppo_gen": -101.81581115722656, "logps/oppo_real": -463.47314453125, "logps/real": -449.06451416015625, "loss": -2.2045, "loss/gen": 0.5114428997039795, "loss/real": -3.246914863586426, "rewards/accuracies": 1.0, "rewards/generated": -105.91865539550781, "rewards/margins": 120.32732391357422, "rewards/real": 14.408672332763672, "step": 72 }, { "epoch": 0.31, "grad_norm": 46.60751808654372, "learning_rate": 4.889601139601139e-07, "logits/generated": -2.513535499572754, "logits/oppo_generated": -2.9923882484436035, "logits/oppo_real": -2.813816547393799, "logits/real": -2.6687417030334473, "logps/generated": -200.91436767578125, "logps/oppo_gen": -78.51251220703125, "logps/oppo_real": -286.4658508300781, "logps/real": -272.64630126953125, "loss": -2.3923, "loss/gen": 0.3351864218711853, "loss/real": -3.2229790687561035, "rewards/accuracies": 1.0, "rewards/generated": -122.40186309814453, "rewards/margins": 136.22140502929688, "rewards/real": 13.81955337524414, "step": 73 }, { "epoch": 0.31, "grad_norm": 46.60751808654372, "learning_rate": 4.889601139601139e-07, "logits/generated": -2.667757987976074, "logits/oppo_generated": -2.7725887298583984, "logits/oppo_real": -3.063380002975464, "logits/real": -2.553708076477051, "logps/generated": -177.4560546875, "logps/oppo_gen": -79.40229034423828, "logps/oppo_real": -383.419677734375, "logps/real": -384.32568359375, "loss": -22602.7559, "loss/gen": 0.5979279279708862, "loss/real": -2.8606982231140137, "rewards/accuracies": 0.875, "rewards/generated": -98.05377960205078, "rewards/margins": 97.14777374267578, "rewards/real": -0.9059967994689941, "step": 74 }, { "epoch": 0.31, "grad_norm": 41.5718210882534, "learning_rate": 4.886039886039886e-07, "logits/generated": -2.7659826278686523, "logits/oppo_generated": -2.8321666717529297, "logits/oppo_real": -3.1668171882629395, "logits/real": -2.5931761264801025, "logps/generated": -241.8350067138672, "logps/oppo_gen": -99.83964538574219, "logps/oppo_real": -322.6613464355469, "logps/real": -311.7099914550781, "loss": -2.2896, "loss/gen": 0.5622150897979736, "loss/real": -3.6310153007507324, "rewards/accuracies": 1.0, "rewards/generated": -141.99537658691406, "rewards/margins": 152.9467315673828, "rewards/real": 10.951353073120117, "step": 75 }, { "epoch": 0.32, "grad_norm": 44.716500642240554, "learning_rate": 4.882478632478633e-07, "logits/generated": -2.7758758068084717, "logits/oppo_generated": -3.000812530517578, "logits/oppo_real": -3.1619484424591064, "logits/real": -2.7301864624023438, "logps/generated": -200.3653564453125, "logps/oppo_gen": -83.82888793945312, "logps/oppo_real": -441.3746337890625, "logps/real": -431.2779541015625, "loss": -2.3134, "loss/gen": 0.3644047975540161, "loss/real": -3.1670141220092773, "rewards/accuracies": 1.0, "rewards/generated": -116.53646850585938, "rewards/margins": 126.63313293457031, "rewards/real": 10.096664428710938, "step": 76 }, { "epoch": 0.32, "grad_norm": 44.716500642240554, "learning_rate": 4.882478632478633e-07, "logits/generated": -2.254303455352783, "logits/oppo_generated": -2.4111037254333496, "logits/oppo_real": -2.622360944747925, "logits/real": -2.1454672813415527, "logps/generated": -177.43157958984375, "logps/oppo_gen": -94.29784393310547, "logps/oppo_real": -307.8828125, "logps/real": -284.0107727050781, "loss": -17.4644, "loss/gen": 1.3658581972122192, "loss/real": -3.9946789741516113, "rewards/accuracies": 1.0, "rewards/generated": -83.13372802734375, "rewards/margins": 107.00576782226562, "rewards/real": 23.872041702270508, "step": 77 }, { "epoch": 0.33, "grad_norm": 44.716500642240554, "learning_rate": 4.882478632478633e-07, "logits/generated": -2.561386823654175, "logits/oppo_generated": -2.7816574573516846, "logits/oppo_real": -2.923349380493164, "logits/real": -2.5139307975769043, "logps/generated": -170.64508056640625, "logps/oppo_gen": -70.22672271728516, "logps/oppo_real": -286.0644836425781, "logps/real": -304.5027160644531, "loss": -51.313, "loss/gen": 0.5637646317481995, "loss/real": -2.394735813140869, "rewards/accuracies": 0.875, "rewards/generated": -100.41835021972656, "rewards/margins": 81.98014831542969, "rewards/real": -18.438209533691406, "step": 78 }, { "epoch": 0.33, "grad_norm": 57.8592273155015, "learning_rate": 4.878917378917379e-07, "logits/generated": -2.341658115386963, "logits/oppo_generated": -2.624129056930542, "logits/oppo_real": -2.6314826011657715, "logits/real": -2.3068737983703613, "logps/generated": -137.337646484375, "logps/oppo_gen": -48.185340881347656, "logps/oppo_real": -148.66656494140625, "logps/real": -167.26583862304688, "loss": -2.4266, "loss/gen": 0.7307255268096924, "loss/real": -2.38840389251709, "rewards/accuracies": 1.0, "rewards/generated": -89.15231323242188, "rewards/margins": 70.55303955078125, "rewards/real": -18.599275588989258, "step": 79 }, { "epoch": 0.33, "grad_norm": 61.02295290503402, "learning_rate": 4.875356125356125e-07, "logits/generated": -2.563333034515381, "logits/oppo_generated": -2.668670177459717, "logits/oppo_real": -2.9500231742858887, "logits/real": -2.375744581222534, "logps/generated": -193.91883850097656, "logps/oppo_gen": -76.79248809814453, "logps/oppo_real": -287.1414794921875, "logps/real": -309.30792236328125, "loss": -2.2982, "loss/gen": 0.386036217212677, "loss/real": -2.4096016883850098, "rewards/accuracies": 1.0, "rewards/generated": -117.1263427734375, "rewards/margins": 94.95994567871094, "rewards/real": -22.166412353515625, "step": 80 }, { "epoch": 0.34, "grad_norm": 169.99358903352797, "learning_rate": 4.871794871794871e-07, "logits/generated": -2.613680601119995, "logits/oppo_generated": -2.8624868392944336, "logits/oppo_real": -3.0077338218688965, "logits/real": -2.5658488273620605, "logps/generated": -205.80078125, "logps/oppo_gen": -103.01863861083984, "logps/oppo_real": -484.10565185546875, "logps/real": -483.44097900390625, "loss": -3.0697, "loss/gen": 0.7189458608627319, "loss/real": -2.95969820022583, "rewards/accuracies": 0.875, "rewards/generated": -102.78215026855469, "rewards/margins": 103.44681549072266, "rewards/real": 0.6646575927734375, "step": 81 }, { "epoch": 0.34, "grad_norm": 43.23124085134354, "learning_rate": 4.868233618233618e-07, "logits/generated": -2.5590624809265137, "logits/oppo_generated": -2.976921796798706, "logits/oppo_real": -3.0094780921936035, "logits/real": -2.6058220863342285, "logps/generated": -179.38499450683594, "logps/oppo_gen": -66.51390075683594, "logps/oppo_real": -174.39071655273438, "logps/real": -176.55557250976562, "loss": -2.4127, "loss/gen": 0.44477635622024536, "loss/real": -2.9173386096954346, "rewards/accuracies": 1.0, "rewards/generated": -112.87110137939453, "rewards/margins": 110.70625305175781, "rewards/real": -2.1648406982421875, "step": 82 }, { "epoch": 0.35, "grad_norm": 58.48061786622663, "learning_rate": 4.864672364672365e-07, "logits/generated": -2.5132930278778076, "logits/oppo_generated": -3.01529598236084, "logits/oppo_real": -2.9185380935668945, "logits/real": -2.643099308013916, "logps/generated": -246.02755737304688, "logps/oppo_gen": -86.220458984375, "logps/oppo_real": -329.8023376464844, "logps/real": -310.6354064941406, "loss": -2.4253, "loss/gen": 0.5145424008369446, "loss/real": -3.408470392227173, "rewards/accuracies": 1.0, "rewards/generated": -159.80709838867188, "rewards/margins": 178.97406005859375, "rewards/real": 19.166940689086914, "step": 83 }, { "epoch": 0.35, "grad_norm": 54.441314178233476, "learning_rate": 4.861111111111111e-07, "logits/generated": -2.3987717628479004, "logits/oppo_generated": -2.864108085632324, "logits/oppo_real": -2.8596436977386475, "logits/real": -2.5680923461914062, "logps/generated": -177.2393798828125, "logps/oppo_gen": -79.35113525390625, "logps/oppo_real": -357.43438720703125, "logps/real": -336.9925537109375, "loss": -2.5489, "loss/gen": 0.5846430659294128, "loss/real": -3.432420492172241, "rewards/accuracies": 1.0, "rewards/generated": -97.88824462890625, "rewards/margins": 118.33008575439453, "rewards/real": 20.441844940185547, "step": 84 }, { "epoch": 0.36, "grad_norm": 66.89113415188145, "learning_rate": 4.857549857549857e-07, "logits/generated": -2.436213493347168, "logits/oppo_generated": -2.635812282562256, "logits/oppo_real": -2.784547805786133, "logits/real": -2.3119587898254395, "logps/generated": -188.01727294921875, "logps/oppo_gen": -87.48421478271484, "logps/oppo_real": -250.10626220703125, "logps/real": -244.0000457763672, "loss": -2.5429, "loss/gen": 0.7033488154411316, "loss/real": -2.972754955291748, "rewards/accuracies": 1.0, "rewards/generated": -100.53305053710938, "rewards/margins": 106.6392593383789, "rewards/real": 6.106204986572266, "step": 85 }, { "epoch": 0.36, "grad_norm": 60.396710964360466, "learning_rate": 4.853988603988603e-07, "logits/generated": -2.538017749786377, "logits/oppo_generated": -2.9845218658447266, "logits/oppo_real": -3.016307830810547, "logits/real": -2.62971830368042, "logps/generated": -155.26116943359375, "logps/oppo_gen": -55.523197174072266, "logps/oppo_real": -291.81378173828125, "logps/real": -305.18359375, "loss": -2.5841, "loss/gen": 0.6104675531387329, "loss/real": -2.7806365489959717, "rewards/accuracies": 0.875, "rewards/generated": -99.73796081542969, "rewards/margins": 86.36811828613281, "rewards/real": -13.369840621948242, "step": 86 }, { "epoch": 0.36, "grad_norm": 53.259132139474445, "learning_rate": 4.850427350427351e-07, "logits/generated": -2.45882511138916, "logits/oppo_generated": -2.8317785263061523, "logits/oppo_real": -2.849785327911377, "logits/real": -2.4766674041748047, "logps/generated": -163.40484619140625, "logps/oppo_gen": -65.48351287841797, "logps/oppo_real": -259.8980712890625, "logps/real": -273.74273681640625, "loss": -2.6208, "loss/gen": 0.5979644656181335, "loss/real": -2.596888542175293, "rewards/accuracies": 1.0, "rewards/generated": -97.92133331298828, "rewards/margins": 84.07666015625, "rewards/real": -13.844667434692383, "step": 87 }, { "epoch": 0.37, "grad_norm": 1690.1221109730504, "learning_rate": 4.846866096866097e-07, "logits/generated": -2.439664602279663, "logits/oppo_generated": -2.9616637229919434, "logits/oppo_real": -2.8549320697784424, "logits/real": -2.6093478202819824, "logps/generated": -177.17694091796875, "logps/oppo_gen": -66.1073226928711, "logps/oppo_real": -297.0393981933594, "logps/real": -275.50140380859375, "loss": -7.118, "loss/gen": 0.45209312438964844, "loss/real": -3.4258365631103516, "rewards/accuracies": 1.0, "rewards/generated": -111.06962585449219, "rewards/margins": 132.60760498046875, "rewards/real": 21.537994384765625, "step": 88 }, { "epoch": 0.37, "grad_norm": 74.6983974790457, "learning_rate": 4.843304843304843e-07, "logits/generated": -2.5593514442443848, "logits/oppo_generated": -2.944060802459717, "logits/oppo_real": -2.977362632751465, "logits/real": -2.5549235343933105, "logps/generated": -160.49493408203125, "logps/oppo_gen": -49.032493591308594, "logps/oppo_real": -197.13412475585938, "logps/real": -235.08087158203125, "loss": -2.3729, "loss/gen": 0.49238741397857666, "loss/real": -2.1076858043670654, "rewards/accuracies": 0.875, "rewards/generated": -111.46244812011719, "rewards/margins": 73.51570892333984, "rewards/real": -37.94673538208008, "step": 89 }, { "epoch": 0.38, "grad_norm": 70.76835216372322, "learning_rate": 4.839743589743589e-07, "logits/generated": -2.497036933898926, "logits/oppo_generated": -2.9935152530670166, "logits/oppo_real": -2.782620906829834, "logits/real": -2.689803123474121, "logps/generated": -172.08953857421875, "logps/oppo_gen": -79.41259002685547, "logps/oppo_real": -304.58465576171875, "logps/real": -297.86407470703125, "loss": -2.6668, "loss/gen": 0.9129126071929932, "loss/real": -3.119077205657959, "rewards/accuracies": 0.875, "rewards/generated": -92.67694854736328, "rewards/margins": 99.39753723144531, "rewards/real": 6.720589637756348, "step": 90 }, { "epoch": 0.38, "grad_norm": 68.6735548002741, "learning_rate": 4.836182336182337e-07, "logits/generated": -2.508333683013916, "logits/oppo_generated": -3.0348973274230957, "logits/oppo_real": -2.8550362586975098, "logits/real": -2.699089527130127, "logps/generated": -235.1026611328125, "logps/oppo_gen": -147.11734008789062, "logps/oppo_real": -324.0049743652344, "logps/real": -307.71380615234375, "loss": -2.9712, "loss/gen": 0.950553297996521, "loss/real": -3.3368782997131348, "rewards/accuracies": 1.0, "rewards/generated": -87.98532104492188, "rewards/margins": 104.27648162841797, "rewards/real": 16.291156768798828, "step": 91 }, { "epoch": 0.38, "grad_norm": 70.78148799628862, "learning_rate": 4.832621082621082e-07, "logits/generated": -2.5086488723754883, "logits/oppo_generated": -2.8708338737487793, "logits/oppo_real": -2.8143606185913086, "logits/real": -2.600031852722168, "logps/generated": -205.0748291015625, "logps/oppo_gen": -81.77798461914062, "logps/oppo_real": -330.5220031738281, "logps/real": -311.5235900878906, "loss": -2.4969, "loss/gen": 0.43894362449645996, "loss/real": -3.3736114501953125, "rewards/accuracies": 1.0, "rewards/generated": -123.29684448242188, "rewards/margins": 142.29525756835938, "rewards/real": 18.99840545654297, "step": 92 }, { "epoch": 0.39, "grad_norm": 76.29366705574257, "learning_rate": 4.829059829059829e-07, "logits/generated": -2.3690929412841797, "logits/oppo_generated": -2.7298922538757324, "logits/oppo_real": -2.698655605316162, "logits/real": -2.4298644065856934, "logps/generated": -173.87249755859375, "logps/oppo_gen": -74.60616302490234, "logps/oppo_real": -251.41427612304688, "logps/real": -237.06617736816406, "loss": -2.5674, "loss/gen": 0.6722112894058228, "loss/real": -3.512993335723877, "rewards/accuracies": 0.875, "rewards/generated": -99.26634216308594, "rewards/margins": 113.61441802978516, "rewards/real": 14.348082542419434, "step": 93 }, { "epoch": 0.39, "grad_norm": 249.33590702353723, "learning_rate": 4.825498575498575e-07, "logits/generated": -2.570150375366211, "logits/oppo_generated": -2.9584808349609375, "logits/oppo_real": -2.8358330726623535, "logits/real": -2.728276491165161, "logps/generated": -160.3553466796875, "logps/oppo_gen": -83.23335266113281, "logps/oppo_real": -311.66064453125, "logps/real": -289.5158996582031, "loss": -3.7493, "loss/gen": 1.0953956842422485, "loss/real": -3.4564929008483887, "rewards/accuracies": 1.0, "rewards/generated": -77.12197875976562, "rewards/margins": 99.26671600341797, "rewards/real": 22.14473533630371, "step": 94 }, { "epoch": 0.4, "grad_norm": 1622.4536202924262, "learning_rate": 4.821937321937321e-07, "logits/generated": -2.4929990768432617, "logits/oppo_generated": -2.83894681930542, "logits/oppo_real": -2.731696605682373, "logits/real": -2.6017203330993652, "logps/generated": -202.414306640625, "logps/oppo_gen": -103.72628021240234, "logps/oppo_real": -218.9561767578125, "logps/real": -203.55921936035156, "loss": -7.3232, "loss/gen": 0.5733932256698608, "loss/real": -3.2266201972961426, "rewards/accuracies": 1.0, "rewards/generated": -98.68803405761719, "rewards/margins": 114.0849838256836, "rewards/real": 15.396947860717773, "step": 95 }, { "epoch": 0.4, "grad_norm": 66.96757975529091, "learning_rate": 4.818376068376069e-07, "logits/generated": -2.6456146240234375, "logits/oppo_generated": -2.7633142471313477, "logits/oppo_real": -2.9560418128967285, "logits/real": -2.4849910736083984, "logps/generated": -207.35745239257812, "logps/oppo_gen": -74.91079711914062, "logps/oppo_real": -299.2713623046875, "logps/real": -269.4769287109375, "loss": -2.824, "loss/gen": 0.35913562774658203, "loss/real": -3.8368678092956543, "rewards/accuracies": 1.0, "rewards/generated": -132.4466552734375, "rewards/margins": 162.2410888671875, "rewards/real": 29.7944393157959, "step": 96 }, { "epoch": 0.41, "grad_norm": 702.0471182548932, "learning_rate": 4.814814814814814e-07, "logits/generated": -2.798750400543213, "logits/oppo_generated": -2.8308515548706055, "logits/oppo_real": -3.085522174835205, "logits/real": -2.5982208251953125, "logps/generated": -237.33787536621094, "logps/oppo_gen": -134.01483154296875, "logps/oppo_real": -442.37945556640625, "logps/real": -406.73846435546875, "loss": -1.4584, "loss/gen": 0.540956437587738, "loss/real": -3.9525904655456543, "rewards/accuracies": 1.0, "rewards/generated": -103.32305908203125, "rewards/margins": 138.96401977539062, "rewards/real": 35.640968322753906, "step": 97 }, { "epoch": 0.41, "grad_norm": 54.58323990131952, "learning_rate": 4.811253561253561e-07, "logits/generated": -2.40437650680542, "logits/oppo_generated": -2.8044867515563965, "logits/oppo_real": -2.8060150146484375, "logits/real": -2.5216751098632812, "logps/generated": -167.4996337890625, "logps/oppo_gen": -51.423309326171875, "logps/oppo_real": -222.54879760742188, "logps/real": -225.21975708007812, "loss": -2.7112, "loss/gen": 0.3818909823894501, "loss/real": -2.9921202659606934, "rewards/accuracies": 1.0, "rewards/generated": -116.07633972167969, "rewards/margins": 113.40538024902344, "rewards/real": -2.670961380004883, "step": 98 }, { "epoch": 0.41, "grad_norm": 45.42335040173446, "learning_rate": 4.807692307692307e-07, "logits/generated": -2.6010522842407227, "logits/oppo_generated": -2.932793140411377, "logits/oppo_real": -2.9959638118743896, "logits/real": -2.675575017929077, "logps/generated": -186.99935913085938, "logps/oppo_gen": -68.20332336425781, "logps/oppo_real": -376.541015625, "logps/real": -360.2162170410156, "loss": -2.6531, "loss/gen": 0.3653205931186676, "loss/real": -3.4390110969543457, "rewards/accuracies": 1.0, "rewards/generated": -118.7960205078125, "rewards/margins": 135.12083435058594, "rewards/real": 16.324806213378906, "step": 99 }, { "epoch": 0.42, "grad_norm": 183.07126984797478, "learning_rate": 4.804131054131054e-07, "logits/generated": -2.4377760887145996, "logits/oppo_generated": -2.780601739883423, "logits/oppo_real": -2.8726038932800293, "logits/real": -2.5523815155029297, "logps/generated": -195.5144500732422, "logps/oppo_gen": -75.83106994628906, "logps/oppo_real": -327.609619140625, "logps/real": -326.1234130859375, "loss": -2.9695, "loss/gen": 0.4366985857486725, "loss/real": -2.959474563598633, "rewards/accuracies": 1.0, "rewards/generated": -119.68338012695312, "rewards/margins": 121.16956329345703, "rewards/real": 1.4861793518066406, "step": 100 }, { "epoch": 0.42, "grad_norm": 549.6681100900797, "learning_rate": 4.8005698005698e-07, "logits/generated": -2.454486846923828, "logits/oppo_generated": -2.91953706741333, "logits/oppo_real": -2.820370674133301, "logits/real": -2.6601805686950684, "logps/generated": -184.12876892089844, "logps/oppo_gen": -75.91517639160156, "logps/oppo_real": -531.0400390625, "logps/real": -524.9949340820312, "loss": -4.2352, "loss/gen": 0.5441170334815979, "loss/real": -3.007982015609741, "rewards/accuracies": 0.875, "rewards/generated": -108.21359252929688, "rewards/margins": 114.25873565673828, "rewards/real": 6.045146942138672, "step": 101 }, { "epoch": 0.43, "grad_norm": 60.524447459141456, "learning_rate": 4.797008547008547e-07, "logits/generated": -2.492274761199951, "logits/oppo_generated": -2.927794933319092, "logits/oppo_real": -2.8259315490722656, "logits/real": -2.6615185737609863, "logps/generated": -186.5205078125, "logps/oppo_gen": -75.32722473144531, "logps/oppo_real": -334.3116149902344, "logps/real": -322.076904296875, "loss": -2.7939, "loss/gen": 0.4799632132053375, "loss/real": -3.1898889541625977, "rewards/accuracies": 1.0, "rewards/generated": -111.19327545166016, "rewards/margins": 123.42797088623047, "rewards/real": 12.23469066619873, "step": 102 }, { "epoch": 0.43, "grad_norm": 72.22195132995404, "learning_rate": 4.793447293447293e-07, "logits/generated": -2.681981325149536, "logits/oppo_generated": -2.798323154449463, "logits/oppo_real": -3.0827927589416504, "logits/real": -2.6118640899658203, "logps/generated": -193.87255859375, "logps/oppo_gen": -85.98326110839844, "logps/oppo_real": -484.7052001953125, "logps/real": -468.4195556640625, "loss": -2.7665, "loss/gen": 0.48763740062713623, "loss/real": -3.367074489593506, "rewards/accuracies": 1.0, "rewards/generated": -107.88929748535156, "rewards/margins": 124.17497253417969, "rewards/real": 16.285675048828125, "step": 103 }, { "epoch": 0.44, "grad_norm": 120.03039979521854, "learning_rate": 4.78988603988604e-07, "logits/generated": -2.369338035583496, "logits/oppo_generated": -2.820817232131958, "logits/oppo_real": -2.7580766677856445, "logits/real": -2.554074287414551, "logps/generated": -224.9398651123047, "logps/oppo_gen": -98.39456176757812, "logps/oppo_real": -435.86871337890625, "logps/real": -420.9836730957031, "loss": -2.9534, "loss/gen": 0.3016844391822815, "loss/real": -3.4657280445098877, "rewards/accuracies": 1.0, "rewards/generated": -126.5452880859375, "rewards/margins": 141.43032836914062, "rewards/real": 14.885029792785645, "step": 104 }, { "epoch": 0.44, "grad_norm": 1841.6121334309241, "learning_rate": 4.786324786324786e-07, "logits/generated": -2.6123902797698975, "logits/oppo_generated": -2.991581439971924, "logits/oppo_real": -3.002182960510254, "logits/real": -2.710818290710449, "logps/generated": -204.66802978515625, "logps/oppo_gen": -81.12940216064453, "logps/oppo_real": -296.61138916015625, "logps/real": -273.9120788574219, "loss": -8.6558, "loss/gen": 0.40045416355133057, "loss/real": -3.615565776824951, "rewards/accuracies": 1.0, "rewards/generated": -123.53861236572266, "rewards/margins": 146.23793029785156, "rewards/real": 22.69931411743164, "step": 105 }, { "epoch": 0.44, "grad_norm": 50.14175949168393, "learning_rate": 4.782763532763532e-07, "logits/generated": -2.58475399017334, "logits/oppo_generated": -2.8433456420898438, "logits/oppo_real": -3.012195110321045, "logits/real": -2.584439992904663, "logps/generated": -161.61810302734375, "logps/oppo_gen": -63.396881103515625, "logps/oppo_real": -288.55780029296875, "logps/real": -261.4425048828125, "loss": -2.8566, "loss/gen": 0.7563031315803528, "loss/real": -3.606753349304199, "rewards/accuracies": 1.0, "rewards/generated": -98.22122192382812, "rewards/margins": 125.3365249633789, "rewards/real": 27.115306854248047, "step": 106 }, { "epoch": 0.45, "grad_norm": 365.902095853522, "learning_rate": 4.779202279202279e-07, "logits/generated": -2.642536163330078, "logits/oppo_generated": -2.75607967376709, "logits/oppo_real": -3.044626235961914, "logits/real": -2.5140504837036133, "logps/generated": -215.69821166992188, "logps/oppo_gen": -89.79308319091797, "logps/oppo_real": -237.51071166992188, "logps/real": -235.16732788085938, "loss": -4.2838, "loss/gen": 0.3768947720527649, "loss/real": -2.9775023460388184, "rewards/accuracies": 1.0, "rewards/generated": -125.90511322021484, "rewards/margins": 128.24850463867188, "rewards/real": 2.3433871269226074, "step": 107 }, { "epoch": 0.45, "grad_norm": 100.40770890630111, "learning_rate": 4.775641025641026e-07, "logits/generated": -2.698265790939331, "logits/oppo_generated": -2.9334537982940674, "logits/oppo_real": -3.0197911262512207, "logits/real": -2.6614885330200195, "logps/generated": -193.49476623535156, "logps/oppo_gen": -86.25882720947266, "logps/oppo_real": -171.73361206054688, "logps/real": -154.22259521484375, "loss": -3.0162, "loss/gen": 0.695202112197876, "loss/real": -3.3781354427337646, "rewards/accuracies": 1.0, "rewards/generated": -107.23593139648438, "rewards/margins": 124.74696350097656, "rewards/real": 17.511028289794922, "step": 108 }, { "epoch": 0.46, "grad_norm": 95.44328629315685, "learning_rate": 4.772079772079772e-07, "logits/generated": -2.5307321548461914, "logits/oppo_generated": -2.8885016441345215, "logits/oppo_real": -2.9670629501342773, "logits/real": -2.6184444427490234, "logps/generated": -137.50279235839844, "logps/oppo_gen": -52.36747741699219, "logps/oppo_real": -234.88699340820312, "logps/real": -211.22215270996094, "loss": -2.7515, "loss/gen": 1.0063905715942383, "loss/real": -3.66544771194458, "rewards/accuracies": 0.875, "rewards/generated": -85.13531494140625, "rewards/margins": 108.8001708984375, "rewards/real": 23.664859771728516, "step": 109 }, { "epoch": 0.46, "grad_norm": 92.31076504801594, "learning_rate": 4.768518518518518e-07, "logits/generated": -2.4487879276275635, "logits/oppo_generated": -2.902094841003418, "logits/oppo_real": -2.738150119781494, "logits/real": -2.5988502502441406, "logps/generated": -183.7650146484375, "logps/oppo_gen": -71.77503967285156, "logps/oppo_real": -226.59805297851562, "logps/real": -210.24148559570312, "loss": -2.6027, "loss/gen": 0.4320296347141266, "loss/real": -3.250072956085205, "rewards/accuracies": 1.0, "rewards/generated": -111.989990234375, "rewards/margins": 128.34658813476562, "rewards/real": 16.356592178344727, "step": 110 }, { "epoch": 0.46, "grad_norm": 92.31076504801594, "learning_rate": 4.768518518518518e-07, "logits/generated": -2.4111056327819824, "logits/oppo_generated": -2.78233003616333, "logits/oppo_real": -2.810633420944214, "logits/real": -2.52742075920105, "logps/generated": -161.51727294921875, "logps/oppo_gen": -51.96064758300781, "logps/oppo_real": -160.8415069580078, "logps/real": -171.3201446533203, "loss": -128.9964, "loss/gen": 0.43095916509628296, "loss/real": -2.8278121948242188, "rewards/accuracies": 1.0, "rewards/generated": -109.556640625, "rewards/margins": 99.07798767089844, "rewards/real": -10.478641510009766, "step": 111 }, { "epoch": 0.47, "grad_norm": 68.96497993207313, "learning_rate": 4.764957264957264e-07, "logits/generated": -2.3004653453826904, "logits/oppo_generated": -2.7906460762023926, "logits/oppo_real": -2.7454147338867188, "logits/real": -2.5157923698425293, "logps/generated": -148.928955078125, "logps/oppo_gen": -67.77021789550781, "logps/oppo_real": -355.9058837890625, "logps/real": -322.17315673828125, "loss": -2.7744, "loss/gen": 0.9168766736984253, "loss/real": -3.8560690879821777, "rewards/accuracies": 1.0, "rewards/generated": -81.15873718261719, "rewards/margins": 114.89146423339844, "rewards/real": 33.73272705078125, "step": 112 }, { "epoch": 0.47, "grad_norm": 82.86539584244439, "learning_rate": 4.761396011396011e-07, "logits/generated": -2.309685707092285, "logits/oppo_generated": -2.784420967102051, "logits/oppo_real": -2.58797550201416, "logits/real": -2.521721363067627, "logps/generated": -174.80889892578125, "logps/oppo_gen": -53.4489631652832, "logps/oppo_real": -213.77337646484375, "logps/real": -204.11801147460938, "loss": -2.8077, "loss/gen": 0.35567396879196167, "loss/real": -3.218747138977051, "rewards/accuracies": 1.0, "rewards/generated": -121.35994720458984, "rewards/margins": 131.01528930664062, "rewards/real": 9.655345916748047, "step": 113 }, { "epoch": 0.48, "grad_norm": 61.933964171274795, "learning_rate": 4.7578347578347577e-07, "logits/generated": -2.5905487537384033, "logits/oppo_generated": -2.9693868160247803, "logits/oppo_real": -2.897064208984375, "logits/real": -2.695655345916748, "logps/generated": -178.83404541015625, "logps/oppo_gen": -65.07535552978516, "logps/oppo_real": -380.3414306640625, "logps/real": -379.77105712890625, "loss": -2.6882, "loss/gen": 0.38962453603744507, "loss/real": -3.060286521911621, "rewards/accuracies": 1.0, "rewards/generated": -113.75869750976562, "rewards/margins": 114.32907104492188, "rewards/real": 0.5703763961791992, "step": 114 }, { "epoch": 0.48, "grad_norm": 66.47038447097135, "learning_rate": 4.754273504273504e-07, "logits/generated": -2.6704883575439453, "logits/oppo_generated": -2.8074076175689697, "logits/oppo_real": -2.9744620323181152, "logits/real": -2.5615124702453613, "logps/generated": -175.31643676757812, "logps/oppo_gen": -81.67523193359375, "logps/oppo_real": -332.10321044921875, "logps/real": -320.36962890625, "loss": -2.8096, "loss/gen": 1.0026687383651733, "loss/real": -3.2720324993133545, "rewards/accuracies": 0.875, "rewards/generated": -93.64120483398438, "rewards/margins": 105.37479400634766, "rewards/real": 11.733586311340332, "step": 115 }, { "epoch": 0.49, "grad_norm": 54.16138220416485, "learning_rate": 4.7507122507122507e-07, "logits/generated": -2.586947441101074, "logits/oppo_generated": -2.8780970573425293, "logits/oppo_real": -2.880333185195923, "logits/real": -2.6156821250915527, "logps/generated": -207.31790161132812, "logps/oppo_gen": -83.72149658203125, "logps/oppo_real": -272.17291259765625, "logps/real": -258.2806701660156, "loss": -2.7218, "loss/gen": 0.3455054759979248, "loss/real": -3.568074941635132, "rewards/accuracies": 1.0, "rewards/generated": -123.59640502929688, "rewards/margins": 137.48866271972656, "rewards/real": 13.892251968383789, "step": 116 }, { "epoch": 0.49, "grad_norm": 64.72151128826876, "learning_rate": 4.747150997150997e-07, "logits/generated": -2.5787789821624756, "logits/oppo_generated": -2.8689210414886475, "logits/oppo_real": -3.036574602127075, "logits/real": -2.5574660301208496, "logps/generated": -197.68472290039062, "logps/oppo_gen": -61.806739807128906, "logps/oppo_real": -213.864013671875, "logps/real": -206.0244140625, "loss": -2.8435, "loss/gen": 0.28257864713668823, "loss/real": -3.154269218444824, "rewards/accuracies": 1.0, "rewards/generated": -135.8779754638672, "rewards/margins": 143.7175750732422, "rewards/real": 7.839590072631836, "step": 117 }, { "epoch": 0.49, "grad_norm": 63.413190000311644, "learning_rate": 4.743589743589743e-07, "logits/generated": -2.537674903869629, "logits/oppo_generated": -2.847443103790283, "logits/oppo_real": -2.9110074043273926, "logits/real": -2.5497055053710938, "logps/generated": -195.18397521972656, "logps/oppo_gen": -68.70259857177734, "logps/oppo_real": -252.70947265625, "logps/real": -234.55947875976562, "loss": -2.7751, "loss/gen": 0.33247071504592896, "loss/real": -3.345210075378418, "rewards/accuracies": 1.0, "rewards/generated": -126.48136901855469, "rewards/margins": 144.63134765625, "rewards/real": 18.149982452392578, "step": 118 }, { "epoch": 0.5, "grad_norm": 67.1754822100761, "learning_rate": 4.74002849002849e-07, "logits/generated": -2.6043078899383545, "logits/oppo_generated": -2.850525140762329, "logits/oppo_real": -2.9623799324035645, "logits/real": -2.5423567295074463, "logps/generated": -186.6025390625, "logps/oppo_gen": -70.65492248535156, "logps/oppo_real": -241.07968139648438, "logps/real": -243.4227294921875, "loss": -2.7129, "loss/gen": 0.38259631395339966, "loss/real": -2.913571357727051, "rewards/accuracies": 0.875, "rewards/generated": -115.94760131835938, "rewards/margins": 113.60454559326172, "rewards/real": -2.3430585861206055, "step": 119 }, { "epoch": 0.5, "grad_norm": 64.89191358881898, "learning_rate": 4.7364672364672366e-07, "logits/generated": -2.326094150543213, "logits/oppo_generated": -2.760641574859619, "logits/oppo_real": -2.835960865020752, "logits/real": -2.4331917762756348, "logps/generated": -192.00738525390625, "logps/oppo_gen": -77.80702209472656, "logps/oppo_real": -309.97265625, "logps/real": -309.2704162597656, "loss": -2.821, "loss/gen": 0.539390504360199, "loss/real": -3.1048460006713867, "rewards/accuracies": 1.0, "rewards/generated": -114.20036315917969, "rewards/margins": 114.902587890625, "rewards/real": 0.7022085189819336, "step": 120 }, { "epoch": 0.51, "grad_norm": 118.88704263396411, "learning_rate": 4.7329059829059823e-07, "logits/generated": -2.453880786895752, "logits/oppo_generated": -2.762300491333008, "logits/oppo_real": -2.91391658782959, "logits/real": -2.4560084342956543, "logps/generated": -197.6976318359375, "logps/oppo_gen": -79.30331420898438, "logps/oppo_real": -206.95407104492188, "logps/real": -221.3215789794922, "loss": -3.2051, "loss/gen": 0.37958478927612305, "loss/real": -2.6625490188598633, "rewards/accuracies": 1.0, "rewards/generated": -118.39431762695312, "rewards/margins": 104.02679443359375, "rewards/real": -14.367520332336426, "step": 121 }, { "epoch": 0.51, "grad_norm": 118.88704263396411, "learning_rate": 4.7329059829059823e-07, "logits/generated": -2.224625825881958, "logits/oppo_generated": -2.8723740577697754, "logits/oppo_real": -2.730229139328003, "logits/real": -2.5184946060180664, "logps/generated": -221.04774475097656, "logps/oppo_gen": -68.4917984008789, "logps/oppo_real": -205.74790954589844, "logps/real": -212.26702880859375, "loss": -316.7941, "loss/gen": 0.3251597583293915, "loss/real": -2.7914090156555176, "rewards/accuracies": 1.0, "rewards/generated": -152.55593872070312, "rewards/margins": 146.0368194580078, "rewards/real": -6.51912784576416, "step": 122 }, { "epoch": 0.51, "grad_norm": 1343.0752491949213, "learning_rate": 4.729344729344729e-07, "logits/generated": -2.4904122352600098, "logits/oppo_generated": -2.833265781402588, "logits/oppo_real": -2.8581643104553223, "logits/real": -2.5306711196899414, "logps/generated": -204.9275360107422, "logps/oppo_gen": -72.44357299804688, "logps/oppo_real": -294.85699462890625, "logps/real": -290.6130065917969, "loss": -4.5958, "loss/gen": 0.28193220496177673, "loss/real": -3.1163246631622314, "rewards/accuracies": 1.0, "rewards/generated": -132.48397827148438, "rewards/margins": 136.72792053222656, "rewards/real": 4.243948936462402, "step": 123 }, { "epoch": 0.52, "grad_norm": 664.7720944052932, "learning_rate": 4.725783475783476e-07, "logits/generated": -2.349301815032959, "logits/oppo_generated": -2.8131227493286133, "logits/oppo_real": -2.815453052520752, "logits/real": -2.5294294357299805, "logps/generated": -248.4262237548828, "logps/oppo_gen": -118.46414184570312, "logps/oppo_real": -350.6376953125, "logps/real": -330.9083557128906, "loss": -5.5005, "loss/gen": 0.29418256878852844, "loss/real": -3.4984450340270996, "rewards/accuracies": 1.0, "rewards/generated": -129.9620819091797, "rewards/margins": 149.69143676757812, "rewards/real": 19.729347229003906, "step": 124 }, { "epoch": 0.52, "grad_norm": 40.89130414749342, "learning_rate": 4.722222222222222e-07, "logits/generated": -2.5400872230529785, "logits/oppo_generated": -2.868478775024414, "logits/oppo_real": -2.87443208694458, "logits/real": -2.5824098587036133, "logps/generated": -186.3829345703125, "logps/oppo_gen": -72.4801025390625, "logps/oppo_real": -315.2503356933594, "logps/real": -298.22100830078125, "loss": -2.7752, "loss/gen": 0.39562442898750305, "loss/real": -3.2849442958831787, "rewards/accuracies": 1.0, "rewards/generated": -113.90283203125, "rewards/margins": 130.93215942382812, "rewards/real": 17.02932357788086, "step": 125 }, { "epoch": 0.53, "grad_norm": 94.72539979956538, "learning_rate": 4.7186609686609683e-07, "logits/generated": -2.2407426834106445, "logits/oppo_generated": -2.5010550022125244, "logits/oppo_real": -2.635188102722168, "logits/real": -2.2052998542785645, "logps/generated": -200.30931091308594, "logps/oppo_gen": -80.23007202148438, "logps/oppo_real": -347.019287109375, "logps/real": -327.673828125, "loss": -2.9739, "loss/gen": 0.40606454014778137, "loss/real": -4.382803916931152, "rewards/accuracies": 1.0, "rewards/generated": -120.0792465209961, "rewards/margins": 139.4246826171875, "rewards/real": 19.345449447631836, "step": 126 }, { "epoch": 0.53, "grad_norm": 60.16715201536986, "learning_rate": 4.715099715099715e-07, "logits/generated": -2.1603076457977295, "logits/oppo_generated": -2.6126623153686523, "logits/oppo_real": -2.6145567893981934, "logits/real": -2.2864603996276855, "logps/generated": -194.54769897460938, "logps/oppo_gen": -73.5291748046875, "logps/oppo_real": -317.5265808105469, "logps/real": -296.9075012207031, "loss": -2.8726, "loss/gen": 0.3832360804080963, "loss/real": -3.425445795059204, "rewards/accuracies": 1.0, "rewards/generated": -121.01852416992188, "rewards/margins": 141.63758850097656, "rewards/real": 20.619068145751953, "step": 127 }, { "epoch": 0.54, "grad_norm": 99.14064346926541, "learning_rate": 4.711538461538461e-07, "logits/generated": -2.6689248085021973, "logits/oppo_generated": -3.0297465324401855, "logits/oppo_real": -3.101362705230713, "logits/real": -2.7913174629211426, "logps/generated": -225.02159118652344, "logps/oppo_gen": -120.2161865234375, "logps/oppo_real": -532.0965576171875, "logps/real": -496.509033203125, "loss": -2.6576, "loss/gen": 0.5110812187194824, "loss/real": -3.9389498233795166, "rewards/accuracies": 1.0, "rewards/generated": -104.80540466308594, "rewards/margins": 140.39291381835938, "rewards/real": 35.58751678466797, "step": 128 }, { "epoch": 0.54, "grad_norm": 70.39239557984051, "learning_rate": 4.707977207977208e-07, "logits/generated": -2.5018911361694336, "logits/oppo_generated": -2.4462087154388428, "logits/oppo_real": -2.882254123687744, "logits/real": -2.1281325817108154, "logps/generated": -162.50857543945312, "logps/oppo_gen": -74.71348571777344, "logps/oppo_real": -324.086669921875, "logps/real": -299.44586181640625, "loss": -2.7935, "loss/gen": 0.9241290092468262, "loss/real": -3.661430597305298, "rewards/accuracies": 1.0, "rewards/generated": -87.79508972167969, "rewards/margins": 112.4359130859375, "rewards/real": 24.640825271606445, "step": 129 }, { "epoch": 0.54, "grad_norm": 19246.478742876578, "learning_rate": 4.7044159544159537e-07, "logits/generated": -2.6820337772369385, "logits/oppo_generated": -2.9427778720855713, "logits/oppo_real": -2.9869794845581055, "logits/real": -2.646888494491577, "logps/generated": -157.2148895263672, "logps/oppo_gen": -57.98387908935547, "logps/oppo_real": -299.8202209472656, "logps/real": -309.00274658203125, "loss": -58.9365, "loss/gen": 0.7235317230224609, "loss/real": -2.887176990509033, "rewards/accuracies": 1.0, "rewards/generated": -99.23101806640625, "rewards/margins": 90.04846954345703, "rewards/real": -9.182544708251953, "step": 130 }, { "epoch": 0.55, "grad_norm": 132.2171139995689, "learning_rate": 4.7008547008547005e-07, "logits/generated": -2.256178855895996, "logits/oppo_generated": -2.462200880050659, "logits/oppo_real": -2.7382378578186035, "logits/real": -2.1479060649871826, "logps/generated": -191.7882537841797, "logps/oppo_gen": -109.31198120117188, "logps/oppo_real": -333.22021484375, "logps/real": -315.614013671875, "loss": -3.274, "loss/gen": 1.3347947597503662, "loss/real": -3.325333595275879, "rewards/accuracies": 1.0, "rewards/generated": -82.47628021240234, "rewards/margins": 100.08245849609375, "rewards/real": 17.606182098388672, "step": 131 }, { "epoch": 0.55, "grad_norm": 74.27982684269834, "learning_rate": 4.697293447293447e-07, "logits/generated": -2.584226131439209, "logits/oppo_generated": -2.9814329147338867, "logits/oppo_real": -2.8366198539733887, "logits/real": -2.7369401454925537, "logps/generated": -231.37564086914062, "logps/oppo_gen": -117.97686767578125, "logps/oppo_real": -333.4208679199219, "logps/real": -292.571044921875, "loss": -2.9361, "loss/gen": 0.7850100994110107, "loss/real": -4.297349452972412, "rewards/accuracies": 1.0, "rewards/generated": -113.39878845214844, "rewards/margins": 154.24859619140625, "rewards/real": 40.84980773925781, "step": 132 }, { "epoch": 0.56, "grad_norm": 62.146904934749166, "learning_rate": 4.6937321937321934e-07, "logits/generated": -2.1917073726654053, "logits/oppo_generated": -2.6781723499298096, "logits/oppo_real": -2.516916513442993, "logits/real": -2.4100053310394287, "logps/generated": -191.9784393310547, "logps/oppo_gen": -60.19814682006836, "logps/oppo_real": -262.58551025390625, "logps/real": -243.27468872070312, "loss": -2.8222, "loss/gen": 0.7389452457427979, "loss/real": -3.3546838760375977, "rewards/accuracies": 1.0, "rewards/generated": -131.78028869628906, "rewards/margins": 151.09109497070312, "rewards/real": 19.31081771850586, "step": 133 }, { "epoch": 0.56, "grad_norm": 62.146904934749166, "learning_rate": 4.6937321937321934e-07, "logits/generated": -2.438559055328369, "logits/oppo_generated": -2.8787498474121094, "logits/oppo_real": -2.805894374847412, "logits/real": -2.6161952018737793, "logps/generated": -236.9315185546875, "logps/oppo_gen": -124.28936767578125, "logps/oppo_real": -606.1627807617188, "logps/real": -575.891845703125, "loss": -30094.6035, "loss/gen": 0.43543320894241333, "loss/real": -4.007241725921631, "rewards/accuracies": 1.0, "rewards/generated": -112.64216613769531, "rewards/margins": 142.91311645507812, "rewards/real": 30.27095603942871, "step": 134 }, { "epoch": 0.56, "grad_norm": 78.11314597568548, "learning_rate": 4.69017094017094e-07, "logits/generated": -2.526062488555908, "logits/oppo_generated": -2.765538454055786, "logits/oppo_real": -2.839543342590332, "logits/real": -2.4912123680114746, "logps/generated": -192.7285919189453, "logps/oppo_gen": -83.72669982910156, "logps/oppo_real": -361.6756591796875, "logps/real": -346.46002197265625, "loss": -3.0693, "loss/gen": 0.47312071919441223, "loss/real": -3.3567910194396973, "rewards/accuracies": 1.0, "rewards/generated": -109.00190734863281, "rewards/margins": 124.21757507324219, "rewards/real": 15.215664863586426, "step": 135 }, { "epoch": 0.57, "grad_norm": 54.23171224736731, "learning_rate": 4.6866096866096864e-07, "logits/generated": -2.607853889465332, "logits/oppo_generated": -2.7416014671325684, "logits/oppo_real": -2.8941569328308105, "logits/real": -2.4866786003112793, "logps/generated": -161.49307250976562, "logps/oppo_gen": -51.659912109375, "logps/oppo_real": -267.5926513671875, "logps/real": -247.1339111328125, "loss": -2.9068, "loss/gen": 0.421144962310791, "loss/real": -3.4416115283966064, "rewards/accuracies": 1.0, "rewards/generated": -109.8331527709961, "rewards/margins": 130.2919158935547, "rewards/real": 20.458759307861328, "step": 136 }, { "epoch": 0.57, "grad_norm": 72.46861187459885, "learning_rate": 4.6830484330484326e-07, "logits/generated": -2.2220163345336914, "logits/oppo_generated": -2.609920024871826, "logits/oppo_real": -2.5399818420410156, "logits/real": -2.291043758392334, "logps/generated": -210.66543579101562, "logps/oppo_gen": -81.96345520019531, "logps/oppo_real": -258.99554443359375, "logps/real": -252.50823974609375, "loss": -3.0398, "loss/gen": 0.3264577388763428, "loss/real": -3.0477218627929688, "rewards/accuracies": 1.0, "rewards/generated": -128.70199584960938, "rewards/margins": 135.1892852783203, "rewards/real": 6.4872941970825195, "step": 137 }, { "epoch": 0.58, "grad_norm": 72.46861187459885, "learning_rate": 4.6830484330484326e-07, "logits/generated": -2.443568706512451, "logits/oppo_generated": -2.89731502532959, "logits/oppo_real": -2.861166000366211, "logits/real": -2.563661575317383, "logps/generated": -180.6520538330078, "logps/oppo_gen": -61.10588073730469, "logps/oppo_real": -297.8720703125, "logps/real": -281.30902099609375, "loss": -231011.7969, "loss/gen": 0.3417486846446991, "loss/real": -3.499724864959717, "rewards/accuracies": 1.0, "rewards/generated": -119.54617309570312, "rewards/margins": 136.10922241210938, "rewards/real": 16.563053131103516, "step": 138 }, { "epoch": 0.58, "grad_norm": 68.7764082802926, "learning_rate": 4.6794871794871794e-07, "logits/generated": -2.4255595207214355, "logits/oppo_generated": -2.8648695945739746, "logits/oppo_real": -2.711393356323242, "logits/real": -2.6028270721435547, "logps/generated": -223.78445434570312, "logps/oppo_gen": -111.59371948242188, "logps/oppo_real": -521.255859375, "logps/real": -493.6855773925781, "loss": -2.7501, "loss/gen": 0.418493390083313, "loss/real": -3.638298749923706, "rewards/accuracies": 1.0, "rewards/generated": -112.19073486328125, "rewards/margins": 139.76104736328125, "rewards/real": 27.570310592651367, "step": 139 }, { "epoch": 0.59, "grad_norm": 60.205945382287624, "learning_rate": 4.675925925925926e-07, "logits/generated": -2.525608539581299, "logits/oppo_generated": -2.8064088821411133, "logits/oppo_real": -2.845989227294922, "logits/real": -2.5157408714294434, "logps/generated": -162.0449676513672, "logps/oppo_gen": -52.78784942626953, "logps/oppo_real": -172.55088806152344, "logps/real": -161.2114715576172, "loss": -2.904, "loss/gen": 0.4695492088794708, "loss/real": -3.171067237854004, "rewards/accuracies": 1.0, "rewards/generated": -109.25712585449219, "rewards/margins": 120.59654235839844, "rewards/real": 11.339418411254883, "step": 140 }, { "epoch": 0.59, "grad_norm": 70.03484065984587, "learning_rate": 4.672364672364672e-07, "logits/generated": -2.5758299827575684, "logits/oppo_generated": -3.0264251232147217, "logits/oppo_real": -2.836057186126709, "logits/real": -2.738698959350586, "logps/generated": -218.2009735107422, "logps/oppo_gen": -74.337158203125, "logps/oppo_real": -371.032470703125, "logps/real": -321.6204833984375, "loss": -2.9986, "loss/gen": 0.24696138501167297, "loss/real": -4.774725437164307, "rewards/accuracies": 1.0, "rewards/generated": -143.8638153076172, "rewards/margins": 193.27581787109375, "rewards/real": 49.41199493408203, "step": 141 }, { "epoch": 0.59, "grad_norm": 7817.082320149891, "learning_rate": 4.6688034188034186e-07, "logits/generated": -2.6040773391723633, "logits/oppo_generated": -2.876476764678955, "logits/oppo_real": -2.912707805633545, "logits/real": -2.6422886848449707, "logps/generated": -201.4759063720703, "logps/oppo_gen": -90.53692626953125, "logps/oppo_real": -383.74615478515625, "logps/real": -350.41131591796875, "loss": -19.0115, "loss/gen": 0.4651464819908142, "loss/real": -3.848228931427002, "rewards/accuracies": 1.0, "rewards/generated": -110.93898010253906, "rewards/margins": 144.27383422851562, "rewards/real": 33.33485794067383, "step": 142 }, { "epoch": 0.6, "grad_norm": 105.7367382346363, "learning_rate": 4.6652421652421653e-07, "logits/generated": -2.674943447113037, "logits/oppo_generated": -2.9819746017456055, "logits/oppo_real": -3.1959123611450195, "logits/real": -2.714082717895508, "logps/generated": -279.74652099609375, "logps/oppo_gen": -152.70217895507812, "logps/oppo_real": -483.54266357421875, "logps/real": -443.05084228515625, "loss": -3.1497, "loss/gen": 0.3712007403373718, "loss/real": -4.19202184677124, "rewards/accuracies": 1.0, "rewards/generated": -127.04434204101562, "rewards/margins": 167.5361328125, "rewards/real": 40.49180603027344, "step": 143 }, { "epoch": 0.6, "grad_norm": 320.13193081134534, "learning_rate": 4.6616809116809116e-07, "logits/generated": -2.6575989723205566, "logits/oppo_generated": -2.7378830909729004, "logits/oppo_real": -3.110536813735962, "logits/real": -2.483811378479004, "logps/generated": -208.11447143554688, "logps/oppo_gen": -86.0918960571289, "logps/oppo_real": -447.7939147949219, "logps/real": -449.82757568359375, "loss": -3.1025, "loss/gen": 0.3562849164009094, "loss/real": -3.004971981048584, "rewards/accuracies": 1.0, "rewards/generated": -122.0225830078125, "rewards/margins": 119.98893737792969, "rewards/real": -2.033646583557129, "step": 144 }, { "epoch": 0.61, "grad_norm": 56.81756199946017, "learning_rate": 4.658119658119658e-07, "logits/generated": -2.8450493812561035, "logits/oppo_generated": -2.7491419315338135, "logits/oppo_real": -3.191051483154297, "logits/real": -2.4600586891174316, "logps/generated": -215.31295776367188, "logps/oppo_gen": -96.26548767089844, "logps/oppo_real": -305.7531433105469, "logps/real": -280.7630615234375, "loss": -2.8775, "loss/gen": 0.5062470436096191, "loss/real": -3.553849220275879, "rewards/accuracies": 1.0, "rewards/generated": -119.04747009277344, "rewards/margins": 144.03756713867188, "rewards/real": 24.990097045898438, "step": 145 }, { "epoch": 0.61, "grad_norm": 56.194760202520214, "learning_rate": 4.654558404558404e-07, "logits/generated": -2.451647996902466, "logits/oppo_generated": -2.8662476539611816, "logits/oppo_real": -2.7619881629943848, "logits/real": -2.580970287322998, "logps/generated": -190.9722900390625, "logps/oppo_gen": -76.39656066894531, "logps/oppo_real": -342.36138916015625, "logps/real": -320.16766357421875, "loss": -2.8042, "loss/gen": 0.40042293071746826, "loss/real": -3.5132551193237305, "rewards/accuracies": 1.0, "rewards/generated": -114.57573699951172, "rewards/margins": 136.7694854736328, "rewards/real": 22.193754196166992, "step": 146 }, { "epoch": 0.62, "grad_norm": 54.320377107864644, "learning_rate": 4.650997150997151e-07, "logits/generated": -2.6203999519348145, "logits/oppo_generated": -2.973456859588623, "logits/oppo_real": -2.9541869163513184, "logits/real": -2.606893539428711, "logps/generated": -158.5798797607422, "logps/oppo_gen": -58.52758026123047, "logps/oppo_real": -196.6337127685547, "logps/real": -194.17990112304688, "loss": -3.0855, "loss/gen": 0.5960197448730469, "loss/real": -3.0248513221740723, "rewards/accuracies": 1.0, "rewards/generated": -100.05229949951172, "rewards/margins": 102.50611877441406, "rewards/real": 2.453828811645508, "step": 147 }, { "epoch": 0.62, "grad_norm": 75.8143516622595, "learning_rate": 4.6474358974358975e-07, "logits/generated": -2.4562883377075195, "logits/oppo_generated": -2.9579458236694336, "logits/oppo_real": -2.8345115184783936, "logits/real": -2.6457347869873047, "logps/generated": -204.59548950195312, "logps/oppo_gen": -86.37559509277344, "logps/oppo_real": -329.4002685546875, "logps/real": -317.261962890625, "loss": -3.0377, "loss/gen": 0.4204404056072235, "loss/real": -3.355125904083252, "rewards/accuracies": 1.0, "rewards/generated": -118.21987915039062, "rewards/margins": 130.35821533203125, "rewards/real": 12.138343811035156, "step": 148 }, { "epoch": 0.62, "grad_norm": 75.08773189056086, "learning_rate": 4.643874643874643e-07, "logits/generated": -2.1544671058654785, "logits/oppo_generated": -2.4297678470611572, "logits/oppo_real": -2.5349526405334473, "logits/real": -2.043349027633667, "logps/generated": -245.35202026367188, "logps/oppo_gen": -139.25880432128906, "logps/oppo_real": -366.9024658203125, "logps/real": -337.96356201171875, "loss": -2.9923, "loss/gen": 0.5029778480529785, "loss/real": -3.993164300918579, "rewards/accuracies": 1.0, "rewards/generated": -106.09322357177734, "rewards/margins": 135.03216552734375, "rewards/real": 28.938934326171875, "step": 149 }, { "epoch": 0.63, "grad_norm": 576.2930027338524, "learning_rate": 4.64031339031339e-07, "logits/generated": -2.114830493927002, "logits/oppo_generated": -2.59027099609375, "logits/oppo_real": -2.5751681327819824, "logits/real": -2.2125301361083984, "logps/generated": -178.6730499267578, "logps/oppo_gen": -44.13750076293945, "logps/oppo_real": -126.39328002929688, "logps/real": -146.06198120117188, "loss": -4.0466, "loss/gen": 0.268838495016098, "loss/real": -2.595045566558838, "rewards/accuracies": 0.875, "rewards/generated": -134.53555297851562, "rewards/margins": 114.86683654785156, "rewards/real": -19.668712615966797, "step": 150 }, { "epoch": 0.63, "grad_norm": 54.61241720782235, "learning_rate": 4.6367521367521367e-07, "logits/generated": -2.424686908721924, "logits/oppo_generated": -2.8061888217926025, "logits/oppo_real": -2.885352611541748, "logits/real": -2.449500560760498, "logps/generated": -222.48379516601562, "logps/oppo_gen": -82.9956283569336, "logps/oppo_real": -287.7582702636719, "logps/real": -284.30731201171875, "loss": -3.0876, "loss/gen": 0.32225707173347473, "loss/real": -2.8702611923217773, "rewards/accuracies": 1.0, "rewards/generated": -139.4881591796875, "rewards/margins": 142.93911743164062, "rewards/real": 3.450957775115967, "step": 151 }, { "epoch": 0.64, "grad_norm": 85.11727319006701, "learning_rate": 4.633190883190883e-07, "logits/generated": -2.086930274963379, "logits/oppo_generated": -2.6804826259613037, "logits/oppo_real": -2.560675621032715, "logits/real": -2.307936668395996, "logps/generated": -248.29327392578125, "logps/oppo_gen": -125.20469665527344, "logps/oppo_real": -214.75454711914062, "logps/real": -237.43409729003906, "loss": -2.705, "loss/gen": 0.33005163073539734, "loss/real": -2.6607136726379395, "rewards/accuracies": 0.875, "rewards/generated": -123.08856201171875, "rewards/margins": 100.40898895263672, "rewards/real": -22.67957878112793, "step": 152 }, { "epoch": 0.64, "grad_norm": 76.16983490857781, "learning_rate": 4.6296296296296297e-07, "logits/generated": -2.4300737380981445, "logits/oppo_generated": -2.8161306381225586, "logits/oppo_real": -2.873737096786499, "logits/real": -2.3974549770355225, "logps/generated": -211.3475341796875, "logps/oppo_gen": -39.4675178527832, "logps/oppo_real": -94.7720718383789, "logps/real": -112.44818115234375, "loss": -2.8425, "loss/gen": 0.24527525901794434, "loss/real": -2.5662083625793457, "rewards/accuracies": 0.875, "rewards/generated": -171.88002014160156, "rewards/margins": 154.2039031982422, "rewards/real": -17.676116943359375, "step": 153 }, { "epoch": 0.64, "grad_norm": 52.66541092417774, "learning_rate": 4.626068376068376e-07, "logits/generated": -2.2791929244995117, "logits/oppo_generated": -2.754338026046753, "logits/oppo_real": -2.6611428260803223, "logits/real": -2.365473747253418, "logps/generated": -180.72125244140625, "logps/oppo_gen": -53.64311981201172, "logps/oppo_real": -189.60964965820312, "logps/real": -185.12130737304688, "loss": -2.8864, "loss/gen": 0.31177347898483276, "loss/real": -3.0672144889831543, "rewards/accuracies": 1.0, "rewards/generated": -127.07814025878906, "rewards/margins": 131.56646728515625, "rewards/real": 4.488343238830566, "step": 154 }, { "epoch": 0.65, "grad_norm": 89.10240608160505, "learning_rate": 4.622507122507122e-07, "logits/generated": -2.525489568710327, "logits/oppo_generated": -2.8700437545776367, "logits/oppo_real": -3.012883186340332, "logits/real": -2.459331512451172, "logps/generated": -198.41644287109375, "logps/oppo_gen": -64.43563842773438, "logps/oppo_real": -366.68572998046875, "logps/real": -346.22503662109375, "loss": -3.2314, "loss/gen": 0.27841734886169434, "loss/real": -3.4703869819641113, "rewards/accuracies": 1.0, "rewards/generated": -133.98080444335938, "rewards/margins": 154.4415283203125, "rewards/real": 20.46072006225586, "step": 155 }, { "epoch": 0.65, "grad_norm": 1254.2688109013952, "learning_rate": 4.618945868945869e-07, "logits/generated": -2.289478302001953, "logits/oppo_generated": -2.896176338195801, "logits/oppo_real": -2.7520911693573, "logits/real": -2.514561653137207, "logps/generated": -221.06515502929688, "logps/oppo_gen": -94.6259765625, "logps/oppo_real": -329.9571533203125, "logps/real": -310.5443115234375, "loss": -5.4989, "loss/gen": 0.3454495668411255, "loss/real": -3.5192689895629883, "rewards/accuracies": 1.0, "rewards/generated": -126.43919372558594, "rewards/margins": 145.85203552246094, "rewards/real": 19.412845611572266, "step": 156 }, { "epoch": 0.66, "grad_norm": 49.926036602752426, "learning_rate": 4.6153846153846156e-07, "logits/generated": -2.4086711406707764, "logits/oppo_generated": -2.72526478767395, "logits/oppo_real": -2.760162591934204, "logits/real": -2.3816709518432617, "logps/generated": -189.68716430664062, "logps/oppo_gen": -70.71673583984375, "logps/oppo_real": -391.76458740234375, "logps/real": -400.5279846191406, "loss": -2.873, "loss/gen": 0.38557717204093933, "loss/real": -2.8944320678710938, "rewards/accuracies": 0.875, "rewards/generated": -118.97042846679688, "rewards/margins": 110.20704650878906, "rewards/real": -8.763385772705078, "step": 157 }, { "epoch": 0.66, "grad_norm": 108.6453699255058, "learning_rate": 4.6118233618233613e-07, "logits/generated": -2.9071619510650635, "logits/oppo_generated": -2.979785919189453, "logits/oppo_real": -3.2641677856445312, "logits/real": -2.598475933074951, "logps/generated": -202.7903594970703, "logps/oppo_gen": -92.89317321777344, "logps/oppo_real": -330.3245849609375, "logps/real": -319.0426025390625, "loss": -3.0021, "loss/gen": 0.49392998218536377, "loss/real": -3.2759296894073486, "rewards/accuracies": 1.0, "rewards/generated": -109.89718627929688, "rewards/margins": 121.17916870117188, "rewards/real": 11.28197956085205, "step": 158 }, { "epoch": 0.67, "grad_norm": 59.173970949148114, "learning_rate": 4.608262108262108e-07, "logits/generated": -2.1723835468292236, "logits/oppo_generated": -2.775574207305908, "logits/oppo_real": -2.598371744155884, "logits/real": -2.396592140197754, "logps/generated": -187.12681579589844, "logps/oppo_gen": -65.71693420410156, "logps/oppo_real": -220.19737243652344, "logps/real": -200.814453125, "loss": -3.0664, "loss/gen": 0.3779526352882385, "loss/real": -3.3624069690704346, "rewards/accuracies": 1.0, "rewards/generated": -121.40988159179688, "rewards/margins": 140.79281616210938, "rewards/real": 19.38292694091797, "step": 159 }, { "epoch": 0.67, "grad_norm": 75.21973020757254, "learning_rate": 4.6047008547008543e-07, "logits/generated": -2.0699994564056396, "logits/oppo_generated": -2.6892812252044678, "logits/oppo_real": -2.527797222137451, "logits/real": -2.2784643173217773, "logps/generated": -172.57388305664062, "logps/oppo_gen": -56.507102966308594, "logps/oppo_real": -203.99942016601562, "logps/real": -220.61575317382812, "loss": -2.9974, "loss/gen": 0.3955667018890381, "loss/real": -2.5530004501342773, "rewards/accuracies": 1.0, "rewards/generated": -116.06678771972656, "rewards/margins": 99.45044708251953, "rewards/real": -16.6163387298584, "step": 160 }, { "epoch": 0.67, "grad_norm": 69.13334884030283, "learning_rate": 4.601139601139601e-07, "logits/generated": -2.4354324340820312, "logits/oppo_generated": -2.892515182495117, "logits/oppo_real": -2.87583589553833, "logits/real": -2.509371280670166, "logps/generated": -193.70269775390625, "logps/oppo_gen": -70.63409423828125, "logps/oppo_real": -236.45480346679688, "logps/real": -236.11924743652344, "loss": -2.9182, "loss/gen": 0.3297494649887085, "loss/real": -3.017835855484009, "rewards/accuracies": 0.875, "rewards/generated": -123.068603515625, "rewards/margins": 123.4041748046875, "rewards/real": 0.3355722427368164, "step": 161 }, { "epoch": 0.68, "grad_norm": 96.21606544647312, "learning_rate": 4.5975783475783473e-07, "logits/generated": -2.1324949264526367, "logits/oppo_generated": -2.2372124195098877, "logits/oppo_real": -2.6531500816345215, "logits/real": -1.7291717529296875, "logps/generated": -151.3114471435547, "logps/oppo_gen": -49.9699821472168, "logps/oppo_real": -257.7629699707031, "logps/real": -268.58935546875, "loss": -3.1964, "loss/gen": 1.1664835214614868, "loss/real": -2.592834234237671, "rewards/accuracies": 0.875, "rewards/generated": -101.34146118164062, "rewards/margins": 90.51508331298828, "rewards/real": -10.826382637023926, "step": 162 }, { "epoch": 0.68, "grad_norm": 1100.3644850684705, "learning_rate": 4.5940170940170935e-07, "logits/generated": -2.265676736831665, "logits/oppo_generated": -2.6594979763031006, "logits/oppo_real": -2.72336483001709, "logits/real": -2.2372395992279053, "logps/generated": -186.74288940429688, "logps/oppo_gen": -69.47285461425781, "logps/oppo_real": -203.925048828125, "logps/real": -171.45407104492188, "loss": -4.8218, "loss/gen": 0.37407732009887695, "loss/real": -3.8044371604919434, "rewards/accuracies": 1.0, "rewards/generated": -117.27003479003906, "rewards/margins": 149.74102783203125, "rewards/real": 32.470985412597656, "step": 163 }, { "epoch": 0.69, "grad_norm": 48.85153938608921, "learning_rate": 4.59045584045584e-07, "logits/generated": -2.573637008666992, "logits/oppo_generated": -2.84741473197937, "logits/oppo_real": -2.9322423934936523, "logits/real": -2.466978073120117, "logps/generated": -193.22213745117188, "logps/oppo_gen": -72.28129577636719, "logps/oppo_real": -342.0706787109375, "logps/real": -368.0989990234375, "loss": -2.7908, "loss/gen": 0.3612110912799835, "loss/real": -2.5483293533325195, "rewards/accuracies": 0.875, "rewards/generated": -120.94083404541016, "rewards/margins": 94.91249084472656, "rewards/real": -26.028343200683594, "step": 164 }, { "epoch": 0.69, "grad_norm": 61.90407697473366, "learning_rate": 4.586894586894587e-07, "logits/generated": -2.5092499256134033, "logits/oppo_generated": -2.8123486042022705, "logits/oppo_real": -2.9484448432922363, "logits/real": -2.4517569541931152, "logps/generated": -189.08139038085938, "logps/oppo_gen": -78.67784118652344, "logps/oppo_real": -224.94638061523438, "logps/real": -222.41128540039062, "loss": -2.8246, "loss/gen": 0.4234386086463928, "loss/real": -3.0529117584228516, "rewards/accuracies": 1.0, "rewards/generated": -110.4035415649414, "rewards/margins": 112.93864440917969, "rewards/real": 2.5351076126098633, "step": 165 }, { "epoch": 0.69, "grad_norm": 162.38187413278305, "learning_rate": 4.5833333333333327e-07, "logits/generated": -2.1803882122039795, "logits/oppo_generated": -2.6430654525756836, "logits/oppo_real": -2.7417783737182617, "logits/real": -2.243424415588379, "logps/generated": -164.8372802734375, "logps/oppo_gen": -63.871150970458984, "logps/oppo_real": -224.14703369140625, "logps/real": -182.39511108398438, "loss": -3.7083, "loss/gen": 0.7786407470703125, "loss/real": -4.343791961669922, "rewards/accuracies": 0.875, "rewards/generated": -100.96613311767578, "rewards/margins": 142.7180633544922, "rewards/real": 41.75192642211914, "step": 166 }, { "epoch": 0.7, "grad_norm": 49.259806784569555, "learning_rate": 4.5797720797720794e-07, "logits/generated": -2.4166438579559326, "logits/oppo_generated": -2.757966995239258, "logits/oppo_real": -2.906935691833496, "logits/real": -2.3613169193267822, "logps/generated": -156.58056640625, "logps/oppo_gen": -53.980133056640625, "logps/oppo_real": -168.99293518066406, "logps/real": -158.2866973876953, "loss": -2.9404, "loss/gen": 0.9939805269241333, "loss/real": -3.2384204864501953, "rewards/accuracies": 1.0, "rewards/generated": -102.6004409790039, "rewards/margins": 113.30667114257812, "rewards/real": 10.706242561340332, "step": 167 }, { "epoch": 0.7, "grad_norm": 49.259806784569555, "learning_rate": 4.5797720797720794e-07, "logits/generated": -2.1523754596710205, "logits/oppo_generated": -2.34848690032959, "logits/oppo_real": -2.549453020095825, "logits/real": -1.9709889888763428, "logps/generated": -172.17507934570312, "logps/oppo_gen": -41.99907684326172, "logps/oppo_real": -137.05735778808594, "logps/real": -117.31524658203125, "loss": -36278.4766, "loss/gen": 0.308816134929657, "loss/real": -3.7620303630828857, "rewards/accuracies": 1.0, "rewards/generated": -130.17599487304688, "rewards/margins": 149.91812133789062, "rewards/real": 19.742107391357422, "step": 168 }, { "epoch": 0.71, "grad_norm": 58.361834787518674, "learning_rate": 4.576210826210826e-07, "logits/generated": -2.2917189598083496, "logits/oppo_generated": -2.5094847679138184, "logits/oppo_real": -2.6891722679138184, "logits/real": -2.1305155754089355, "logps/generated": -260.20501708984375, "logps/oppo_gen": -68.40258026123047, "logps/oppo_real": -223.42794799804688, "logps/real": -207.68116760253906, "loss": -2.9255, "loss/gen": 0.7335460186004639, "loss/real": -3.7286908626556396, "rewards/accuracies": 1.0, "rewards/generated": -191.8024444580078, "rewards/margins": 207.54922485351562, "rewards/real": 15.746776580810547, "step": 169 }, { "epoch": 0.71, "grad_norm": 170.55447260332988, "learning_rate": 4.5726495726495724e-07, "logits/generated": -2.4084904193878174, "logits/oppo_generated": -2.8935999870300293, "logits/oppo_real": -2.775484561920166, "logits/real": -2.5147581100463867, "logps/generated": -170.41473388671875, "logps/oppo_gen": -50.93283462524414, "logps/oppo_real": -316.0002136230469, "logps/real": -287.05810546875, "loss": -3.0655, "loss/gen": 0.39951732754707336, "loss/real": -3.662087917327881, "rewards/accuracies": 1.0, "rewards/generated": -119.48190307617188, "rewards/margins": 148.4240264892578, "rewards/real": 28.942119598388672, "step": 170 }, { "epoch": 0.72, "grad_norm": 2776.5446151646834, "learning_rate": 4.569088319088319e-07, "logits/generated": -2.689021110534668, "logits/oppo_generated": -2.8526816368103027, "logits/oppo_real": -3.2386014461517334, "logits/real": -2.449385166168213, "logps/generated": -224.29080200195312, "logps/oppo_gen": -113.54923248291016, "logps/oppo_real": -351.7125549316406, "logps/real": -352.888427734375, "loss": -9.9952, "loss/gen": 0.5014157295227051, "loss/real": -2.938952922821045, "rewards/accuracies": 0.875, "rewards/generated": -110.7415771484375, "rewards/margins": 109.56568145751953, "rewards/real": -1.1758899688720703, "step": 171 }, { "epoch": 0.72, "grad_norm": 74.08580527313414, "learning_rate": 4.5655270655270654e-07, "logits/generated": -2.553030252456665, "logits/oppo_generated": -2.9850940704345703, "logits/oppo_real": -3.0315611362457275, "logits/real": -2.5720577239990234, "logps/generated": -181.4952392578125, "logps/oppo_gen": -61.65489196777344, "logps/oppo_real": -151.10653686523438, "logps/real": -148.4203643798828, "loss": -3.289, "loss/gen": 0.38128042221069336, "loss/real": -2.9334371089935303, "rewards/accuracies": 1.0, "rewards/generated": -119.84036254882812, "rewards/margins": 122.52653503417969, "rewards/real": 2.686166763305664, "step": 172 }, { "epoch": 0.72, "grad_norm": 63.58691225524518, "learning_rate": 4.5619658119658116e-07, "logits/generated": -2.707376480102539, "logits/oppo_generated": -2.891350746154785, "logits/oppo_real": -3.0990657806396484, "logits/real": -2.544325828552246, "logps/generated": -313.1190490722656, "logps/oppo_gen": -212.02532958984375, "logps/oppo_real": -549.8078002929688, "logps/real": -522.171142578125, "loss": -3.0226, "loss/gen": 0.6081419587135315, "loss/real": -3.838395118713379, "rewards/accuracies": 1.0, "rewards/generated": -101.09373474121094, "rewards/margins": 128.73036193847656, "rewards/real": 27.636632919311523, "step": 173 }, { "epoch": 0.73, "grad_norm": 54.7705752933886, "learning_rate": 4.5584045584045584e-07, "logits/generated": -2.31124210357666, "logits/oppo_generated": -2.861656904220581, "logits/oppo_real": -2.749734878540039, "logits/real": -2.4704031944274902, "logps/generated": -180.6676025390625, "logps/oppo_gen": -52.08341598510742, "logps/oppo_real": -268.2560119628906, "logps/real": -232.18594360351562, "loss": -3.0632, "loss/gen": 0.29419511556625366, "loss/real": -3.9939093589782715, "rewards/accuracies": 1.0, "rewards/generated": -128.5841827392578, "rewards/margins": 164.6542510986328, "rewards/real": 36.070064544677734, "step": 174 }, { "epoch": 0.73, "grad_norm": 96.52861966309526, "learning_rate": 4.5548433048433046e-07, "logits/generated": -2.3479509353637695, "logits/oppo_generated": -2.8331031799316406, "logits/oppo_real": -2.8462958335876465, "logits/real": -2.4217453002929688, "logps/generated": -206.61517333984375, "logps/oppo_gen": -78.92254638671875, "logps/oppo_real": -224.86373901367188, "logps/real": -216.6464385986328, "loss": -3.2511, "loss/gen": 0.3076120615005493, "loss/real": -3.089078903198242, "rewards/accuracies": 1.0, "rewards/generated": -127.69261169433594, "rewards/margins": 135.909912109375, "rewards/real": 8.217292785644531, "step": 175 }, { "epoch": 0.74, "grad_norm": 56.16997780175812, "learning_rate": 4.551282051282051e-07, "logits/generated": -2.3228254318237305, "logits/oppo_generated": -2.879185199737549, "logits/oppo_real": -2.873112678527832, "logits/real": -2.4802536964416504, "logps/generated": -157.61184692382812, "logps/oppo_gen": -49.27460479736328, "logps/oppo_real": -375.43463134765625, "logps/real": -348.29833984375, "loss": -2.954, "loss/gen": 0.6059376001358032, "loss/real": -3.6162662506103516, "rewards/accuracies": 1.0, "rewards/generated": -108.33724212646484, "rewards/margins": 135.4735107421875, "rewards/real": 27.136272430419922, "step": 176 }, { "epoch": 0.74, "grad_norm": 47.747249184508, "learning_rate": 4.5477207977207976e-07, "logits/generated": -2.5688347816467285, "logits/oppo_generated": -3.0462043285369873, "logits/oppo_real": -3.1089582443237305, "logits/real": -2.586811065673828, "logps/generated": -204.5193634033203, "logps/oppo_gen": -77.79332733154297, "logps/oppo_real": -319.2231750488281, "logps/real": -287.8105773925781, "loss": -2.9293, "loss/gen": 0.32107144594192505, "loss/real": -3.8302066326141357, "rewards/accuracies": 1.0, "rewards/generated": -126.72602844238281, "rewards/margins": 158.13864135742188, "rewards/real": 31.412609100341797, "step": 177 }, { "epoch": 0.74, "grad_norm": 56.68338256821835, "learning_rate": 4.544159544159544e-07, "logits/generated": -2.477287530899048, "logits/oppo_generated": -2.815687656402588, "logits/oppo_real": -2.9501237869262695, "logits/real": -2.401658058166504, "logps/generated": -221.77561950683594, "logps/oppo_gen": -103.51431274414062, "logps/oppo_real": -308.8333435058594, "logps/real": -306.259521484375, "loss": -2.9378, "loss/gen": 0.3601089119911194, "loss/real": -3.115224599838257, "rewards/accuracies": 1.0, "rewards/generated": -118.26129913330078, "rewards/margins": 120.83515930175781, "rewards/real": 2.5738563537597656, "step": 178 }, { "epoch": 0.75, "grad_norm": 132.54113453471717, "learning_rate": 4.5405982905982905e-07, "logits/generated": -2.3259053230285645, "logits/oppo_generated": -2.779146194458008, "logits/oppo_real": -2.8336267471313477, "logits/real": -2.3255388736724854, "logps/generated": -202.4166259765625, "logps/oppo_gen": -72.71639251708984, "logps/oppo_real": -196.57557678222656, "logps/real": -192.04830932617188, "loss": -3.1315, "loss/gen": 0.3035447299480438, "loss/real": -3.032370090484619, "rewards/accuracies": 1.0, "rewards/generated": -129.70025634765625, "rewards/margins": 134.2274932861328, "rewards/real": 4.52725076675415, "step": 179 }, { "epoch": 0.75, "grad_norm": 51.83515977656961, "learning_rate": 4.537037037037037e-07, "logits/generated": -2.5221238136291504, "logits/oppo_generated": -2.8425636291503906, "logits/oppo_real": -2.9093685150146484, "logits/real": -2.4757275581359863, "logps/generated": -224.296142578125, "logps/oppo_gen": -95.93893432617188, "logps/oppo_real": -207.11392211914062, "logps/real": -186.50677490234375, "loss": -3.0553, "loss/gen": 0.6323412656784058, "loss/real": -3.4342591762542725, "rewards/accuracies": 1.0, "rewards/generated": -128.35723876953125, "rewards/margins": 148.96438598632812, "rewards/real": 20.607158660888672, "step": 180 }, { "epoch": 0.76, "grad_norm": 57.13147870810802, "learning_rate": 4.533475783475783e-07, "logits/generated": -2.2876157760620117, "logits/oppo_generated": -2.8224010467529297, "logits/oppo_real": -2.778409957885742, "logits/real": -2.416560649871826, "logps/generated": -224.8665313720703, "logps/oppo_gen": -88.16463470458984, "logps/oppo_real": -239.9169921875, "logps/real": -240.70309448242188, "loss": -3.0029, "loss/gen": 0.25222891569137573, "loss/real": -3.012547016143799, "rewards/accuracies": 1.0, "rewards/generated": -136.701904296875, "rewards/margins": 135.91583251953125, "rewards/real": -0.7860813140869141, "step": 181 }, { "epoch": 0.76, "grad_norm": 89.10157585762724, "learning_rate": 4.5299145299145297e-07, "logits/generated": -2.4986462593078613, "logits/oppo_generated": -2.9657952785491943, "logits/oppo_real": -2.9425137042999268, "logits/real": -2.509366035461426, "logps/generated": -201.0703125, "logps/oppo_gen": -76.42547607421875, "logps/oppo_real": -261.8043518066406, "logps/real": -241.64723205566406, "loss": -3.1181, "loss/gen": 0.3136594295501709, "loss/real": -3.37326717376709, "rewards/accuracies": 1.0, "rewards/generated": -124.64483642578125, "rewards/margins": 144.8019561767578, "rewards/real": 20.157115936279297, "step": 182 }, { "epoch": 0.77, "grad_norm": 59.75890689365896, "learning_rate": 4.5263532763532765e-07, "logits/generated": -1.9646540880203247, "logits/oppo_generated": -2.6656646728515625, "logits/oppo_real": -2.512063980102539, "logits/real": -2.13295316696167, "logps/generated": -146.4997100830078, "logps/oppo_gen": -61.16596603393555, "logps/oppo_real": -89.70797729492188, "logps/real": -67.80735778808594, "loss": -3.1443, "loss/gen": 0.8715238571166992, "loss/real": -3.4107680320739746, "rewards/accuracies": 1.0, "rewards/generated": -85.333740234375, "rewards/margins": 107.23435974121094, "rewards/real": 21.90062141418457, "step": 183 }, { "epoch": 0.77, "grad_norm": 9260.308850141015, "learning_rate": 4.522792022792022e-07, "logits/generated": -2.074495792388916, "logits/oppo_generated": -2.679591655731201, "logits/oppo_real": -2.5152084827423096, "logits/real": -2.2176570892333984, "logps/generated": -301.60516357421875, "logps/oppo_gen": -134.39280700683594, "logps/oppo_real": -353.8466491699219, "logps/real": -354.19549560546875, "loss": -41.3972, "loss/gen": 0.28073543310165405, "loss/real": -2.867943525314331, "rewards/accuracies": 1.0, "rewards/generated": -167.2123565673828, "rewards/margins": 166.86349487304688, "rewards/real": -0.34885168075561523, "step": 184 }, { "epoch": 0.77, "grad_norm": 57.11031975962729, "learning_rate": 4.519230769230769e-07, "logits/generated": -2.515873908996582, "logits/oppo_generated": -2.8852622509002686, "logits/oppo_real": -2.9888343811035156, "logits/real": -2.4470033645629883, "logps/generated": -221.6116485595703, "logps/oppo_gen": -86.57408142089844, "logps/oppo_real": -353.78594970703125, "logps/real": -337.73291015625, "loss": -3.0984, "loss/gen": 0.3061344623565674, "loss/real": -3.636414051055908, "rewards/accuracies": 1.0, "rewards/generated": -135.03756713867188, "rewards/margins": 151.0906219482422, "rewards/real": 16.053056716918945, "step": 185 }, { "epoch": 0.78, "grad_norm": 242.2639153640096, "learning_rate": 4.5156695156695157e-07, "logits/generated": -2.5487115383148193, "logits/oppo_generated": -2.894904136657715, "logits/oppo_real": -2.8833250999450684, "logits/real": -2.4917829036712646, "logps/generated": -208.41036987304688, "logps/oppo_gen": -97.552490234375, "logps/oppo_real": -446.60357666015625, "logps/real": -420.88616943359375, "loss": -3.9814, "loss/gen": 0.4838281273841858, "loss/real": -3.5608205795288086, "rewards/accuracies": 1.0, "rewards/generated": -110.85787963867188, "rewards/margins": 136.57525634765625, "rewards/real": 25.71738052368164, "step": 186 }, { "epoch": 0.78, "grad_norm": 61.190406675725114, "learning_rate": 4.512108262108262e-07, "logits/generated": -2.471825122833252, "logits/oppo_generated": -2.9238195419311523, "logits/oppo_real": -2.928109645843506, "logits/real": -2.5521817207336426, "logps/generated": -248.10580444335938, "logps/oppo_gen": -99.34373474121094, "logps/oppo_real": -381.1275634765625, "logps/real": -371.27154541015625, "loss": -3.1159, "loss/gen": 0.34282225370407104, "loss/real": -3.2633209228515625, "rewards/accuracies": 1.0, "rewards/generated": -148.7620849609375, "rewards/margins": 158.61810302734375, "rewards/real": 9.856016159057617, "step": 187 }, { "epoch": 0.79, "grad_norm": 59.89950894519086, "learning_rate": 4.5085470085470087e-07, "logits/generated": -2.162811279296875, "logits/oppo_generated": -2.7080626487731934, "logits/oppo_real": -2.5767087936401367, "logits/real": -2.30268931388855, "logps/generated": -199.12115478515625, "logps/oppo_gen": -46.502037048339844, "logps/oppo_real": -149.05059814453125, "logps/real": -149.08099365234375, "loss": -2.8636, "loss/gen": 0.46407026052474976, "loss/real": -2.982802391052246, "rewards/accuracies": 1.0, "rewards/generated": -152.61912536621094, "rewards/margins": 152.58872985839844, "rewards/real": -0.030394554138183594, "step": 188 }, { "epoch": 0.79, "grad_norm": 63.384206370258354, "learning_rate": 4.5049857549857543e-07, "logits/generated": -2.5746748447418213, "logits/oppo_generated": -2.9217922687530518, "logits/oppo_real": -3.0358145236968994, "logits/real": -2.5657949447631836, "logps/generated": -201.57012939453125, "logps/oppo_gen": -72.13301849365234, "logps/oppo_real": -295.51861572265625, "logps/real": -298.053955078125, "loss": -2.9233, "loss/gen": 0.28429698944091797, "loss/real": -3.108995199203491, "rewards/accuracies": 1.0, "rewards/generated": -129.4371337890625, "rewards/margins": 126.90178680419922, "rewards/real": -2.5353341102600098, "step": 189 }, { "epoch": 0.79, "grad_norm": 94.99060911644108, "learning_rate": 4.501424501424501e-07, "logits/generated": -2.3787384033203125, "logits/oppo_generated": -2.7406344413757324, "logits/oppo_real": -2.799593925476074, "logits/real": -2.3933238983154297, "logps/generated": -222.70245361328125, "logps/oppo_gen": -102.60955810546875, "logps/oppo_real": -305.8299255371094, "logps/real": -273.30157470703125, "loss": -3.2776, "loss/gen": 0.36470329761505127, "loss/real": -3.8918604850769043, "rewards/accuracies": 1.0, "rewards/generated": -120.0928955078125, "rewards/margins": 152.62124633789062, "rewards/real": 32.52833938598633, "step": 190 }, { "epoch": 0.8, "grad_norm": 350.9167124297445, "learning_rate": 4.497863247863248e-07, "logits/generated": -2.6413869857788086, "logits/oppo_generated": -2.8220396041870117, "logits/oppo_real": -3.0663821697235107, "logits/real": -2.490924596786499, "logps/generated": -178.68812561035156, "logps/oppo_gen": -80.95722961425781, "logps/oppo_real": -339.0364074707031, "logps/real": -321.1693115234375, "loss": -3.7641, "loss/gen": 0.9792780876159668, "loss/real": -3.2799935340881348, "rewards/accuracies": 1.0, "rewards/generated": -97.73089599609375, "rewards/margins": 115.59801483154297, "rewards/real": 17.867115020751953, "step": 191 }, { "epoch": 0.8, "grad_norm": 568.1687689410603, "learning_rate": 4.494301994301994e-07, "logits/generated": -2.53959321975708, "logits/oppo_generated": -2.8528313636779785, "logits/oppo_real": -2.9469070434570312, "logits/real": -2.4770290851593018, "logps/generated": -186.40338134765625, "logps/oppo_gen": -55.95906066894531, "logps/oppo_real": -228.37322998046875, "logps/real": -216.5171661376953, "loss": -4.4415, "loss/gen": 0.2824003994464874, "loss/real": -3.352588653564453, "rewards/accuracies": 1.0, "rewards/generated": -130.44430541992188, "rewards/margins": 142.30039978027344, "rewards/real": 11.856078147888184, "step": 192 }, { "epoch": 0.81, "grad_norm": 82.08388787354983, "learning_rate": 4.4907407407407403e-07, "logits/generated": -2.3993959426879883, "logits/oppo_generated": -2.759657859802246, "logits/oppo_real": -2.7739434242248535, "logits/real": -2.4300918579101562, "logps/generated": -195.13946533203125, "logps/oppo_gen": -55.900001525878906, "logps/oppo_real": -240.51673889160156, "logps/real": -254.91921997070312, "loss": -3.0531, "loss/gen": 0.31808772683143616, "loss/real": -2.974119186401367, "rewards/accuracies": 0.875, "rewards/generated": -139.23947143554688, "rewards/margins": 124.83699035644531, "rewards/real": -14.402481079101562, "step": 193 }, { "epoch": 0.81, "grad_norm": 59.35182524704906, "learning_rate": 4.487179487179487e-07, "logits/generated": -2.422110080718994, "logits/oppo_generated": -2.714049816131592, "logits/oppo_real": -2.821863889694214, "logits/real": -2.3553073406219482, "logps/generated": -191.3336181640625, "logps/oppo_gen": -61.66150665283203, "logps/oppo_real": -281.81561279296875, "logps/real": -268.0882873535156, "loss": -2.9157, "loss/gen": 0.2871388792991638, "loss/real": -3.4819259643554688, "rewards/accuracies": 1.0, "rewards/generated": -129.67208862304688, "rewards/margins": 143.39939880371094, "rewards/real": 13.727313995361328, "step": 194 }, { "epoch": 0.82, "grad_norm": 61.67208283154745, "learning_rate": 4.4836182336182333e-07, "logits/generated": -2.2326247692108154, "logits/oppo_generated": -2.7336645126342773, "logits/oppo_real": -2.6636435985565186, "logits/real": -2.38464617729187, "logps/generated": -191.60995483398438, "logps/oppo_gen": -66.04891204833984, "logps/oppo_real": -343.6158447265625, "logps/real": -311.3708801269531, "loss": -3.0524, "loss/gen": 0.30880045890808105, "loss/real": -4.188716888427734, "rewards/accuracies": 1.0, "rewards/generated": -125.56105041503906, "rewards/margins": 157.8060302734375, "rewards/real": 32.2449836730957, "step": 195 }, { "epoch": 0.82, "grad_norm": 79.39509465129127, "learning_rate": 4.48005698005698e-07, "logits/generated": -2.459395408630371, "logits/oppo_generated": -3.0542874336242676, "logits/oppo_real": -2.803119659423828, "logits/real": -2.7458314895629883, "logps/generated": -202.55088806152344, "logps/oppo_gen": -81.553955078125, "logps/oppo_real": -376.17071533203125, "logps/real": -320.265869140625, "loss": -3.204, "loss/gen": 0.3818941116333008, "loss/real": -5.556370735168457, "rewards/accuracies": 1.0, "rewards/generated": -120.99693298339844, "rewards/margins": 176.90176391601562, "rewards/real": 55.90484619140625, "step": 196 }, { "epoch": 0.82, "grad_norm": 64.34859516119019, "learning_rate": 4.476495726495726e-07, "logits/generated": -2.551626205444336, "logits/oppo_generated": -2.791293144226074, "logits/oppo_real": -2.8689441680908203, "logits/real": -2.4949615001678467, "logps/generated": -214.268310546875, "logps/oppo_gen": -90.10079956054688, "logps/oppo_real": -387.6597900390625, "logps/real": -355.855712890625, "loss": -2.9635, "loss/gen": 0.7095820903778076, "loss/real": -3.8604226112365723, "rewards/accuracies": 1.0, "rewards/generated": -124.16752624511719, "rewards/margins": 155.97164916992188, "rewards/real": 31.804113388061523, "step": 197 }, { "epoch": 0.83, "grad_norm": 55.47510967063292, "learning_rate": 4.4729344729344725e-07, "logits/generated": -2.5076422691345215, "logits/oppo_generated": -2.8356850147247314, "logits/oppo_real": -2.917833089828491, "logits/real": -2.4860076904296875, "logps/generated": -208.80723571777344, "logps/oppo_gen": -76.40264892578125, "logps/oppo_real": -278.172607421875, "logps/real": -251.12782287597656, "loss": -3.1359, "loss/gen": 0.3080252408981323, "loss/real": -3.7138681411743164, "rewards/accuracies": 1.0, "rewards/generated": -132.4045867919922, "rewards/margins": 159.44937133789062, "rewards/real": 27.04478645324707, "step": 198 }, { "epoch": 0.83, "grad_norm": 55.47510967063292, "learning_rate": 4.4729344729344725e-07, "logits/generated": -2.7056777477264404, "logits/oppo_generated": -3.0011539459228516, "logits/oppo_real": -3.069876194000244, "logits/real": -2.696037530899048, "logps/generated": -191.9580078125, "logps/oppo_gen": -69.13575744628906, "logps/oppo_real": -340.70343017578125, "logps/real": -328.8914794921875, "loss": -1322.4869, "loss/gen": 0.4668487310409546, "loss/real": -3.3937647342681885, "rewards/accuracies": 1.0, "rewards/generated": -122.82225036621094, "rewards/margins": 134.6342010498047, "rewards/real": 11.811951637268066, "step": 199 }, { "epoch": 0.84, "grad_norm": 76.39824855854027, "learning_rate": 4.469373219373219e-07, "logits/generated": -2.6483988761901855, "logits/oppo_generated": -2.821411609649658, "logits/oppo_real": -2.9697532653808594, "logits/real": -2.5232529640197754, "logps/generated": -208.79844665527344, "logps/oppo_gen": -94.25292205810547, "logps/oppo_real": -449.1705322265625, "logps/real": -411.73590087890625, "loss": -3.1662, "loss/gen": 0.6144514083862305, "loss/real": -4.066596984863281, "rewards/accuracies": 1.0, "rewards/generated": -114.54552459716797, "rewards/margins": 151.98013305664062, "rewards/real": 37.434608459472656, "step": 200 }, { "epoch": 0.84, "grad_norm": 80.08028198314489, "learning_rate": 4.465811965811966e-07, "logits/generated": -2.521160125732422, "logits/oppo_generated": -2.9498441219329834, "logits/oppo_real": -2.889374017715454, "logits/real": -2.6443803310394287, "logps/generated": -218.7113037109375, "logps/oppo_gen": -93.28401184082031, "logps/oppo_real": -446.9027099609375, "logps/real": -420.687744140625, "loss": -3.1837, "loss/gen": 0.3799129128456116, "loss/real": -3.5870652198791504, "rewards/accuracies": 1.0, "rewards/generated": -125.42729187011719, "rewards/margins": 151.6422119140625, "rewards/real": 26.214933395385742, "step": 201 }, { "epoch": 0.85, "grad_norm": 85.4263034452065, "learning_rate": 4.4622507122507117e-07, "logits/generated": -2.0849194526672363, "logits/oppo_generated": -2.5877699851989746, "logits/oppo_real": -2.4145617485046387, "logits/real": -2.3253278732299805, "logps/generated": -175.31253051757812, "logps/oppo_gen": -58.147544860839844, "logps/oppo_real": -256.63494873046875, "logps/real": -251.91702270507812, "loss": -3.1424, "loss/gen": 0.633264422416687, "loss/real": -2.9826745986938477, "rewards/accuracies": 1.0, "rewards/generated": -117.16497802734375, "rewards/margins": 121.88292694091797, "rewards/real": 4.7179460525512695, "step": 202 }, { "epoch": 0.85, "grad_norm": 61.56183771851885, "learning_rate": 4.4586894586894584e-07, "logits/generated": -2.5706653594970703, "logits/oppo_generated": -2.825096607208252, "logits/oppo_real": -2.919394016265869, "logits/real": -2.5202863216400146, "logps/generated": -173.32174682617188, "logps/oppo_gen": -62.71122360229492, "logps/oppo_real": -234.44354248046875, "logps/real": -211.79794311523438, "loss": -3.0867, "loss/gen": 0.7330012321472168, "loss/real": -3.4876270294189453, "rewards/accuracies": 1.0, "rewards/generated": -110.61052703857422, "rewards/margins": 133.25613403320312, "rewards/real": 22.645606994628906, "step": 203 }, { "epoch": 0.85, "grad_norm": 55.0575072225623, "learning_rate": 4.455128205128205e-07, "logits/generated": -2.589749336242676, "logits/oppo_generated": -2.681910276412964, "logits/oppo_real": -2.8930723667144775, "logits/real": -2.393826484680176, "logps/generated": -184.09539794921875, "logps/oppo_gen": -69.35714721679688, "logps/oppo_real": -321.68878173828125, "logps/real": -300.22052001953125, "loss": -2.9885, "loss/gen": 0.6509556770324707, "loss/real": -3.5244717597961426, "rewards/accuracies": 1.0, "rewards/generated": -114.73826599121094, "rewards/margins": 136.20651245117188, "rewards/real": 21.46826171875, "step": 204 }, { "epoch": 0.86, "grad_norm": 205.9144144875487, "learning_rate": 4.4515669515669514e-07, "logits/generated": -2.5176801681518555, "logits/oppo_generated": -2.910146951675415, "logits/oppo_real": -2.842686653137207, "logits/real": -2.6160125732421875, "logps/generated": -192.1990966796875, "logps/oppo_gen": -55.29602813720703, "logps/oppo_real": -188.457763671875, "logps/real": -170.13516235351562, "loss": -3.8694, "loss/gen": 0.29164981842041016, "loss/real": -3.5206615924835205, "rewards/accuracies": 1.0, "rewards/generated": -136.903076171875, "rewards/margins": 155.22567749023438, "rewards/real": 18.32259750366211, "step": 205 }, { "epoch": 0.86, "grad_norm": 72.13692086277665, "learning_rate": 4.448005698005698e-07, "logits/generated": -2.6976194381713867, "logits/oppo_generated": -2.9482345581054688, "logits/oppo_real": -3.0109448432922363, "logits/real": -2.652653455734253, "logps/generated": -196.82077026367188, "logps/oppo_gen": -70.6409912109375, "logps/oppo_real": -375.189697265625, "logps/real": -351.4737243652344, "loss": -3.0215, "loss/gen": 0.35152187943458557, "loss/real": -3.49497127532959, "rewards/accuracies": 1.0, "rewards/generated": -126.1797866821289, "rewards/margins": 149.89573669433594, "rewards/real": 23.7159423828125, "step": 206 }, { "epoch": 0.87, "grad_norm": 61.61398545074811, "learning_rate": 4.444444444444444e-07, "logits/generated": -2.5743110179901123, "logits/oppo_generated": -2.7811834812164307, "logits/oppo_real": -2.923962116241455, "logits/real": -2.454921245574951, "logps/generated": -196.2818603515625, "logps/oppo_gen": -71.71026611328125, "logps/oppo_real": -353.846923828125, "logps/real": -345.35919189453125, "loss": -3.0779, "loss/gen": 0.32238900661468506, "loss/real": -3.2966091632843018, "rewards/accuracies": 1.0, "rewards/generated": -124.57159423828125, "rewards/margins": 133.05931091308594, "rewards/real": 8.487724304199219, "step": 207 }, { "epoch": 0.87, "grad_norm": 57.15490447432092, "learning_rate": 4.4408831908831906e-07, "logits/generated": -2.782914161682129, "logits/oppo_generated": -2.8043360710144043, "logits/oppo_real": -3.0211949348449707, "logits/real": -2.5231986045837402, "logps/generated": -194.0042724609375, "logps/oppo_gen": -77.71004486083984, "logps/oppo_real": -389.77301025390625, "logps/real": -358.2027282714844, "loss": -3.0323, "loss/gen": 0.423714816570282, "loss/real": -3.7564921379089355, "rewards/accuracies": 1.0, "rewards/generated": -116.29424285888672, "rewards/margins": 147.8645477294922, "rewards/real": 31.570310592651367, "step": 208 }, { "epoch": 0.87, "grad_norm": 59.74080162590932, "learning_rate": 4.4373219373219373e-07, "logits/generated": -2.357008934020996, "logits/oppo_generated": -2.7760987281799316, "logits/oppo_real": -2.740163803100586, "logits/real": -2.439347505569458, "logps/generated": -205.67852783203125, "logps/oppo_gen": -88.69313049316406, "logps/oppo_real": -338.8006591796875, "logps/real": -314.58441162109375, "loss": -3.0479, "loss/gen": 0.4672941565513611, "loss/real": -3.506350040435791, "rewards/accuracies": 1.0, "rewards/generated": -116.98541259765625, "rewards/margins": 141.20166015625, "rewards/real": 24.21624755859375, "step": 209 }, { "epoch": 0.88, "grad_norm": 1848.1626687064102, "learning_rate": 4.4337606837606836e-07, "logits/generated": -2.4638514518737793, "logits/oppo_generated": -2.7127938270568848, "logits/oppo_real": -2.803234577178955, "logits/real": -2.38244366645813, "logps/generated": -237.40353393554688, "logps/oppo_gen": -85.75541687011719, "logps/oppo_real": -242.4071807861328, "logps/real": -237.742431640625, "loss": -5.9552, "loss/gen": 0.2063872367143631, "loss/real": -3.0251030921936035, "rewards/accuracies": 1.0, "rewards/generated": -151.64813232421875, "rewards/margins": 156.31288146972656, "rewards/real": 4.664756774902344, "step": 210 }, { "epoch": 0.88, "grad_norm": 400.5685997741211, "learning_rate": 4.43019943019943e-07, "logits/generated": -2.5298070907592773, "logits/oppo_generated": -2.995426654815674, "logits/oppo_real": -2.8803281784057617, "logits/real": -2.6857643127441406, "logps/generated": -184.82400512695312, "logps/oppo_gen": -68.82854461669922, "logps/oppo_real": -337.844482421875, "logps/real": -296.872314453125, "loss": -4.4013, "loss/gen": 0.568313717842102, "loss/real": -4.289045333862305, "rewards/accuracies": 1.0, "rewards/generated": -115.99545288085938, "rewards/margins": 156.96762084960938, "rewards/real": 40.97218322753906, "step": 211 }, { "epoch": 0.89, "grad_norm": 40892.68061646241, "learning_rate": 4.4266381766381765e-07, "logits/generated": -2.705117702484131, "logits/oppo_generated": -2.6126418113708496, "logits/oppo_real": -3.0222294330596924, "logits/real": -2.3141441345214844, "logps/generated": -171.36810302734375, "logps/oppo_gen": -56.36054992675781, "logps/oppo_real": -325.3075256347656, "logps/real": -308.48114013671875, "loss": -129.61, "loss/gen": 0.39068758487701416, "loss/real": -3.74429988861084, "rewards/accuracies": 1.0, "rewards/generated": -115.00755310058594, "rewards/margins": 131.83392333984375, "rewards/real": 16.826370239257812, "step": 212 }, { "epoch": 0.89, "grad_norm": 3590.9546084572, "learning_rate": 4.423076923076923e-07, "logits/generated": -2.696446418762207, "logits/oppo_generated": -3.026592254638672, "logits/oppo_real": -2.9974026679992676, "logits/real": -2.68188214302063, "logps/generated": -209.5288543701172, "logps/oppo_gen": -81.62860107421875, "logps/oppo_real": -354.01513671875, "logps/real": -335.030517578125, "loss": -13.5441, "loss/gen": 0.3259393572807312, "loss/real": -3.325887680053711, "rewards/accuracies": 1.0, "rewards/generated": -127.90025329589844, "rewards/margins": 146.8848876953125, "rewards/real": 18.9846248626709, "step": 213 }, { "epoch": 0.9, "grad_norm": 62.96425038203723, "learning_rate": 4.4195156695156695e-07, "logits/generated": -2.4695000648498535, "logits/oppo_generated": -2.86299991607666, "logits/oppo_real": -2.897392749786377, "logits/real": -2.52815580368042, "logps/generated": -159.04405212402344, "logps/oppo_gen": -55.654396057128906, "logps/oppo_real": -286.4037170410156, "logps/real": -273.60174560546875, "loss": -3.259, "loss/gen": 0.6177021265029907, "loss/real": -3.30937123298645, "rewards/accuracies": 1.0, "rewards/generated": -103.38966369628906, "rewards/margins": 116.19161224365234, "rewards/real": 12.801952362060547, "step": 214 }, { "epoch": 0.9, "grad_norm": 57.797468485057806, "learning_rate": 4.4159544159544157e-07, "logits/generated": -2.4351806640625, "logits/oppo_generated": -2.8678367137908936, "logits/oppo_real": -2.797013759613037, "logits/real": -2.479971408843994, "logps/generated": -265.5663757324219, "logps/oppo_gen": -154.916748046875, "logps/oppo_real": -268.4582824707031, "logps/real": -245.14251708984375, "loss": -3.1633, "loss/gen": 0.5528236627578735, "loss/real": -3.513732433319092, "rewards/accuracies": 1.0, "rewards/generated": -110.6496353149414, "rewards/margins": 133.96539306640625, "rewards/real": 23.315759658813477, "step": 215 }, { "epoch": 0.9, "grad_norm": 80.14004161173895, "learning_rate": 4.412393162393162e-07, "logits/generated": -2.6762161254882812, "logits/oppo_generated": -2.879833221435547, "logits/oppo_real": -3.0112786293029785, "logits/real": -2.480961799621582, "logps/generated": -193.92112731933594, "logps/oppo_gen": -96.10844421386719, "logps/oppo_real": -492.59039306640625, "logps/real": -480.1392517089844, "loss": -2.9444, "loss/gen": 1.0747777223587036, "loss/real": -3.2505507469177246, "rewards/accuracies": 0.875, "rewards/generated": -97.81267547607422, "rewards/margins": 110.26382446289062, "rewards/real": 12.45114803314209, "step": 216 }, { "epoch": 0.91, "grad_norm": 27165.010436221077, "learning_rate": 4.4088319088319087e-07, "logits/generated": -2.661689281463623, "logits/oppo_generated": -2.855457305908203, "logits/oppo_real": -3.161579132080078, "logits/real": -2.475346565246582, "logps/generated": -173.70448303222656, "logps/oppo_gen": -79.04156494140625, "logps/oppo_real": -508.73779296875, "logps/real": -485.790283203125, "loss": -63.2568, "loss/gen": 0.8462377786636353, "loss/real": -3.7985730171203613, "rewards/accuracies": 0.875, "rewards/generated": -94.66291809082031, "rewards/margins": 117.61046600341797, "rewards/real": 22.947547912597656, "step": 217 }, { "epoch": 0.91, "grad_norm": 79.71609474703807, "learning_rate": 4.4052706552706555e-07, "logits/generated": -2.6024856567382812, "logits/oppo_generated": -2.8270015716552734, "logits/oppo_real": -2.9884450435638428, "logits/real": -2.381761074066162, "logps/generated": -193.1739959716797, "logps/oppo_gen": -79.96229553222656, "logps/oppo_real": -295.296630859375, "logps/real": -280.0252685546875, "loss": -3.1061, "loss/gen": 0.8399382829666138, "loss/real": -3.309168815612793, "rewards/accuracies": 0.875, "rewards/generated": -113.2116928100586, "rewards/margins": 128.48306274414062, "rewards/real": 15.27135944366455, "step": 218 }, { "epoch": 0.92, "grad_norm": 117.95946155486003, "learning_rate": 4.4017094017094017e-07, "logits/generated": -2.274564266204834, "logits/oppo_generated": -2.7040886878967285, "logits/oppo_real": -2.816561698913574, "logits/real": -2.2658865451812744, "logps/generated": -187.32923889160156, "logps/oppo_gen": -55.71031188964844, "logps/oppo_real": -202.95962524414062, "logps/real": -166.00265502929688, "loss": -3.2002, "loss/gen": 0.33614322543144226, "loss/real": -4.0761213302612305, "rewards/accuracies": 1.0, "rewards/generated": -131.61892700195312, "rewards/margins": 168.57589721679688, "rewards/real": 36.956966400146484, "step": 219 }, { "epoch": 0.92, "grad_norm": 88.19558620351276, "learning_rate": 4.398148148148148e-07, "logits/generated": -1.9364006519317627, "logits/oppo_generated": -2.385345458984375, "logits/oppo_real": -2.4835422039031982, "logits/real": -1.808083415031433, "logps/generated": -179.90750122070312, "logps/oppo_gen": -75.58077239990234, "logps/oppo_real": -339.3034973144531, "logps/real": -272.0712585449219, "loss": -3.2605, "loss/gen": 0.8947268724441528, "loss/real": -8.812257766723633, "rewards/accuracies": 1.0, "rewards/generated": -104.32673645019531, "rewards/margins": 171.55897521972656, "rewards/real": 67.23223876953125, "step": 220 }, { "epoch": 0.92, "grad_norm": 5908.862834759325, "learning_rate": 4.394586894586894e-07, "logits/generated": -2.601797580718994, "logits/oppo_generated": -3.011491060256958, "logits/oppo_real": -3.0487937927246094, "logits/real": -2.562220573425293, "logps/generated": -246.65899658203125, "logps/oppo_gen": -131.22396850585938, "logps/oppo_real": -400.33868408203125, "logps/real": -376.3664245605469, "loss": -13.8663, "loss/gen": 0.42373475432395935, "loss/real": -3.5093624591827393, "rewards/accuracies": 1.0, "rewards/generated": -115.43504333496094, "rewards/margins": 139.40728759765625, "rewards/real": 23.972253799438477, "step": 221 }, { "epoch": 0.93, "grad_norm": 95.4678242590141, "learning_rate": 4.391025641025641e-07, "logits/generated": -2.3840436935424805, "logits/oppo_generated": -2.755108118057251, "logits/oppo_real": -2.8694067001342773, "logits/real": -2.2530529499053955, "logps/generated": -177.37356567382812, "logps/oppo_gen": -61.73572540283203, "logps/oppo_real": -230.838134765625, "logps/real": -215.96424865722656, "loss": -3.0607, "loss/gen": 0.6986711025238037, "loss/real": -3.6220858097076416, "rewards/accuracies": 0.875, "rewards/generated": -115.63784790039062, "rewards/margins": 130.51173400878906, "rewards/real": 14.873891830444336, "step": 222 }, { "epoch": 0.93, "grad_norm": 87.51268602233479, "learning_rate": 4.3874643874643876e-07, "logits/generated": -2.3197760581970215, "logits/oppo_generated": -2.8574419021606445, "logits/oppo_real": -2.923137903213501, "logits/real": -2.3333818912506104, "logps/generated": -231.46023559570312, "logps/oppo_gen": -82.77210998535156, "logps/oppo_real": -252.58892822265625, "logps/real": -270.0004577636719, "loss": -2.98, "loss/gen": 0.21608535945415497, "loss/real": -2.6946630477905273, "rewards/accuracies": 0.875, "rewards/generated": -148.6881103515625, "rewards/margins": 131.2765655517578, "rewards/real": -17.411537170410156, "step": 223 }, { "epoch": 0.94, "grad_norm": 87.51268602233479, "learning_rate": 4.3874643874643876e-07, "logits/generated": -2.242218017578125, "logits/oppo_generated": -2.994565010070801, "logits/oppo_real": -2.8149280548095703, "logits/real": -2.4609484672546387, "logps/generated": -181.10708618164062, "logps/oppo_gen": -48.2861213684082, "logps/oppo_real": -137.37625122070312, "logps/real": -162.7267608642578, "loss": -7932.2959, "loss/gen": 0.7526332139968872, "loss/real": -2.542538642883301, "rewards/accuracies": 0.875, "rewards/generated": -132.82098388671875, "rewards/margins": 107.47045135498047, "rewards/real": -25.350521087646484, "step": 224 }, { "epoch": 0.94, "grad_norm": 67.6441819490967, "learning_rate": 4.3839031339031333e-07, "logits/generated": -2.315286636352539, "logits/oppo_generated": -2.816603422164917, "logits/oppo_real": -2.9343314170837402, "logits/real": -2.3111538887023926, "logps/generated": -155.87979125976562, "logps/oppo_gen": -30.44548988342285, "logps/oppo_real": -174.9966278076172, "logps/real": -169.0306854248047, "loss": -2.8919, "loss/gen": 0.31436973810195923, "loss/real": -3.049879550933838, "rewards/accuracies": 1.0, "rewards/generated": -125.43431091308594, "rewards/margins": 131.40023803710938, "rewards/real": 5.965947151184082, "step": 225 }, { "epoch": 0.95, "grad_norm": 61.01237787454485, "learning_rate": 4.38034188034188e-07, "logits/generated": -2.3058667182922363, "logits/oppo_generated": -2.6415185928344727, "logits/oppo_real": -3.0115818977355957, "logits/real": -2.004304885864258, "logps/generated": -215.7899627685547, "logps/oppo_gen": -93.466064453125, "logps/oppo_real": -340.529296875, "logps/real": -337.6749572753906, "loss": -3.1087, "loss/gen": 0.5611802339553833, "loss/real": -2.9437613487243652, "rewards/accuracies": 1.0, "rewards/generated": -122.32388305664062, "rewards/margins": 125.1782455444336, "rewards/real": 2.8543548583984375, "step": 226 }, { "epoch": 0.95, "grad_norm": 68.83477018148241, "learning_rate": 4.376780626780627e-07, "logits/generated": -2.3567757606506348, "logits/oppo_generated": -2.7984108924865723, "logits/oppo_real": -2.9754528999328613, "logits/real": -2.275757312774658, "logps/generated": -208.89105224609375, "logps/oppo_gen": -69.67858123779297, "logps/oppo_real": -268.7974853515625, "logps/real": -266.00445556640625, "loss": -3.1351, "loss/gen": 0.24598746001720428, "loss/real": -3.0755763053894043, "rewards/accuracies": 1.0, "rewards/generated": -139.21246337890625, "rewards/margins": 142.00546264648438, "rewards/real": 2.7930030822753906, "step": 227 }, { "epoch": 0.95, "grad_norm": 103.64887767723296, "learning_rate": 4.373219373219373e-07, "logits/generated": -2.2009589672088623, "logits/oppo_generated": -2.7994847297668457, "logits/oppo_real": -2.687981605529785, "logits/real": -2.264253616333008, "logps/generated": -200.60202026367188, "logps/oppo_gen": -76.17577362060547, "logps/oppo_real": -381.5020751953125, "logps/real": -350.08245849609375, "loss": -3.0609, "loss/gen": 0.34163713455200195, "loss/real": -4.447661399841309, "rewards/accuracies": 1.0, "rewards/generated": -124.42623138427734, "rewards/margins": 155.8458251953125, "rewards/real": 31.419601440429688, "step": 228 }, { "epoch": 0.96, "grad_norm": 4236.734355609282, "learning_rate": 4.3696581196581193e-07, "logits/generated": -2.313900947570801, "logits/oppo_generated": -2.8429031372070312, "logits/oppo_real": -3.0224597454071045, "logits/real": -2.2679154872894287, "logps/generated": -197.47596740722656, "logps/oppo_gen": -78.5534439086914, "logps/oppo_real": -246.5026397705078, "logps/real": -232.70751953125, "loss": -7.8903, "loss/gen": 0.3893076479434967, "loss/real": -3.408906936645508, "rewards/accuracies": 1.0, "rewards/generated": -118.92252349853516, "rewards/margins": 132.71763610839844, "rewards/real": 13.795119285583496, "step": 229 }, { "epoch": 0.96, "grad_norm": 65.91049876655653, "learning_rate": 4.366096866096866e-07, "logits/generated": -2.122530221939087, "logits/oppo_generated": -2.5529236793518066, "logits/oppo_real": -2.7146146297454834, "logits/real": -1.90482759475708, "logps/generated": -217.27114868164062, "logps/oppo_gen": -79.70944213867188, "logps/oppo_real": -106.01055145263672, "logps/real": -128.87461853027344, "loss": -3.0589, "loss/gen": 0.2452090084552765, "loss/real": -2.452601909637451, "rewards/accuracies": 1.0, "rewards/generated": -137.56170654296875, "rewards/margins": 114.6976547241211, "rewards/real": -22.864065170288086, "step": 230 }, { "epoch": 0.97, "grad_norm": 100.04292047666341, "learning_rate": 4.362535612535612e-07, "logits/generated": -1.987313985824585, "logits/oppo_generated": -2.5894346237182617, "logits/oppo_real": -2.6849865913391113, "logits/real": -2.029129981994629, "logps/generated": -221.10101318359375, "logps/oppo_gen": -67.09019470214844, "logps/oppo_real": -256.4427185058594, "logps/real": -237.23843383789062, "loss": -3.213, "loss/gen": 0.4131355285644531, "loss/real": -3.562318801879883, "rewards/accuracies": 1.0, "rewards/generated": -154.0108184814453, "rewards/margins": 173.215087890625, "rewards/real": 19.204273223876953, "step": 231 }, { "epoch": 0.97, "grad_norm": 79.75284391970766, "learning_rate": 4.358974358974359e-07, "logits/generated": -2.341341495513916, "logits/oppo_generated": -2.959817886352539, "logits/oppo_real": -2.9362192153930664, "logits/real": -2.416731119155884, "logps/generated": -216.04949951171875, "logps/oppo_gen": -82.48292541503906, "logps/oppo_real": -458.88818359375, "logps/real": -485.8726806640625, "loss": -2.8748, "loss/gen": 0.28640565276145935, "loss/real": -2.7164149284362793, "rewards/accuracies": 0.75, "rewards/generated": -133.5665740966797, "rewards/margins": 106.58207702636719, "rewards/real": -26.9844970703125, "step": 232 }, { "epoch": 0.97, "grad_norm": 263.329314974615, "learning_rate": 4.355413105413105e-07, "logits/generated": -2.236466884613037, "logits/oppo_generated": -2.7284858226776123, "logits/oppo_real": -2.8326492309570312, "logits/real": -2.176626205444336, "logps/generated": -187.85845947265625, "logps/oppo_gen": -60.89936828613281, "logps/oppo_real": -245.58233642578125, "logps/real": -235.84445190429688, "loss": -3.2515, "loss/gen": 0.3334371745586395, "loss/real": -3.059086322784424, "rewards/accuracies": 1.0, "rewards/generated": -126.9590835571289, "rewards/margins": 136.69699096679688, "rewards/real": 9.737905502319336, "step": 233 }, { "epoch": 0.98, "grad_norm": 130.99742811168088, "learning_rate": 4.3518518518518514e-07, "logits/generated": -2.2553353309631348, "logits/oppo_generated": -2.884782075881958, "logits/oppo_real": -3.007986545562744, "logits/real": -2.3814938068389893, "logps/generated": -198.6071319580078, "logps/oppo_gen": -64.29571533203125, "logps/oppo_real": -445.2386169433594, "logps/real": -404.5937805175781, "loss": -3.0983, "loss/gen": 0.26874154806137085, "loss/real": -4.306643962860107, "rewards/accuracies": 1.0, "rewards/generated": -134.31141662597656, "rewards/margins": 174.95623779296875, "rewards/real": 40.64482498168945, "step": 234 }, { "epoch": 0.98, "grad_norm": 70.3636098964248, "learning_rate": 4.348290598290598e-07, "logits/generated": -2.1750454902648926, "logits/oppo_generated": -2.8430304527282715, "logits/oppo_real": -2.873483657836914, "logits/real": -2.3181915283203125, "logps/generated": -196.6973876953125, "logps/oppo_gen": -68.79239654541016, "logps/oppo_real": -391.89910888671875, "logps/real": -368.2919616699219, "loss": -2.8684, "loss/gen": 0.313241183757782, "loss/real": -3.5294508934020996, "rewards/accuracies": 1.0, "rewards/generated": -127.90498352050781, "rewards/margins": 151.51217651367188, "rewards/real": 23.607187271118164, "step": 235 }, { "epoch": 0.99, "grad_norm": 69.64450397263053, "learning_rate": 4.3447293447293444e-07, "logits/generated": -2.3948874473571777, "logits/oppo_generated": -2.8508265018463135, "logits/oppo_real": -2.9677348136901855, "logits/real": -2.3284974098205566, "logps/generated": -211.0993194580078, "logps/oppo_gen": -88.43344116210938, "logps/oppo_real": -438.55322265625, "logps/real": -395.72943115234375, "loss": -3.1273, "loss/gen": 0.3554914891719818, "loss/real": -4.292209625244141, "rewards/accuracies": 1.0, "rewards/generated": -122.66587829589844, "rewards/margins": 165.48968505859375, "rewards/real": 42.82379913330078, "step": 236 }, { "epoch": 0.99, "grad_norm": 287.0907957423923, "learning_rate": 4.341168091168091e-07, "logits/generated": -2.3292388916015625, "logits/oppo_generated": -2.816070079803467, "logits/oppo_real": -3.012850761413574, "logits/real": -2.2773959636688232, "logps/generated": -189.18850708007812, "logps/oppo_gen": -55.2912483215332, "logps/oppo_real": -255.20977783203125, "logps/real": -236.0473175048828, "loss": -3.7901, "loss/gen": 0.27096259593963623, "loss/real": -3.297393321990967, "rewards/accuracies": 1.0, "rewards/generated": -133.89724731445312, "rewards/margins": 153.05970764160156, "rewards/real": 19.162452697753906, "step": 237 }, { "epoch": 1.0, "grad_norm": 68.14058737960438, "learning_rate": 4.3376068376068374e-07, "logits/generated": -2.3537933826446533, "logits/oppo_generated": -2.701869487762451, "logits/oppo_real": -2.963564872741699, "logits/real": -2.1307592391967773, "logps/generated": -207.35293579101562, "logps/oppo_gen": -83.03327941894531, "logps/oppo_real": -312.4057312011719, "logps/real": -294.8611755371094, "loss": -3.0868, "loss/gen": 0.3351461589336395, "loss/real": -3.4385178089141846, "rewards/accuracies": 1.0, "rewards/generated": -124.31964111328125, "rewards/margins": 141.8642120361328, "rewards/real": 17.544559478759766, "step": 238 }, { "epoch": 1.0, "grad_norm": 66.31466478558872, "learning_rate": 4.3340455840455836e-07, "logits/generated": -2.291761636734009, "logits/oppo_generated": -2.8546152114868164, "logits/oppo_real": -3.036848545074463, "logits/real": -2.2752645015716553, "logps/generated": -213.76364135742188, "logps/oppo_gen": -75.19477844238281, "logps/oppo_real": -314.191162109375, "logps/real": -290.7506103515625, "loss": -3.1633, "loss/gen": 0.2661153972148895, "loss/real": -3.5240395069122314, "rewards/accuracies": 1.0, "rewards/generated": -138.56887817382812, "rewards/margins": 162.0093994140625, "rewards/real": 23.440532684326172, "step": 239 } ], "logging_steps": 1.0, "max_steps": 1434, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }