|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100.0, |
|
"global_step": 239, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/generated": -3.130502223968506, |
|
"logits/oppo_generated": -3.1088104248046875, |
|
"logits/oppo_real": -3.130502223968506, |
|
"logits/real": -3.1088104248046875, |
|
"logps/generated": -99.40917205810547, |
|
"logps/oppo_gen": -99.40917205810547, |
|
"logps/oppo_real": -459.3097229003906, |
|
"logps/real": -459.3097229003906, |
|
"loss": 0.9762, |
|
"loss/gen": 3.6945278644561768, |
|
"loss/real": -2.7182817459106445, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/generated": -3.0933988094329834, |
|
"logits/oppo_generated": -2.919645309448242, |
|
"logits/oppo_real": -3.0933988094329834, |
|
"logits/real": -2.919645309448242, |
|
"logps/generated": -103.65153503417969, |
|
"logps/oppo_gen": -103.65153503417969, |
|
"logps/oppo_real": -392.1358642578125, |
|
"logps/real": -392.1358642578125, |
|
"loss": 0.9762, |
|
"loss/gen": 3.6945278644561768, |
|
"loss/real": -2.7182817459106445, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/generated": -2.6572537422180176, |
|
"logits/oppo_generated": -2.8074941635131836, |
|
"logits/oppo_real": -2.6572537422180176, |
|
"logits/real": -2.8074941635131836, |
|
"logps/generated": -72.88986206054688, |
|
"logps/oppo_gen": -72.88986206054688, |
|
"logps/oppo_real": -291.916748046875, |
|
"logps/real": -291.916748046875, |
|
"loss": 0.9762, |
|
"loss/gen": 3.6945278644561768, |
|
"loss/real": -2.7182817459106445, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/generated": -2.8966193199157715, |
|
"logits/oppo_generated": -2.768460273742676, |
|
"logits/oppo_real": -2.8966193199157715, |
|
"logits/real": -2.768460273742676, |
|
"logps/generated": -64.05287170410156, |
|
"logps/oppo_gen": -64.05287170410156, |
|
"logps/oppo_real": -376.8367919921875, |
|
"logps/real": -376.8367919921875, |
|
"loss": 0.9762, |
|
"loss/gen": 3.6945278644561768, |
|
"loss/real": -2.7182817459106445, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/generated": -2.889317512512207, |
|
"logits/oppo_generated": -2.708950996398926, |
|
"logits/oppo_real": -2.889317512512207, |
|
"logits/real": -2.708950996398926, |
|
"logps/generated": -48.29164123535156, |
|
"logps/oppo_gen": -48.29164123535156, |
|
"logps/oppo_real": -173.0751953125, |
|
"logps/real": -173.0751953125, |
|
"loss": 0.9762, |
|
"loss/gen": 3.6945278644561768, |
|
"loss/real": -2.7182817459106445, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/generated": -2.957958698272705, |
|
"logits/oppo_generated": -2.749436378479004, |
|
"logits/oppo_real": -2.957958698272705, |
|
"logits/real": -2.749436378479004, |
|
"logps/generated": -48.84138488769531, |
|
"logps/oppo_gen": -48.84138488769531, |
|
"logps/oppo_real": -139.2998046875, |
|
"logps/real": -139.2998046875, |
|
"loss": 0.9762, |
|
"loss/gen": 3.6945278644561768, |
|
"loss/real": -2.7182817459106445, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 140.3248950538535, |
|
"learning_rate": 1.6666666666666667e-08, |
|
"logits/generated": -3.1195316314697266, |
|
"logits/oppo_generated": -2.9545342922210693, |
|
"logits/oppo_real": -3.1195316314697266, |
|
"logits/real": -2.9545342922210693, |
|
"logps/generated": -163.2059783935547, |
|
"logps/oppo_gen": -163.2059783935547, |
|
"logps/oppo_real": -432.88226318359375, |
|
"logps/real": -432.88226318359375, |
|
"loss": 0.9762, |
|
"loss/gen": 3.6945278644561768, |
|
"loss/real": -2.7182817459106445, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 140.3248950538535, |
|
"learning_rate": 1.6666666666666667e-08, |
|
"logits/generated": -2.910332441329956, |
|
"logits/oppo_generated": -2.9416637420654297, |
|
"logits/oppo_real": -2.910332441329956, |
|
"logits/real": -2.9416637420654297, |
|
"logps/generated": -69.29386901855469, |
|
"logps/oppo_gen": -69.29386901855469, |
|
"logps/oppo_real": -311.59619140625, |
|
"logps/real": -311.59619140625, |
|
"loss": 0.9762, |
|
"loss/gen": 3.6945278644561768, |
|
"loss/real": -2.7182817459106445, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 144.39084058121554, |
|
"learning_rate": 3.3333333333333334e-08, |
|
"logits/generated": -2.409976005554199, |
|
"logits/oppo_generated": -2.294548273086548, |
|
"logits/oppo_real": -2.409976005554199, |
|
"logits/real": -2.294548273086548, |
|
"logps/generated": -82.20011138916016, |
|
"logps/oppo_gen": -82.20011138916016, |
|
"logps/oppo_real": -381.1852111816406, |
|
"logps/real": -381.1852111816406, |
|
"loss": 0.9762, |
|
"loss/gen": 3.6945278644561768, |
|
"loss/real": -2.7182817459106445, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 144.39084058121554, |
|
"learning_rate": 3.3333333333333334e-08, |
|
"logits/generated": -2.963313579559326, |
|
"logits/oppo_generated": -2.9239017963409424, |
|
"logits/oppo_real": -2.963313579559326, |
|
"logits/real": -2.9239017963409424, |
|
"logps/generated": -93.09856414794922, |
|
"logps/oppo_gen": -93.09856414794922, |
|
"logps/oppo_real": -233.10401916503906, |
|
"logps/real": -233.10401916503906, |
|
"loss": 0.9762, |
|
"loss/gen": 3.6945278644561768, |
|
"loss/real": -2.7182817459106445, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 147.09211346550842, |
|
"learning_rate": 5e-08, |
|
"logits/generated": -2.857771396636963, |
|
"logits/oppo_generated": -2.837850570678711, |
|
"logits/oppo_real": -2.857771396636963, |
|
"logits/real": -2.837850570678711, |
|
"logps/generated": -59.46293640136719, |
|
"logps/oppo_gen": -59.46293640136719, |
|
"logps/oppo_real": -142.69805908203125, |
|
"logps/real": -142.69805908203125, |
|
"loss": 0.9762, |
|
"loss/gen": 3.6945278644561768, |
|
"loss/real": -2.7182817459106445, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 147.36241597037218, |
|
"learning_rate": 6.666666666666667e-08, |
|
"logits/generated": -2.8778512477874756, |
|
"logits/oppo_generated": -2.7672762870788574, |
|
"logits/oppo_real": -2.8780808448791504, |
|
"logits/real": -2.766920328140259, |
|
"logps/generated": -70.60530090332031, |
|
"logps/oppo_gen": -70.58644104003906, |
|
"logps/oppo_real": -343.4704284667969, |
|
"logps/real": -343.4797058105469, |
|
"loss": 0.9737, |
|
"loss/gen": 3.693136215209961, |
|
"loss/real": -2.718029499053955, |
|
"rewards/accuracies": 0.5, |
|
"rewards/generated": -0.01885223388671875, |
|
"rewards/margins": 0.009566187858581543, |
|
"rewards/real": -0.009286046028137207, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 141.20291665498627, |
|
"learning_rate": 8.333333333333333e-08, |
|
"logits/generated": -2.8214950561523438, |
|
"logits/oppo_generated": -2.8374581336975098, |
|
"logits/oppo_real": -2.822021961212158, |
|
"logits/real": -2.836732864379883, |
|
"logps/generated": -106.83735656738281, |
|
"logps/oppo_gen": -106.73956298828125, |
|
"logps/oppo_real": -280.41741943359375, |
|
"logps/real": -280.4453430175781, |
|
"loss": 0.9675, |
|
"loss/gen": 3.6873114109039307, |
|
"loss/real": -2.7175238132476807, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -0.09777355194091797, |
|
"rewards/margins": 0.06986618041992188, |
|
"rewards/real": -0.027907371520996094, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 147.7225611683097, |
|
"learning_rate": 1e-07, |
|
"logits/generated": -2.7692794799804688, |
|
"logits/oppo_generated": -2.8255615234375, |
|
"logits/oppo_real": -2.771684169769287, |
|
"logits/real": -2.8233795166015625, |
|
"logps/generated": -86.35212707519531, |
|
"logps/oppo_gen": -85.86231994628906, |
|
"logps/oppo_real": -289.01318359375, |
|
"logps/real": -289.048095703125, |
|
"loss": 0.9484, |
|
"loss/gen": 3.6585421562194824, |
|
"loss/real": -2.717336654663086, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -0.48981738090515137, |
|
"rewards/margins": 0.4548964500427246, |
|
"rewards/real": -0.03492093086242676, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 135.88560072492965, |
|
"learning_rate": 1.1666666666666667e-07, |
|
"logits/generated": -3.1533312797546387, |
|
"logits/oppo_generated": -2.7394165992736816, |
|
"logits/oppo_real": -3.1553921699523926, |
|
"logits/real": -2.7368688583374023, |
|
"logps/generated": -75.06793212890625, |
|
"logps/oppo_gen": -74.47514343261719, |
|
"logps/oppo_real": -366.370361328125, |
|
"logps/real": -366.428466796875, |
|
"loss": 0.9381, |
|
"loss/gen": 3.6510140895843506, |
|
"loss/real": -2.716707944869995, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -0.5927925109863281, |
|
"rewards/margins": 0.5347006320953369, |
|
"rewards/real": -0.05809187889099121, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 138.72056275567078, |
|
"learning_rate": 1.3333333333333334e-07, |
|
"logits/generated": -2.1314597129821777, |
|
"logits/oppo_generated": -2.1468427181243896, |
|
"logits/oppo_real": -2.142064094543457, |
|
"logits/real": -2.137998580932617, |
|
"logps/generated": -79.51522064208984, |
|
"logps/oppo_gen": -78.08332824707031, |
|
"logps/oppo_real": -437.152587890625, |
|
"logps/real": -437.42681884765625, |
|
"loss": 0.8706, |
|
"loss/gen": 3.590456008911133, |
|
"loss/real": -2.710862874984741, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.4318904876708984, |
|
"rewards/margins": 1.1576709747314453, |
|
"rewards/real": -0.2742195129394531, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 128.9259113793655, |
|
"learning_rate": 1.5e-07, |
|
"logits/generated": -2.9416465759277344, |
|
"logits/oppo_generated": -2.902646064758301, |
|
"logits/oppo_real": -2.953411817550659, |
|
"logits/real": -2.887700080871582, |
|
"logps/generated": -74.43273162841797, |
|
"logps/oppo_gen": -72.53976440429688, |
|
"logps/oppo_real": -310.7004089355469, |
|
"logps/real": -310.87109375, |
|
"loss": 0.8455, |
|
"loss/gen": 3.557424545288086, |
|
"loss/real": -2.7137060165405273, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.8929705619812012, |
|
"rewards/margins": 1.7222943305969238, |
|
"rewards/real": -0.17067623138427734, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 131.5700784634371, |
|
"learning_rate": 1.6666666666666665e-07, |
|
"logits/generated": -2.94179630279541, |
|
"logits/oppo_generated": -2.947140693664551, |
|
"logits/oppo_real": -2.9634807109832764, |
|
"logits/real": -2.920558214187622, |
|
"logps/generated": -79.8861083984375, |
|
"logps/oppo_gen": -74.80116271972656, |
|
"logps/oppo_real": -309.46124267578125, |
|
"logps/real": -310.43719482421875, |
|
"loss": 0.6796, |
|
"loss/gen": 3.3385138511657715, |
|
"loss/real": -2.6920909881591797, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.084942817687988, |
|
"rewards/margins": 4.109025955200195, |
|
"rewards/real": -0.9759171009063721, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 132.43793505119672, |
|
"learning_rate": 1.833333333333333e-07, |
|
"logits/generated": -2.4398093223571777, |
|
"logits/oppo_generated": -2.6668543815612793, |
|
"logits/oppo_real": -2.47564697265625, |
|
"logits/real": -2.6329777240753174, |
|
"logps/generated": -73.18605041503906, |
|
"logps/oppo_gen": -67.190673828125, |
|
"logps/oppo_real": -285.60797119140625, |
|
"logps/real": -287.226806640625, |
|
"loss": 0.5909, |
|
"loss/gen": 3.2783122062683105, |
|
"loss/real": -2.6751227378845215, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.995372772216797, |
|
"rewards/margins": 4.3765668869018555, |
|
"rewards/real": -1.6188058853149414, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 122.69142409669472, |
|
"learning_rate": 2e-07, |
|
"logits/generated": -3.083611011505127, |
|
"logits/oppo_generated": -2.7376956939697266, |
|
"logits/oppo_real": -3.1153059005737305, |
|
"logits/real": -2.70223331451416, |
|
"logps/generated": -102.092529296875, |
|
"logps/oppo_gen": -93.65745544433594, |
|
"logps/oppo_real": -173.968994140625, |
|
"logps/real": -176.39892578125, |
|
"loss": 0.5272, |
|
"loss/gen": 3.124610185623169, |
|
"loss/real": -2.653409957885742, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.435081481933594, |
|
"rewards/margins": 6.005127429962158, |
|
"rewards/real": -2.4299545288085938, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 107.67073560019686, |
|
"learning_rate": 2.1666666666666667e-07, |
|
"logits/generated": -2.8323276042938232, |
|
"logits/oppo_generated": -2.6699156761169434, |
|
"logits/oppo_real": -2.8930060863494873, |
|
"logits/real": -2.6024348735809326, |
|
"logps/generated": -59.84043884277344, |
|
"logps/oppo_gen": -50.189754486083984, |
|
"logps/oppo_real": -197.0562286376953, |
|
"logps/real": -199.4170379638672, |
|
"loss": 0.291, |
|
"loss/gen": 3.0484681129455566, |
|
"loss/real": -2.6573870182037354, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.650688171386719, |
|
"rewards/margins": 7.289878845214844, |
|
"rewards/real": -2.3608102798461914, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 97.9560398453689, |
|
"learning_rate": 2.3333333333333333e-07, |
|
"logits/generated": -2.9114887714385986, |
|
"logits/oppo_generated": -2.8113152980804443, |
|
"logits/oppo_real": -2.997610330581665, |
|
"logits/real": -2.724991798400879, |
|
"logps/generated": -74.77532196044922, |
|
"logps/oppo_gen": -59.91856384277344, |
|
"logps/oppo_real": -175.6089324951172, |
|
"logps/real": -181.092529296875, |
|
"loss": 0.1564, |
|
"loss/gen": 2.7578284740448, |
|
"loss/real": -2.5756349563598633, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.856756210327148, |
|
"rewards/margins": 9.373159408569336, |
|
"rewards/real": -5.4835968017578125, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 101.1866700089493, |
|
"learning_rate": 2.5e-07, |
|
"logits/generated": -2.738328218460083, |
|
"logits/oppo_generated": -2.712057113647461, |
|
"logits/oppo_real": -2.83805513381958, |
|
"logits/real": -2.6210412979125977, |
|
"logps/generated": -103.88157653808594, |
|
"logps/oppo_gen": -84.5518798828125, |
|
"logps/oppo_real": -331.96221923828125, |
|
"logps/real": -338.4169616699219, |
|
"loss": -0.0064, |
|
"loss/gen": 2.5180134773254395, |
|
"loss/real": -2.5506632328033447, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.329689025878906, |
|
"rewards/margins": 12.874977111816406, |
|
"rewards/real": -6.454712867736816, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 91.5968525574842, |
|
"learning_rate": 2.6666666666666667e-07, |
|
"logits/generated": -2.2416625022888184, |
|
"logits/oppo_generated": -2.4313888549804688, |
|
"logits/oppo_real": -2.3368191719055176, |
|
"logits/real": -2.3420183658599854, |
|
"logps/generated": -93.81153106689453, |
|
"logps/oppo_gen": -70.7446060180664, |
|
"logps/oppo_real": -186.56976318359375, |
|
"logps/real": -194.1738739013672, |
|
"loss": -0.0762, |
|
"loss/gen": 2.358870029449463, |
|
"loss/real": -2.5205307006835938, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.066925048828125, |
|
"rewards/margins": 15.46281623840332, |
|
"rewards/real": -7.604110240936279, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 91.5968525574842, |
|
"learning_rate": 2.6666666666666667e-07, |
|
"logits/generated": -2.808882474899292, |
|
"logits/oppo_generated": -2.8222999572753906, |
|
"logits/oppo_real": -2.956730842590332, |
|
"logits/real": -2.6913347244262695, |
|
"logps/generated": -77.1277847290039, |
|
"logps/oppo_gen": -55.461936950683594, |
|
"logps/oppo_real": -125.98847198486328, |
|
"logps/real": -135.24478149414062, |
|
"loss": -0.2171, |
|
"loss/gen": 2.427062511444092, |
|
"loss/real": -2.4810240268707275, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -21.665851593017578, |
|
"rewards/margins": 12.40954875946045, |
|
"rewards/real": -9.256302833557129, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 90.71163423694614, |
|
"learning_rate": 2.833333333333333e-07, |
|
"logits/generated": -2.524838447570801, |
|
"logits/oppo_generated": -2.9076757431030273, |
|
"logits/oppo_real": -2.661245822906494, |
|
"logits/real": -2.7569193840026855, |
|
"logps/generated": -94.57086181640625, |
|
"logps/oppo_gen": -71.46342468261719, |
|
"logps/oppo_real": -293.69677734375, |
|
"logps/real": -298.1125183105469, |
|
"loss": -0.199, |
|
"loss/gen": 2.345475196838379, |
|
"loss/real": -2.6118550300598145, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.107433319091797, |
|
"rewards/margins": 18.69169807434082, |
|
"rewards/real": -4.415735244750977, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 71.87999982609905, |
|
"learning_rate": 3e-07, |
|
"logits/generated": -2.6987175941467285, |
|
"logits/oppo_generated": -3.018123149871826, |
|
"logits/oppo_real": -2.837935447692871, |
|
"logits/real": -2.857689142227173, |
|
"logps/generated": -76.849853515625, |
|
"logps/oppo_gen": -51.06623458862305, |
|
"logps/oppo_real": -151.72972106933594, |
|
"logps/real": -170.76156616210938, |
|
"loss": -0.2983, |
|
"loss/gen": 2.2412900924682617, |
|
"loss/real": -2.2491354942321777, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -25.783626556396484, |
|
"rewards/margins": 6.751780033111572, |
|
"rewards/real": -19.031845092773438, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 77.17411637512444, |
|
"learning_rate": 3.166666666666666e-07, |
|
"logits/generated": -2.4447317123413086, |
|
"logits/oppo_generated": -2.7700376510620117, |
|
"logits/oppo_real": -2.6328747272491455, |
|
"logits/real": -2.6101927757263184, |
|
"logps/generated": -109.12590026855469, |
|
"logps/oppo_gen": -72.09120178222656, |
|
"logps/oppo_real": -411.427978515625, |
|
"logps/real": -411.268310546875, |
|
"loss": -0.5509, |
|
"loss/gen": 1.7849677801132202, |
|
"loss/real": -2.807443141937256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -37.03469467163086, |
|
"rewards/margins": 37.19430160522461, |
|
"rewards/real": 0.1596088409423828, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 77.17411637512444, |
|
"learning_rate": 3.166666666666666e-07, |
|
"logits/generated": -2.7265658378601074, |
|
"logits/oppo_generated": -2.91198468208313, |
|
"logits/oppo_real": -2.9211230278015137, |
|
"logits/real": -2.7112436294555664, |
|
"logps/generated": -121.98545837402344, |
|
"logps/oppo_gen": -82.21741485595703, |
|
"logps/oppo_real": -301.3589172363281, |
|
"logps/real": -312.0211181640625, |
|
"loss": -0.7401, |
|
"loss/gen": 1.702211618423462, |
|
"loss/real": -2.461573600769043, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -39.768035888671875, |
|
"rewards/margins": 29.1058292388916, |
|
"rewards/real": -10.66220760345459, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 85.01337498171243, |
|
"learning_rate": 3.333333333333333e-07, |
|
"logits/generated": -2.7819857597351074, |
|
"logits/oppo_generated": -2.4022648334503174, |
|
"logits/oppo_real": -2.97650146484375, |
|
"logits/real": -2.2471132278442383, |
|
"logps/generated": -130.29348754882812, |
|
"logps/oppo_gen": -99.30915832519531, |
|
"logps/oppo_real": -226.3162841796875, |
|
"logps/real": -240.45065307617188, |
|
"loss": -0.6496, |
|
"loss/gen": 2.034857749938965, |
|
"loss/real": -2.3712759017944336, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.984325408935547, |
|
"rewards/margins": 16.849956512451172, |
|
"rewards/real": -14.134370803833008, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 58.31929890696561, |
|
"learning_rate": 3.5e-07, |
|
"logits/generated": -2.751274585723877, |
|
"logits/oppo_generated": -2.854034900665283, |
|
"logits/oppo_real": -2.9424033164978027, |
|
"logits/real": -2.689624309539795, |
|
"logps/generated": -90.87772369384766, |
|
"logps/oppo_gen": -54.3837890625, |
|
"logps/oppo_real": -252.91123962402344, |
|
"logps/real": -270.4813537597656, |
|
"loss": -0.6919, |
|
"loss/gen": 1.8102836608886719, |
|
"loss/real": -2.287971258163452, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -36.493934631347656, |
|
"rewards/margins": 18.923805236816406, |
|
"rewards/real": -17.570131301879883, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 49.377771381874105, |
|
"learning_rate": 3.666666666666666e-07, |
|
"logits/generated": -2.703281879425049, |
|
"logits/oppo_generated": -2.9263906478881836, |
|
"logits/oppo_real": -2.9535346031188965, |
|
"logits/real": -2.689378261566162, |
|
"logps/generated": -125.86929321289062, |
|
"logps/oppo_gen": -78.93435668945312, |
|
"logps/oppo_real": -298.2490234375, |
|
"logps/real": -317.0907287597656, |
|
"loss": -0.7566, |
|
"loss/gen": 1.4581267833709717, |
|
"loss/real": -2.2758255004882812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -46.9349365234375, |
|
"rewards/margins": 28.093202590942383, |
|
"rewards/real": -18.841733932495117, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 47.132596888492415, |
|
"learning_rate": 3.8333333333333335e-07, |
|
"logits/generated": -2.8561768531799316, |
|
"logits/oppo_generated": -2.9521539211273193, |
|
"logits/oppo_real": -3.0699048042297363, |
|
"logits/real": -2.701744794845581, |
|
"logps/generated": -182.20703125, |
|
"logps/oppo_gen": -136.80690002441406, |
|
"logps/oppo_real": -344.64990234375, |
|
"logps/real": -365.87115478515625, |
|
"loss": -0.8056, |
|
"loss/gen": 1.5829627513885498, |
|
"loss/real": -2.2397522926330566, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -45.400123596191406, |
|
"rewards/margins": 24.178863525390625, |
|
"rewards/real": -21.22126007080078, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 47.136771677116634, |
|
"learning_rate": 4e-07, |
|
"logits/generated": -2.74558162689209, |
|
"logits/oppo_generated": -2.8447458744049072, |
|
"logits/oppo_real": -2.998192548751831, |
|
"logits/real": -2.603461742401123, |
|
"logps/generated": -134.50888061523438, |
|
"logps/oppo_gen": -79.24800109863281, |
|
"logps/oppo_real": -401.9757385253906, |
|
"logps/real": -427.4682922363281, |
|
"loss": -0.995, |
|
"loss/gen": 1.2431423664093018, |
|
"loss/real": -2.120981454849243, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -55.260887145996094, |
|
"rewards/margins": 29.768321990966797, |
|
"rewards/real": -25.492568969726562, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 47.136771677116634, |
|
"learning_rate": 4e-07, |
|
"logits/generated": -2.696960926055908, |
|
"logits/oppo_generated": -2.942030906677246, |
|
"logits/oppo_real": -2.9536867141723633, |
|
"logits/real": -2.6742172241210938, |
|
"logps/generated": -125.61725616455078, |
|
"logps/oppo_gen": -62.21235656738281, |
|
"logps/oppo_real": -296.8402404785156, |
|
"logps/real": -324.08892822265625, |
|
"loss": -5.4743, |
|
"loss/gen": 1.1838252544403076, |
|
"loss/real": -2.137930154800415, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -63.40489959716797, |
|
"rewards/margins": 36.15622329711914, |
|
"rewards/real": -27.248676300048828, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 38.149193463480486, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/generated": -2.420623779296875, |
|
"logits/oppo_generated": -2.792217493057251, |
|
"logits/oppo_real": -2.680948257446289, |
|
"logits/real": -2.556100845336914, |
|
"logps/generated": -105.52731323242188, |
|
"logps/oppo_gen": -49.044715881347656, |
|
"logps/oppo_real": -183.3726348876953, |
|
"logps/real": -208.43609619140625, |
|
"loss": -0.9189, |
|
"loss/gen": 1.2789992094039917, |
|
"loss/real": -2.150240182876587, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -56.48259735107422, |
|
"rewards/margins": 31.41913604736328, |
|
"rewards/real": -25.063465118408203, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 40.49399399669891, |
|
"learning_rate": 4.3333333333333335e-07, |
|
"logits/generated": -2.5155656337738037, |
|
"logits/oppo_generated": -2.5968940258026123, |
|
"logits/oppo_real": -2.84472393989563, |
|
"logits/real": -2.363577127456665, |
|
"logps/generated": -156.7322235107422, |
|
"logps/oppo_gen": -96.46727752685547, |
|
"logps/oppo_real": -441.2087097167969, |
|
"logps/real": -452.6773681640625, |
|
"loss": -0.993, |
|
"loss/gen": 1.218324899673462, |
|
"loss/real": -2.478170394897461, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -60.26493835449219, |
|
"rewards/margins": 48.7962532043457, |
|
"rewards/real": -11.4686861038208, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 38.69477383912377, |
|
"learning_rate": 4.5e-07, |
|
"logits/generated": -2.845750331878662, |
|
"logits/oppo_generated": -3.097993850708008, |
|
"logits/oppo_real": -3.161780834197998, |
|
"logits/real": -2.804795742034912, |
|
"logps/generated": -169.83187866210938, |
|
"logps/oppo_gen": -86.33152770996094, |
|
"logps/oppo_real": -374.5130615234375, |
|
"logps/real": -400.9438171386719, |
|
"loss": -1.0833, |
|
"loss/gen": 0.779202401638031, |
|
"loss/real": -2.109189987182617, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -83.50035095214844, |
|
"rewards/margins": 57.069610595703125, |
|
"rewards/real": -26.430742263793945, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 39.95823930895698, |
|
"learning_rate": 4.6666666666666666e-07, |
|
"logits/generated": -2.4254915714263916, |
|
"logits/oppo_generated": -2.648486614227295, |
|
"logits/oppo_real": -2.7488012313842773, |
|
"logits/real": -2.350640296936035, |
|
"logps/generated": -155.58016967773438, |
|
"logps/oppo_gen": -78.30477142333984, |
|
"logps/oppo_real": -363.86407470703125, |
|
"logps/real": -395.0137939453125, |
|
"loss": -1.1548, |
|
"loss/gen": 0.8230071067810059, |
|
"loss/real": -2.0378403663635254, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -77.275390625, |
|
"rewards/margins": 46.12569046020508, |
|
"rewards/real": -31.149703979492188, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 40.89272509652924, |
|
"learning_rate": 4.833333333333333e-07, |
|
"logits/generated": -2.461397409439087, |
|
"logits/oppo_generated": -2.864193916320801, |
|
"logits/oppo_real": -2.7761850357055664, |
|
"logits/real": -2.5565099716186523, |
|
"logps/generated": -136.72689819335938, |
|
"logps/oppo_gen": -60.6450309753418, |
|
"logps/oppo_real": -320.1565856933594, |
|
"logps/real": -337.87396240234375, |
|
"loss": -1.1347, |
|
"loss/gen": 0.9023051857948303, |
|
"loss/real": -2.3106727600097656, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -76.08185577392578, |
|
"rewards/margins": 58.36448669433594, |
|
"rewards/real": -17.717369079589844, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 89.36429678043967, |
|
"learning_rate": 5e-07, |
|
"logits/generated": -2.6618571281433105, |
|
"logits/oppo_generated": -2.812058210372925, |
|
"logits/oppo_real": -2.982236862182617, |
|
"logits/real": -2.515589714050293, |
|
"logps/generated": -162.60000610351562, |
|
"logps/oppo_gen": -90.06674194335938, |
|
"logps/oppo_real": -176.9713592529297, |
|
"logps/real": -211.77285766601562, |
|
"loss": -1.1724, |
|
"loss/gen": 0.8981304168701172, |
|
"loss/real": -1.989371657371521, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -72.53326416015625, |
|
"rewards/margins": 37.731773376464844, |
|
"rewards/real": -34.80148696899414, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 89.36429678043967, |
|
"learning_rate": 5e-07, |
|
"logits/generated": -2.5393388271331787, |
|
"logits/oppo_generated": -2.9253015518188477, |
|
"logits/oppo_real": -2.9079301357269287, |
|
"logits/real": -2.55344557762146, |
|
"logps/generated": -130.3933868408203, |
|
"logps/oppo_gen": -54.79414367675781, |
|
"logps/oppo_real": -186.92176818847656, |
|
"logps/real": -235.45858764648438, |
|
"loss": -5.6809, |
|
"loss/gen": 0.8535439372062683, |
|
"loss/real": -1.694696307182312, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -75.5992431640625, |
|
"rewards/margins": 27.062450408935547, |
|
"rewards/real": -48.53679275512695, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 43.34401895870049, |
|
"learning_rate": 4.996438746438746e-07, |
|
"logits/generated": -2.4617252349853516, |
|
"logits/oppo_generated": -2.9949498176574707, |
|
"logits/oppo_real": -2.9107003211975098, |
|
"logits/real": -2.6696996688842773, |
|
"logps/generated": -172.76368713378906, |
|
"logps/oppo_gen": -79.9820785522461, |
|
"logps/oppo_real": -404.1100158691406, |
|
"logps/real": -422.02642822265625, |
|
"loss": -1.2059, |
|
"loss/gen": 0.6165514588356018, |
|
"loss/real": -2.3567748069763184, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -92.7816162109375, |
|
"rewards/margins": 74.865234375, |
|
"rewards/real": -17.916383743286133, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 49.41177428002358, |
|
"learning_rate": 4.992877492877492e-07, |
|
"logits/generated": -2.026392698287964, |
|
"logits/oppo_generated": -2.4440221786499023, |
|
"logits/oppo_real": -2.3998050689697266, |
|
"logits/real": -2.089980125427246, |
|
"logps/generated": -209.39190673828125, |
|
"logps/oppo_gen": -93.22187805175781, |
|
"logps/oppo_real": -290.8685302734375, |
|
"logps/real": -320.3968811035156, |
|
"loss": -1.3765, |
|
"loss/gen": 0.5650486350059509, |
|
"loss/real": -2.131740093231201, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -116.17002868652344, |
|
"rewards/margins": 86.64169311523438, |
|
"rewards/real": -29.528339385986328, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 59.38357051631053, |
|
"learning_rate": 4.98931623931624e-07, |
|
"logits/generated": -2.3456368446350098, |
|
"logits/oppo_generated": -2.9232547283172607, |
|
"logits/oppo_real": -2.7114880084991455, |
|
"logits/real": -2.5829110145568848, |
|
"logps/generated": -168.85809326171875, |
|
"logps/oppo_gen": -64.50846862792969, |
|
"logps/oppo_real": -239.8323974609375, |
|
"logps/real": -297.2595520019531, |
|
"loss": -1.4436, |
|
"loss/gen": 0.5499280691146851, |
|
"loss/real": -1.58909273147583, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -104.34961700439453, |
|
"rewards/margins": 46.92247009277344, |
|
"rewards/real": -57.42715072631836, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 44.12861838917575, |
|
"learning_rate": 4.985754985754986e-07, |
|
"logits/generated": -2.6069109439849854, |
|
"logits/oppo_generated": -2.741456985473633, |
|
"logits/oppo_real": -2.9938759803771973, |
|
"logits/real": -2.428788185119629, |
|
"logps/generated": -149.2159423828125, |
|
"logps/oppo_gen": -58.174400329589844, |
|
"logps/oppo_real": -258.21685791015625, |
|
"logps/real": -301.842041015625, |
|
"loss": -1.4547, |
|
"loss/gen": 0.676410973072052, |
|
"loss/real": -1.867649793624878, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -91.04153442382812, |
|
"rewards/margins": 47.41633605957031, |
|
"rewards/real": -43.62519836425781, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 44.34332055817426, |
|
"learning_rate": 4.982193732193732e-07, |
|
"logits/generated": -2.587238311767578, |
|
"logits/oppo_generated": -2.814079761505127, |
|
"logits/oppo_real": -2.964923620223999, |
|
"logits/real": -2.480611801147461, |
|
"logps/generated": -175.05799865722656, |
|
"logps/oppo_gen": -78.5189208984375, |
|
"logps/oppo_real": -288.56396484375, |
|
"logps/real": -318.1793518066406, |
|
"loss": -1.5609, |
|
"loss/gen": 0.7453894019126892, |
|
"loss/real": -2.1940207481384277, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -96.53907775878906, |
|
"rewards/margins": 66.9237060546875, |
|
"rewards/real": -29.615373611450195, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 41.77245636004139, |
|
"learning_rate": 4.978632478632478e-07, |
|
"logits/generated": -2.5828328132629395, |
|
"logits/oppo_generated": -2.7121076583862305, |
|
"logits/oppo_real": -2.932806968688965, |
|
"logits/real": -2.3821425437927246, |
|
"logps/generated": -170.45315551757812, |
|
"logps/oppo_gen": -72.10917663574219, |
|
"logps/oppo_real": -299.3392333984375, |
|
"logps/real": -351.05755615234375, |
|
"loss": -1.5561, |
|
"loss/gen": 0.6000806093215942, |
|
"loss/real": -1.664915680885315, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -98.34397888183594, |
|
"rewards/margins": 46.625675201416016, |
|
"rewards/real": -51.718299865722656, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 40.92458049952987, |
|
"learning_rate": 4.975071225071225e-07, |
|
"logits/generated": -2.7721643447875977, |
|
"logits/oppo_generated": -2.814209461212158, |
|
"logits/oppo_real": -3.157527208328247, |
|
"logits/real": -2.545376777648926, |
|
"logps/generated": -190.73538208007812, |
|
"logps/oppo_gen": -80.24543762207031, |
|
"logps/oppo_real": -294.9969482421875, |
|
"logps/real": -325.6192626953125, |
|
"loss": -1.6521, |
|
"loss/gen": 0.5949017405509949, |
|
"loss/real": -2.104870319366455, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -110.48993682861328, |
|
"rewards/margins": 79.86763000488281, |
|
"rewards/real": -30.622314453125, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 40.13348241970552, |
|
"learning_rate": 4.971509971509972e-07, |
|
"logits/generated": -2.4653735160827637, |
|
"logits/oppo_generated": -2.9343652725219727, |
|
"logits/oppo_real": -2.7617945671081543, |
|
"logits/real": -2.6601219177246094, |
|
"logps/generated": -198.1933135986328, |
|
"logps/oppo_gen": -82.74765014648438, |
|
"logps/oppo_real": -315.32562255859375, |
|
"logps/real": -342.7396240234375, |
|
"loss": -1.6584, |
|
"loss/gen": 0.4857123792171478, |
|
"loss/real": -2.1372337341308594, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -115.44567108154297, |
|
"rewards/margins": 88.03167724609375, |
|
"rewards/real": -27.413999557495117, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 601.4569550267084, |
|
"learning_rate": 4.967948717948718e-07, |
|
"logits/generated": -2.5435636043548584, |
|
"logits/oppo_generated": -2.805569648742676, |
|
"logits/oppo_real": -2.7846250534057617, |
|
"logits/real": -2.5612943172454834, |
|
"logps/generated": -126.34326934814453, |
|
"logps/oppo_gen": -45.456573486328125, |
|
"logps/oppo_real": -161.39598083496094, |
|
"logps/real": -196.76950073242188, |
|
"loss": -2.1364, |
|
"loss/gen": 0.7522258758544922, |
|
"loss/real": -2.0092098712921143, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -80.88670349121094, |
|
"rewards/margins": 45.51318359375, |
|
"rewards/real": -35.3735237121582, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 38.65978819953379, |
|
"learning_rate": 4.964387464387464e-07, |
|
"logits/generated": -2.464820384979248, |
|
"logits/oppo_generated": -2.7444612979888916, |
|
"logits/oppo_real": -2.7595162391662598, |
|
"logits/real": -2.45442271232605, |
|
"logps/generated": -139.13876342773438, |
|
"logps/oppo_gen": -50.193504333496094, |
|
"logps/oppo_real": -148.25294494628906, |
|
"logps/real": -181.2758026123047, |
|
"loss": -1.7596, |
|
"loss/gen": 0.6654144525527954, |
|
"loss/real": -2.03794002532959, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -88.94526672363281, |
|
"rewards/margins": 55.92240524291992, |
|
"rewards/real": -33.022857666015625, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 167.4533750406363, |
|
"learning_rate": 4.96082621082621e-07, |
|
"logits/generated": -2.297238349914551, |
|
"logits/oppo_generated": -2.660369396209717, |
|
"logits/oppo_real": -2.6082496643066406, |
|
"logits/real": -2.3693835735321045, |
|
"logps/generated": -138.73458862304688, |
|
"logps/oppo_gen": -55.80210876464844, |
|
"logps/oppo_real": -201.49038696289062, |
|
"logps/real": -226.22634887695312, |
|
"loss": -2.0472, |
|
"loss/gen": 0.7817223072052002, |
|
"loss/real": -2.228806257247925, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -82.93248748779297, |
|
"rewards/margins": 58.19652557373047, |
|
"rewards/real": -24.735958099365234, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 38.20762371262606, |
|
"learning_rate": 4.957264957264958e-07, |
|
"logits/generated": -2.6939735412597656, |
|
"logits/oppo_generated": -2.746832847595215, |
|
"logits/oppo_real": -2.973560333251953, |
|
"logits/real": -2.453509569168091, |
|
"logps/generated": -155.50794982910156, |
|
"logps/oppo_gen": -77.28608703613281, |
|
"logps/oppo_real": -547.3628540039062, |
|
"logps/real": -561.0299072265625, |
|
"loss": -1.7775, |
|
"loss/gen": 0.9930198192596436, |
|
"loss/real": -2.6198465824127197, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -78.22187042236328, |
|
"rewards/margins": 64.55480194091797, |
|
"rewards/real": -13.66706657409668, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 111.84459258553808, |
|
"learning_rate": 4.953703703703703e-07, |
|
"logits/generated": -2.3411145210266113, |
|
"logits/oppo_generated": -2.664555072784424, |
|
"logits/oppo_real": -2.6400251388549805, |
|
"logits/real": -2.3643062114715576, |
|
"logps/generated": -196.82240295410156, |
|
"logps/oppo_gen": -78.57785034179688, |
|
"logps/oppo_real": -398.628662109375, |
|
"logps/real": -393.2767333984375, |
|
"loss": -2.0424, |
|
"loss/gen": 0.5210200548171997, |
|
"loss/real": -2.969128131866455, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -118.24455261230469, |
|
"rewards/margins": 123.59645080566406, |
|
"rewards/real": 5.351901054382324, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 50.014668258578155, |
|
"learning_rate": 4.95014245014245e-07, |
|
"logits/generated": -2.841848373413086, |
|
"logits/oppo_generated": -2.638930320739746, |
|
"logits/oppo_real": -3.1015210151672363, |
|
"logits/real": -2.4049315452575684, |
|
"logps/generated": -181.6864471435547, |
|
"logps/oppo_gen": -84.6130599975586, |
|
"logps/oppo_real": -310.54534912109375, |
|
"logps/real": -329.8880615234375, |
|
"loss": -1.8582, |
|
"loss/gen": 0.7178683876991272, |
|
"loss/real": -2.3166608810424805, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -97.0733871459961, |
|
"rewards/margins": 77.73066711425781, |
|
"rewards/real": -19.342731475830078, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 578.0217340204432, |
|
"learning_rate": 4.946581196581196e-07, |
|
"logits/generated": -2.5697083473205566, |
|
"logits/oppo_generated": -2.9305167198181152, |
|
"logits/oppo_real": -2.7986156940460205, |
|
"logits/real": -2.666802406311035, |
|
"logps/generated": -182.54356384277344, |
|
"logps/oppo_gen": -55.247596740722656, |
|
"logps/oppo_real": -159.6094970703125, |
|
"logps/real": -191.58706665039062, |
|
"loss": -3.1972, |
|
"loss/gen": 0.5683310031890869, |
|
"loss/real": -2.0497186183929443, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -127.29595947265625, |
|
"rewards/margins": 95.31836700439453, |
|
"rewards/real": -31.977588653564453, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 51.64103394316142, |
|
"learning_rate": 4.943019943019943e-07, |
|
"logits/generated": -2.674006462097168, |
|
"logits/oppo_generated": -2.733177900314331, |
|
"logits/oppo_real": -3.0261659622192383, |
|
"logits/real": -2.440023899078369, |
|
"logps/generated": -159.27865600585938, |
|
"logps/oppo_gen": -77.4105453491211, |
|
"logps/oppo_real": -291.50042724609375, |
|
"logps/real": -305.1040954589844, |
|
"loss": -1.8105, |
|
"loss/gen": 0.7389234900474548, |
|
"loss/real": -2.533874988555908, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -81.86811065673828, |
|
"rewards/margins": 68.26446533203125, |
|
"rewards/real": -13.60364055633545, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 93.81772033276816, |
|
"learning_rate": 4.93945868945869e-07, |
|
"logits/generated": -2.253323554992676, |
|
"logits/oppo_generated": -2.70068359375, |
|
"logits/oppo_real": -2.622352361679077, |
|
"logits/real": -2.379178047180176, |
|
"logps/generated": -198.3895263671875, |
|
"logps/oppo_gen": -66.53448486328125, |
|
"logps/oppo_real": -142.07913208007812, |
|
"logps/real": -186.49630737304688, |
|
"loss": -2.138, |
|
"loss/gen": 0.36330240964889526, |
|
"loss/real": -1.9270637035369873, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -131.85504150390625, |
|
"rewards/margins": 87.43788146972656, |
|
"rewards/real": -44.41715621948242, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 39.40381974811817, |
|
"learning_rate": 4.935897435897436e-07, |
|
"logits/generated": -2.8230233192443848, |
|
"logits/oppo_generated": -3.0608558654785156, |
|
"logits/oppo_real": -3.0881457328796387, |
|
"logits/real": -2.815178394317627, |
|
"logps/generated": -176.8870849609375, |
|
"logps/oppo_gen": -78.30126953125, |
|
"logps/oppo_real": -296.7585144042969, |
|
"logps/real": -305.8564453125, |
|
"loss": -1.9511, |
|
"loss/gen": 0.5859768390655518, |
|
"loss/real": -2.5944645404815674, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -98.58580780029297, |
|
"rewards/margins": 89.48786926269531, |
|
"rewards/real": -9.097940444946289, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 37.537286150739504, |
|
"learning_rate": 4.932336182336182e-07, |
|
"logits/generated": -2.67462158203125, |
|
"logits/oppo_generated": -2.904336929321289, |
|
"logits/oppo_real": -3.0007967948913574, |
|
"logits/real": -2.706181526184082, |
|
"logps/generated": -194.5768585205078, |
|
"logps/oppo_gen": -78.76142883300781, |
|
"logps/oppo_real": -321.17315673828125, |
|
"logps/real": -332.7289733886719, |
|
"loss": -2.0148, |
|
"loss/gen": 0.5784947276115417, |
|
"loss/real": -2.6833224296569824, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -115.8154296875, |
|
"rewards/margins": 104.25957489013672, |
|
"rewards/real": -11.5558500289917, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 52.37389057595874, |
|
"learning_rate": 4.928774928774928e-07, |
|
"logits/generated": -2.8456006050109863, |
|
"logits/oppo_generated": -3.0246148109436035, |
|
"logits/oppo_real": -3.155604839324951, |
|
"logits/real": -2.7388291358947754, |
|
"logps/generated": -199.48080444335938, |
|
"logps/oppo_gen": -99.78816986083984, |
|
"logps/oppo_real": -357.6624755859375, |
|
"logps/real": -361.3135070800781, |
|
"loss": -2.1519, |
|
"loss/gen": 0.5312547087669373, |
|
"loss/real": -2.7395927906036377, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -99.692626953125, |
|
"rewards/margins": 96.04158020019531, |
|
"rewards/real": -3.65103816986084, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 42.730668543561166, |
|
"learning_rate": 4.925213675213676e-07, |
|
"logits/generated": -2.5994668006896973, |
|
"logits/oppo_generated": -2.718918800354004, |
|
"logits/oppo_real": -2.8950438499450684, |
|
"logits/real": -2.5016493797302246, |
|
"logps/generated": -158.23098754882812, |
|
"logps/oppo_gen": -73.73533630371094, |
|
"logps/oppo_real": -276.2977294921875, |
|
"logps/real": -278.3821105957031, |
|
"loss": -2.1712, |
|
"loss/gen": 0.7339967489242554, |
|
"loss/real": -2.8307507038116455, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -84.49565124511719, |
|
"rewards/margins": 82.41130065917969, |
|
"rewards/real": -2.084348678588867, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 42.60172940894316, |
|
"learning_rate": 4.921652421652421e-07, |
|
"logits/generated": -2.6288089752197266, |
|
"logits/oppo_generated": -2.7741386890411377, |
|
"logits/oppo_real": -2.8905487060546875, |
|
"logits/real": -2.5671515464782715, |
|
"logps/generated": -164.08560180664062, |
|
"logps/oppo_gen": -70.42605590820312, |
|
"logps/oppo_real": -291.8798522949219, |
|
"logps/real": -327.316650390625, |
|
"loss": -2.0118, |
|
"loss/gen": 0.6031943559646606, |
|
"loss/real": -2.0373241901397705, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -93.65955352783203, |
|
"rewards/margins": 58.222755432128906, |
|
"rewards/real": -35.436798095703125, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 525.7316627805482, |
|
"learning_rate": 4.918091168091168e-07, |
|
"logits/generated": -2.4973931312561035, |
|
"logits/oppo_generated": -2.731257438659668, |
|
"logits/oppo_real": -2.804780960083008, |
|
"logits/real": -2.5444960594177246, |
|
"logps/generated": -230.39053344726562, |
|
"logps/oppo_gen": -143.67832946777344, |
|
"logps/oppo_real": -309.55450439453125, |
|
"logps/real": -315.0069274902344, |
|
"loss": -2.927, |
|
"loss/gen": 0.7850175499916077, |
|
"loss/real": -2.6631596088409424, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -86.71220397949219, |
|
"rewards/margins": 81.25978088378906, |
|
"rewards/real": -5.452421188354492, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 80.44494186631624, |
|
"learning_rate": 4.914529914529914e-07, |
|
"logits/generated": -2.6201300621032715, |
|
"logits/oppo_generated": -2.710496664047241, |
|
"logits/oppo_real": -2.980191707611084, |
|
"logits/real": -2.4632492065429688, |
|
"logps/generated": -194.9330291748047, |
|
"logps/oppo_gen": -71.51214599609375, |
|
"logps/oppo_real": -284.34765625, |
|
"logps/real": -298.09637451171875, |
|
"loss": -2.3734, |
|
"loss/gen": 0.33017057180404663, |
|
"loss/real": -2.5317859649658203, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -123.42086791992188, |
|
"rewards/margins": 109.67212677001953, |
|
"rewards/real": -13.74874210357666, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 45.73295767790172, |
|
"learning_rate": 4.910968660968661e-07, |
|
"logits/generated": -2.7911667823791504, |
|
"logits/oppo_generated": -3.0934062004089355, |
|
"logits/oppo_real": -3.077010154724121, |
|
"logits/real": -2.8539376258850098, |
|
"logps/generated": -222.52537536621094, |
|
"logps/oppo_gen": -109.1805419921875, |
|
"logps/oppo_real": -348.23834228515625, |
|
"logps/real": -337.4581298828125, |
|
"loss": -2.0979, |
|
"loss/gen": 0.41786307096481323, |
|
"loss/real": -3.0975918769836426, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -113.3448257446289, |
|
"rewards/margins": 124.12504577636719, |
|
"rewards/real": 10.780221939086914, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 79.69396419859851, |
|
"learning_rate": 4.907407407407407e-07, |
|
"logits/generated": -2.657637596130371, |
|
"logits/oppo_generated": -2.838265895843506, |
|
"logits/oppo_real": -3.01387357711792, |
|
"logits/real": -2.6080217361450195, |
|
"logps/generated": -174.41976928710938, |
|
"logps/oppo_gen": -75.5096206665039, |
|
"logps/oppo_real": -242.11915588378906, |
|
"logps/real": -260.3476867675781, |
|
"loss": -2.2245, |
|
"loss/gen": 0.5530567765235901, |
|
"loss/real": -2.540099620819092, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -98.91015625, |
|
"rewards/margins": 80.68161010742188, |
|
"rewards/real": -18.228544235229492, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 57.15850101499557, |
|
"learning_rate": 4.903846153846153e-07, |
|
"logits/generated": -2.718892812728882, |
|
"logits/oppo_generated": -2.786154270172119, |
|
"logits/oppo_real": -2.980445146560669, |
|
"logits/real": -2.5882253646850586, |
|
"logps/generated": -203.54293823242188, |
|
"logps/oppo_gen": -78.40753173828125, |
|
"logps/oppo_real": -188.29739379882812, |
|
"logps/real": -220.8904571533203, |
|
"loss": -2.1241, |
|
"loss/gen": 0.3356163501739502, |
|
"loss/real": -2.0496373176574707, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -125.13542175292969, |
|
"rewards/margins": 92.5423583984375, |
|
"rewards/real": -32.59306335449219, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 55.560476534442856, |
|
"learning_rate": 4.9002849002849e-07, |
|
"logits/generated": -2.484227180480957, |
|
"logits/oppo_generated": -2.8353500366210938, |
|
"logits/oppo_real": -2.788581371307373, |
|
"logits/real": -2.584005832672119, |
|
"logps/generated": -167.95159912109375, |
|
"logps/oppo_gen": -74.27359008789062, |
|
"logps/oppo_real": -262.4258728027344, |
|
"logps/real": -275.72314453125, |
|
"loss": -2.2186, |
|
"loss/gen": 0.6950039863586426, |
|
"loss/real": -2.613152027130127, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -93.67799377441406, |
|
"rewards/margins": 80.38072967529297, |
|
"rewards/real": -13.297256469726562, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 40.88330591021765, |
|
"learning_rate": 4.896723646723647e-07, |
|
"logits/generated": -2.44921612739563, |
|
"logits/oppo_generated": -2.8188014030456543, |
|
"logits/oppo_real": -2.757133960723877, |
|
"logits/real": -2.499187469482422, |
|
"logps/generated": -161.24481201171875, |
|
"logps/oppo_gen": -55.317054748535156, |
|
"logps/oppo_real": -178.10824584960938, |
|
"logps/real": -189.52215576171875, |
|
"loss": -2.1209, |
|
"loss/gen": 0.4848253130912781, |
|
"loss/real": -2.4801671504974365, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -105.9277572631836, |
|
"rewards/margins": 94.51385498046875, |
|
"rewards/real": -11.413912773132324, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 35.69595968854091, |
|
"learning_rate": 4.893162393162393e-07, |
|
"logits/generated": -2.509648323059082, |
|
"logits/oppo_generated": -2.865746259689331, |
|
"logits/oppo_real": -2.85042142868042, |
|
"logits/real": -2.612628936767578, |
|
"logps/generated": -207.73446655273438, |
|
"logps/oppo_gen": -101.81581115722656, |
|
"logps/oppo_real": -463.47314453125, |
|
"logps/real": -449.06451416015625, |
|
"loss": -2.2045, |
|
"loss/gen": 0.5114428997039795, |
|
"loss/real": -3.246914863586426, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -105.91865539550781, |
|
"rewards/margins": 120.32732391357422, |
|
"rewards/real": 14.408672332763672, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 46.60751808654372, |
|
"learning_rate": 4.889601139601139e-07, |
|
"logits/generated": -2.513535499572754, |
|
"logits/oppo_generated": -2.9923882484436035, |
|
"logits/oppo_real": -2.813816547393799, |
|
"logits/real": -2.6687417030334473, |
|
"logps/generated": -200.91436767578125, |
|
"logps/oppo_gen": -78.51251220703125, |
|
"logps/oppo_real": -286.4658508300781, |
|
"logps/real": -272.64630126953125, |
|
"loss": -2.3923, |
|
"loss/gen": 0.3351864218711853, |
|
"loss/real": -3.2229790687561035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -122.40186309814453, |
|
"rewards/margins": 136.22140502929688, |
|
"rewards/real": 13.81955337524414, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 46.60751808654372, |
|
"learning_rate": 4.889601139601139e-07, |
|
"logits/generated": -2.667757987976074, |
|
"logits/oppo_generated": -2.7725887298583984, |
|
"logits/oppo_real": -3.063380002975464, |
|
"logits/real": -2.553708076477051, |
|
"logps/generated": -177.4560546875, |
|
"logps/oppo_gen": -79.40229034423828, |
|
"logps/oppo_real": -383.419677734375, |
|
"logps/real": -384.32568359375, |
|
"loss": -22602.7559, |
|
"loss/gen": 0.5979279279708862, |
|
"loss/real": -2.8606982231140137, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -98.05377960205078, |
|
"rewards/margins": 97.14777374267578, |
|
"rewards/real": -0.9059967994689941, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 41.5718210882534, |
|
"learning_rate": 4.886039886039886e-07, |
|
"logits/generated": -2.7659826278686523, |
|
"logits/oppo_generated": -2.8321666717529297, |
|
"logits/oppo_real": -3.1668171882629395, |
|
"logits/real": -2.5931761264801025, |
|
"logps/generated": -241.8350067138672, |
|
"logps/oppo_gen": -99.83964538574219, |
|
"logps/oppo_real": -322.6613464355469, |
|
"logps/real": -311.7099914550781, |
|
"loss": -2.2896, |
|
"loss/gen": 0.5622150897979736, |
|
"loss/real": -3.6310153007507324, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -141.99537658691406, |
|
"rewards/margins": 152.9467315673828, |
|
"rewards/real": 10.951353073120117, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 44.716500642240554, |
|
"learning_rate": 4.882478632478633e-07, |
|
"logits/generated": -2.7758758068084717, |
|
"logits/oppo_generated": -3.000812530517578, |
|
"logits/oppo_real": -3.1619484424591064, |
|
"logits/real": -2.7301864624023438, |
|
"logps/generated": -200.3653564453125, |
|
"logps/oppo_gen": -83.82888793945312, |
|
"logps/oppo_real": -441.3746337890625, |
|
"logps/real": -431.2779541015625, |
|
"loss": -2.3134, |
|
"loss/gen": 0.3644047975540161, |
|
"loss/real": -3.1670141220092773, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -116.53646850585938, |
|
"rewards/margins": 126.63313293457031, |
|
"rewards/real": 10.096664428710938, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 44.716500642240554, |
|
"learning_rate": 4.882478632478633e-07, |
|
"logits/generated": -2.254303455352783, |
|
"logits/oppo_generated": -2.4111037254333496, |
|
"logits/oppo_real": -2.622360944747925, |
|
"logits/real": -2.1454672813415527, |
|
"logps/generated": -177.43157958984375, |
|
"logps/oppo_gen": -94.29784393310547, |
|
"logps/oppo_real": -307.8828125, |
|
"logps/real": -284.0107727050781, |
|
"loss": -17.4644, |
|
"loss/gen": 1.3658581972122192, |
|
"loss/real": -3.9946789741516113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -83.13372802734375, |
|
"rewards/margins": 107.00576782226562, |
|
"rewards/real": 23.872041702270508, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 44.716500642240554, |
|
"learning_rate": 4.882478632478633e-07, |
|
"logits/generated": -2.561386823654175, |
|
"logits/oppo_generated": -2.7816574573516846, |
|
"logits/oppo_real": -2.923349380493164, |
|
"logits/real": -2.5139307975769043, |
|
"logps/generated": -170.64508056640625, |
|
"logps/oppo_gen": -70.22672271728516, |
|
"logps/oppo_real": -286.0644836425781, |
|
"logps/real": -304.5027160644531, |
|
"loss": -51.313, |
|
"loss/gen": 0.5637646317481995, |
|
"loss/real": -2.394735813140869, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -100.41835021972656, |
|
"rewards/margins": 81.98014831542969, |
|
"rewards/real": -18.438209533691406, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 57.8592273155015, |
|
"learning_rate": 4.878917378917379e-07, |
|
"logits/generated": -2.341658115386963, |
|
"logits/oppo_generated": -2.624129056930542, |
|
"logits/oppo_real": -2.6314826011657715, |
|
"logits/real": -2.3068737983703613, |
|
"logps/generated": -137.337646484375, |
|
"logps/oppo_gen": -48.185340881347656, |
|
"logps/oppo_real": -148.66656494140625, |
|
"logps/real": -167.26583862304688, |
|
"loss": -2.4266, |
|
"loss/gen": 0.7307255268096924, |
|
"loss/real": -2.38840389251709, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -89.15231323242188, |
|
"rewards/margins": 70.55303955078125, |
|
"rewards/real": -18.599275588989258, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 61.02295290503402, |
|
"learning_rate": 4.875356125356125e-07, |
|
"logits/generated": -2.563333034515381, |
|
"logits/oppo_generated": -2.668670177459717, |
|
"logits/oppo_real": -2.9500231742858887, |
|
"logits/real": -2.375744581222534, |
|
"logps/generated": -193.91883850097656, |
|
"logps/oppo_gen": -76.79248809814453, |
|
"logps/oppo_real": -287.1414794921875, |
|
"logps/real": -309.30792236328125, |
|
"loss": -2.2982, |
|
"loss/gen": 0.386036217212677, |
|
"loss/real": -2.4096016883850098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -117.1263427734375, |
|
"rewards/margins": 94.95994567871094, |
|
"rewards/real": -22.166412353515625, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 169.99358903352797, |
|
"learning_rate": 4.871794871794871e-07, |
|
"logits/generated": -2.613680601119995, |
|
"logits/oppo_generated": -2.8624868392944336, |
|
"logits/oppo_real": -3.0077338218688965, |
|
"logits/real": -2.5658488273620605, |
|
"logps/generated": -205.80078125, |
|
"logps/oppo_gen": -103.01863861083984, |
|
"logps/oppo_real": -484.10565185546875, |
|
"logps/real": -483.44097900390625, |
|
"loss": -3.0697, |
|
"loss/gen": 0.7189458608627319, |
|
"loss/real": -2.95969820022583, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -102.78215026855469, |
|
"rewards/margins": 103.44681549072266, |
|
"rewards/real": 0.6646575927734375, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 43.23124085134354, |
|
"learning_rate": 4.868233618233618e-07, |
|
"logits/generated": -2.5590624809265137, |
|
"logits/oppo_generated": -2.976921796798706, |
|
"logits/oppo_real": -3.0094780921936035, |
|
"logits/real": -2.6058220863342285, |
|
"logps/generated": -179.38499450683594, |
|
"logps/oppo_gen": -66.51390075683594, |
|
"logps/oppo_real": -174.39071655273438, |
|
"logps/real": -176.55557250976562, |
|
"loss": -2.4127, |
|
"loss/gen": 0.44477635622024536, |
|
"loss/real": -2.9173386096954346, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -112.87110137939453, |
|
"rewards/margins": 110.70625305175781, |
|
"rewards/real": -2.1648406982421875, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 58.48061786622663, |
|
"learning_rate": 4.864672364672365e-07, |
|
"logits/generated": -2.5132930278778076, |
|
"logits/oppo_generated": -3.01529598236084, |
|
"logits/oppo_real": -2.9185380935668945, |
|
"logits/real": -2.643099308013916, |
|
"logps/generated": -246.02755737304688, |
|
"logps/oppo_gen": -86.220458984375, |
|
"logps/oppo_real": -329.8023376464844, |
|
"logps/real": -310.6354064941406, |
|
"loss": -2.4253, |
|
"loss/gen": 0.5145424008369446, |
|
"loss/real": -3.408470392227173, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -159.80709838867188, |
|
"rewards/margins": 178.97406005859375, |
|
"rewards/real": 19.166940689086914, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 54.441314178233476, |
|
"learning_rate": 4.861111111111111e-07, |
|
"logits/generated": -2.3987717628479004, |
|
"logits/oppo_generated": -2.864108085632324, |
|
"logits/oppo_real": -2.8596436977386475, |
|
"logits/real": -2.5680923461914062, |
|
"logps/generated": -177.2393798828125, |
|
"logps/oppo_gen": -79.35113525390625, |
|
"logps/oppo_real": -357.43438720703125, |
|
"logps/real": -336.9925537109375, |
|
"loss": -2.5489, |
|
"loss/gen": 0.5846430659294128, |
|
"loss/real": -3.432420492172241, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -97.88824462890625, |
|
"rewards/margins": 118.33008575439453, |
|
"rewards/real": 20.441844940185547, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 66.89113415188145, |
|
"learning_rate": 4.857549857549857e-07, |
|
"logits/generated": -2.436213493347168, |
|
"logits/oppo_generated": -2.635812282562256, |
|
"logits/oppo_real": -2.784547805786133, |
|
"logits/real": -2.3119587898254395, |
|
"logps/generated": -188.01727294921875, |
|
"logps/oppo_gen": -87.48421478271484, |
|
"logps/oppo_real": -250.10626220703125, |
|
"logps/real": -244.0000457763672, |
|
"loss": -2.5429, |
|
"loss/gen": 0.7033488154411316, |
|
"loss/real": -2.972754955291748, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -100.53305053710938, |
|
"rewards/margins": 106.6392593383789, |
|
"rewards/real": 6.106204986572266, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 60.396710964360466, |
|
"learning_rate": 4.853988603988603e-07, |
|
"logits/generated": -2.538017749786377, |
|
"logits/oppo_generated": -2.9845218658447266, |
|
"logits/oppo_real": -3.016307830810547, |
|
"logits/real": -2.62971830368042, |
|
"logps/generated": -155.26116943359375, |
|
"logps/oppo_gen": -55.523197174072266, |
|
"logps/oppo_real": -291.81378173828125, |
|
"logps/real": -305.18359375, |
|
"loss": -2.5841, |
|
"loss/gen": 0.6104675531387329, |
|
"loss/real": -2.7806365489959717, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -99.73796081542969, |
|
"rewards/margins": 86.36811828613281, |
|
"rewards/real": -13.369840621948242, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 53.259132139474445, |
|
"learning_rate": 4.850427350427351e-07, |
|
"logits/generated": -2.45882511138916, |
|
"logits/oppo_generated": -2.8317785263061523, |
|
"logits/oppo_real": -2.849785327911377, |
|
"logits/real": -2.4766674041748047, |
|
"logps/generated": -163.40484619140625, |
|
"logps/oppo_gen": -65.48351287841797, |
|
"logps/oppo_real": -259.8980712890625, |
|
"logps/real": -273.74273681640625, |
|
"loss": -2.6208, |
|
"loss/gen": 0.5979644656181335, |
|
"loss/real": -2.596888542175293, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -97.92133331298828, |
|
"rewards/margins": 84.07666015625, |
|
"rewards/real": -13.844667434692383, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1690.1221109730504, |
|
"learning_rate": 4.846866096866097e-07, |
|
"logits/generated": -2.439664602279663, |
|
"logits/oppo_generated": -2.9616637229919434, |
|
"logits/oppo_real": -2.8549320697784424, |
|
"logits/real": -2.6093478202819824, |
|
"logps/generated": -177.17694091796875, |
|
"logps/oppo_gen": -66.1073226928711, |
|
"logps/oppo_real": -297.0393981933594, |
|
"logps/real": -275.50140380859375, |
|
"loss": -7.118, |
|
"loss/gen": 0.45209312438964844, |
|
"loss/real": -3.4258365631103516, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -111.06962585449219, |
|
"rewards/margins": 132.60760498046875, |
|
"rewards/real": 21.537994384765625, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 74.6983974790457, |
|
"learning_rate": 4.843304843304843e-07, |
|
"logits/generated": -2.5593514442443848, |
|
"logits/oppo_generated": -2.944060802459717, |
|
"logits/oppo_real": -2.977362632751465, |
|
"logits/real": -2.5549235343933105, |
|
"logps/generated": -160.49493408203125, |
|
"logps/oppo_gen": -49.032493591308594, |
|
"logps/oppo_real": -197.13412475585938, |
|
"logps/real": -235.08087158203125, |
|
"loss": -2.3729, |
|
"loss/gen": 0.49238741397857666, |
|
"loss/real": -2.1076858043670654, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -111.46244812011719, |
|
"rewards/margins": 73.51570892333984, |
|
"rewards/real": -37.94673538208008, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 70.76835216372322, |
|
"learning_rate": 4.839743589743589e-07, |
|
"logits/generated": -2.497036933898926, |
|
"logits/oppo_generated": -2.9935152530670166, |
|
"logits/oppo_real": -2.782620906829834, |
|
"logits/real": -2.689803123474121, |
|
"logps/generated": -172.08953857421875, |
|
"logps/oppo_gen": -79.41259002685547, |
|
"logps/oppo_real": -304.58465576171875, |
|
"logps/real": -297.86407470703125, |
|
"loss": -2.6668, |
|
"loss/gen": 0.9129126071929932, |
|
"loss/real": -3.119077205657959, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -92.67694854736328, |
|
"rewards/margins": 99.39753723144531, |
|
"rewards/real": 6.720589637756348, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 68.6735548002741, |
|
"learning_rate": 4.836182336182337e-07, |
|
"logits/generated": -2.508333683013916, |
|
"logits/oppo_generated": -3.0348973274230957, |
|
"logits/oppo_real": -2.8550362586975098, |
|
"logits/real": -2.699089527130127, |
|
"logps/generated": -235.1026611328125, |
|
"logps/oppo_gen": -147.11734008789062, |
|
"logps/oppo_real": -324.0049743652344, |
|
"logps/real": -307.71380615234375, |
|
"loss": -2.9712, |
|
"loss/gen": 0.950553297996521, |
|
"loss/real": -3.3368782997131348, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -87.98532104492188, |
|
"rewards/margins": 104.27648162841797, |
|
"rewards/real": 16.291156768798828, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 70.78148799628862, |
|
"learning_rate": 4.832621082621082e-07, |
|
"logits/generated": -2.5086488723754883, |
|
"logits/oppo_generated": -2.8708338737487793, |
|
"logits/oppo_real": -2.8143606185913086, |
|
"logits/real": -2.600031852722168, |
|
"logps/generated": -205.0748291015625, |
|
"logps/oppo_gen": -81.77798461914062, |
|
"logps/oppo_real": -330.5220031738281, |
|
"logps/real": -311.5235900878906, |
|
"loss": -2.4969, |
|
"loss/gen": 0.43894362449645996, |
|
"loss/real": -3.3736114501953125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -123.29684448242188, |
|
"rewards/margins": 142.29525756835938, |
|
"rewards/real": 18.99840545654297, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 76.29366705574257, |
|
"learning_rate": 4.829059829059829e-07, |
|
"logits/generated": -2.3690929412841797, |
|
"logits/oppo_generated": -2.7298922538757324, |
|
"logits/oppo_real": -2.698655605316162, |
|
"logits/real": -2.4298644065856934, |
|
"logps/generated": -173.87249755859375, |
|
"logps/oppo_gen": -74.60616302490234, |
|
"logps/oppo_real": -251.41427612304688, |
|
"logps/real": -237.06617736816406, |
|
"loss": -2.5674, |
|
"loss/gen": 0.6722112894058228, |
|
"loss/real": -3.512993335723877, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -99.26634216308594, |
|
"rewards/margins": 113.61441802978516, |
|
"rewards/real": 14.348082542419434, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 249.33590702353723, |
|
"learning_rate": 4.825498575498575e-07, |
|
"logits/generated": -2.570150375366211, |
|
"logits/oppo_generated": -2.9584808349609375, |
|
"logits/oppo_real": -2.8358330726623535, |
|
"logits/real": -2.728276491165161, |
|
"logps/generated": -160.3553466796875, |
|
"logps/oppo_gen": -83.23335266113281, |
|
"logps/oppo_real": -311.66064453125, |
|
"logps/real": -289.5158996582031, |
|
"loss": -3.7493, |
|
"loss/gen": 1.0953956842422485, |
|
"loss/real": -3.4564929008483887, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -77.12197875976562, |
|
"rewards/margins": 99.26671600341797, |
|
"rewards/real": 22.14473533630371, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1622.4536202924262, |
|
"learning_rate": 4.821937321937321e-07, |
|
"logits/generated": -2.4929990768432617, |
|
"logits/oppo_generated": -2.83894681930542, |
|
"logits/oppo_real": -2.731696605682373, |
|
"logits/real": -2.6017203330993652, |
|
"logps/generated": -202.414306640625, |
|
"logps/oppo_gen": -103.72628021240234, |
|
"logps/oppo_real": -218.9561767578125, |
|
"logps/real": -203.55921936035156, |
|
"loss": -7.3232, |
|
"loss/gen": 0.5733932256698608, |
|
"loss/real": -3.2266201972961426, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -98.68803405761719, |
|
"rewards/margins": 114.0849838256836, |
|
"rewards/real": 15.396947860717773, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 66.96757975529091, |
|
"learning_rate": 4.818376068376069e-07, |
|
"logits/generated": -2.6456146240234375, |
|
"logits/oppo_generated": -2.7633142471313477, |
|
"logits/oppo_real": -2.9560418128967285, |
|
"logits/real": -2.4849910736083984, |
|
"logps/generated": -207.35745239257812, |
|
"logps/oppo_gen": -74.91079711914062, |
|
"logps/oppo_real": -299.2713623046875, |
|
"logps/real": -269.4769287109375, |
|
"loss": -2.824, |
|
"loss/gen": 0.35913562774658203, |
|
"loss/real": -3.8368678092956543, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -132.4466552734375, |
|
"rewards/margins": 162.2410888671875, |
|
"rewards/real": 29.7944393157959, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 702.0471182548932, |
|
"learning_rate": 4.814814814814814e-07, |
|
"logits/generated": -2.798750400543213, |
|
"logits/oppo_generated": -2.8308515548706055, |
|
"logits/oppo_real": -3.085522174835205, |
|
"logits/real": -2.5982208251953125, |
|
"logps/generated": -237.33787536621094, |
|
"logps/oppo_gen": -134.01483154296875, |
|
"logps/oppo_real": -442.37945556640625, |
|
"logps/real": -406.73846435546875, |
|
"loss": -1.4584, |
|
"loss/gen": 0.540956437587738, |
|
"loss/real": -3.9525904655456543, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -103.32305908203125, |
|
"rewards/margins": 138.96401977539062, |
|
"rewards/real": 35.640968322753906, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 54.58323990131952, |
|
"learning_rate": 4.811253561253561e-07, |
|
"logits/generated": -2.40437650680542, |
|
"logits/oppo_generated": -2.8044867515563965, |
|
"logits/oppo_real": -2.8060150146484375, |
|
"logits/real": -2.5216751098632812, |
|
"logps/generated": -167.4996337890625, |
|
"logps/oppo_gen": -51.423309326171875, |
|
"logps/oppo_real": -222.54879760742188, |
|
"logps/real": -225.21975708007812, |
|
"loss": -2.7112, |
|
"loss/gen": 0.3818909823894501, |
|
"loss/real": -2.9921202659606934, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -116.07633972167969, |
|
"rewards/margins": 113.40538024902344, |
|
"rewards/real": -2.670961380004883, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 45.42335040173446, |
|
"learning_rate": 4.807692307692307e-07, |
|
"logits/generated": -2.6010522842407227, |
|
"logits/oppo_generated": -2.932793140411377, |
|
"logits/oppo_real": -2.9959638118743896, |
|
"logits/real": -2.675575017929077, |
|
"logps/generated": -186.99935913085938, |
|
"logps/oppo_gen": -68.20332336425781, |
|
"logps/oppo_real": -376.541015625, |
|
"logps/real": -360.2162170410156, |
|
"loss": -2.6531, |
|
"loss/gen": 0.3653205931186676, |
|
"loss/real": -3.4390110969543457, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -118.7960205078125, |
|
"rewards/margins": 135.12083435058594, |
|
"rewards/real": 16.324806213378906, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 183.07126984797478, |
|
"learning_rate": 4.804131054131054e-07, |
|
"logits/generated": -2.4377760887145996, |
|
"logits/oppo_generated": -2.780601739883423, |
|
"logits/oppo_real": -2.8726038932800293, |
|
"logits/real": -2.5523815155029297, |
|
"logps/generated": -195.5144500732422, |
|
"logps/oppo_gen": -75.83106994628906, |
|
"logps/oppo_real": -327.609619140625, |
|
"logps/real": -326.1234130859375, |
|
"loss": -2.9695, |
|
"loss/gen": 0.4366985857486725, |
|
"loss/real": -2.959474563598633, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -119.68338012695312, |
|
"rewards/margins": 121.16956329345703, |
|
"rewards/real": 1.4861793518066406, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 549.6681100900797, |
|
"learning_rate": 4.8005698005698e-07, |
|
"logits/generated": -2.454486846923828, |
|
"logits/oppo_generated": -2.91953706741333, |
|
"logits/oppo_real": -2.820370674133301, |
|
"logits/real": -2.6601805686950684, |
|
"logps/generated": -184.12876892089844, |
|
"logps/oppo_gen": -75.91517639160156, |
|
"logps/oppo_real": -531.0400390625, |
|
"logps/real": -524.9949340820312, |
|
"loss": -4.2352, |
|
"loss/gen": 0.5441170334815979, |
|
"loss/real": -3.007982015609741, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -108.21359252929688, |
|
"rewards/margins": 114.25873565673828, |
|
"rewards/real": 6.045146942138672, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 60.524447459141456, |
|
"learning_rate": 4.797008547008547e-07, |
|
"logits/generated": -2.492274761199951, |
|
"logits/oppo_generated": -2.927794933319092, |
|
"logits/oppo_real": -2.8259315490722656, |
|
"logits/real": -2.6615185737609863, |
|
"logps/generated": -186.5205078125, |
|
"logps/oppo_gen": -75.32722473144531, |
|
"logps/oppo_real": -334.3116149902344, |
|
"logps/real": -322.076904296875, |
|
"loss": -2.7939, |
|
"loss/gen": 0.4799632132053375, |
|
"loss/real": -3.1898889541625977, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -111.19327545166016, |
|
"rewards/margins": 123.42797088623047, |
|
"rewards/real": 12.23469066619873, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 72.22195132995404, |
|
"learning_rate": 4.793447293447293e-07, |
|
"logits/generated": -2.681981325149536, |
|
"logits/oppo_generated": -2.798323154449463, |
|
"logits/oppo_real": -3.0827927589416504, |
|
"logits/real": -2.6118640899658203, |
|
"logps/generated": -193.87255859375, |
|
"logps/oppo_gen": -85.98326110839844, |
|
"logps/oppo_real": -484.7052001953125, |
|
"logps/real": -468.4195556640625, |
|
"loss": -2.7665, |
|
"loss/gen": 0.48763740062713623, |
|
"loss/real": -3.367074489593506, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -107.88929748535156, |
|
"rewards/margins": 124.17497253417969, |
|
"rewards/real": 16.285675048828125, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 120.03039979521854, |
|
"learning_rate": 4.78988603988604e-07, |
|
"logits/generated": -2.369338035583496, |
|
"logits/oppo_generated": -2.820817232131958, |
|
"logits/oppo_real": -2.7580766677856445, |
|
"logits/real": -2.554074287414551, |
|
"logps/generated": -224.9398651123047, |
|
"logps/oppo_gen": -98.39456176757812, |
|
"logps/oppo_real": -435.86871337890625, |
|
"logps/real": -420.9836730957031, |
|
"loss": -2.9534, |
|
"loss/gen": 0.3016844391822815, |
|
"loss/real": -3.4657280445098877, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -126.5452880859375, |
|
"rewards/margins": 141.43032836914062, |
|
"rewards/real": 14.885029792785645, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1841.6121334309241, |
|
"learning_rate": 4.786324786324786e-07, |
|
"logits/generated": -2.6123902797698975, |
|
"logits/oppo_generated": -2.991581439971924, |
|
"logits/oppo_real": -3.002182960510254, |
|
"logits/real": -2.710818290710449, |
|
"logps/generated": -204.66802978515625, |
|
"logps/oppo_gen": -81.12940216064453, |
|
"logps/oppo_real": -296.61138916015625, |
|
"logps/real": -273.9120788574219, |
|
"loss": -8.6558, |
|
"loss/gen": 0.40045416355133057, |
|
"loss/real": -3.615565776824951, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -123.53861236572266, |
|
"rewards/margins": 146.23793029785156, |
|
"rewards/real": 22.69931411743164, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 50.14175949168393, |
|
"learning_rate": 4.782763532763532e-07, |
|
"logits/generated": -2.58475399017334, |
|
"logits/oppo_generated": -2.8433456420898438, |
|
"logits/oppo_real": -3.012195110321045, |
|
"logits/real": -2.584439992904663, |
|
"logps/generated": -161.61810302734375, |
|
"logps/oppo_gen": -63.396881103515625, |
|
"logps/oppo_real": -288.55780029296875, |
|
"logps/real": -261.4425048828125, |
|
"loss": -2.8566, |
|
"loss/gen": 0.7563031315803528, |
|
"loss/real": -3.606753349304199, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -98.22122192382812, |
|
"rewards/margins": 125.3365249633789, |
|
"rewards/real": 27.115306854248047, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 365.902095853522, |
|
"learning_rate": 4.779202279202279e-07, |
|
"logits/generated": -2.642536163330078, |
|
"logits/oppo_generated": -2.75607967376709, |
|
"logits/oppo_real": -3.044626235961914, |
|
"logits/real": -2.5140504837036133, |
|
"logps/generated": -215.69821166992188, |
|
"logps/oppo_gen": -89.79308319091797, |
|
"logps/oppo_real": -237.51071166992188, |
|
"logps/real": -235.16732788085938, |
|
"loss": -4.2838, |
|
"loss/gen": 0.3768947720527649, |
|
"loss/real": -2.9775023460388184, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -125.90511322021484, |
|
"rewards/margins": 128.24850463867188, |
|
"rewards/real": 2.3433871269226074, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 100.40770890630111, |
|
"learning_rate": 4.775641025641026e-07, |
|
"logits/generated": -2.698265790939331, |
|
"logits/oppo_generated": -2.9334537982940674, |
|
"logits/oppo_real": -3.0197911262512207, |
|
"logits/real": -2.6614885330200195, |
|
"logps/generated": -193.49476623535156, |
|
"logps/oppo_gen": -86.25882720947266, |
|
"logps/oppo_real": -171.73361206054688, |
|
"logps/real": -154.22259521484375, |
|
"loss": -3.0162, |
|
"loss/gen": 0.695202112197876, |
|
"loss/real": -3.3781354427337646, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -107.23593139648438, |
|
"rewards/margins": 124.74696350097656, |
|
"rewards/real": 17.511028289794922, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 95.44328629315685, |
|
"learning_rate": 4.772079772079772e-07, |
|
"logits/generated": -2.5307321548461914, |
|
"logits/oppo_generated": -2.8885016441345215, |
|
"logits/oppo_real": -2.9670629501342773, |
|
"logits/real": -2.6184444427490234, |
|
"logps/generated": -137.50279235839844, |
|
"logps/oppo_gen": -52.36747741699219, |
|
"logps/oppo_real": -234.88699340820312, |
|
"logps/real": -211.22215270996094, |
|
"loss": -2.7515, |
|
"loss/gen": 1.0063905715942383, |
|
"loss/real": -3.66544771194458, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -85.13531494140625, |
|
"rewards/margins": 108.8001708984375, |
|
"rewards/real": 23.664859771728516, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 92.31076504801594, |
|
"learning_rate": 4.768518518518518e-07, |
|
"logits/generated": -2.4487879276275635, |
|
"logits/oppo_generated": -2.902094841003418, |
|
"logits/oppo_real": -2.738150119781494, |
|
"logits/real": -2.5988502502441406, |
|
"logps/generated": -183.7650146484375, |
|
"logps/oppo_gen": -71.77503967285156, |
|
"logps/oppo_real": -226.59805297851562, |
|
"logps/real": -210.24148559570312, |
|
"loss": -2.6027, |
|
"loss/gen": 0.4320296347141266, |
|
"loss/real": -3.250072956085205, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -111.989990234375, |
|
"rewards/margins": 128.34658813476562, |
|
"rewards/real": 16.356592178344727, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 92.31076504801594, |
|
"learning_rate": 4.768518518518518e-07, |
|
"logits/generated": -2.4111056327819824, |
|
"logits/oppo_generated": -2.78233003616333, |
|
"logits/oppo_real": -2.810633420944214, |
|
"logits/real": -2.52742075920105, |
|
"logps/generated": -161.51727294921875, |
|
"logps/oppo_gen": -51.96064758300781, |
|
"logps/oppo_real": -160.8415069580078, |
|
"logps/real": -171.3201446533203, |
|
"loss": -128.9964, |
|
"loss/gen": 0.43095916509628296, |
|
"loss/real": -2.8278121948242188, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -109.556640625, |
|
"rewards/margins": 99.07798767089844, |
|
"rewards/real": -10.478641510009766, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 68.96497993207313, |
|
"learning_rate": 4.764957264957264e-07, |
|
"logits/generated": -2.3004653453826904, |
|
"logits/oppo_generated": -2.7906460762023926, |
|
"logits/oppo_real": -2.7454147338867188, |
|
"logits/real": -2.5157923698425293, |
|
"logps/generated": -148.928955078125, |
|
"logps/oppo_gen": -67.77021789550781, |
|
"logps/oppo_real": -355.9058837890625, |
|
"logps/real": -322.17315673828125, |
|
"loss": -2.7744, |
|
"loss/gen": 0.9168766736984253, |
|
"loss/real": -3.8560690879821777, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -81.15873718261719, |
|
"rewards/margins": 114.89146423339844, |
|
"rewards/real": 33.73272705078125, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 82.86539584244439, |
|
"learning_rate": 4.761396011396011e-07, |
|
"logits/generated": -2.309685707092285, |
|
"logits/oppo_generated": -2.784420967102051, |
|
"logits/oppo_real": -2.58797550201416, |
|
"logits/real": -2.521721363067627, |
|
"logps/generated": -174.80889892578125, |
|
"logps/oppo_gen": -53.4489631652832, |
|
"logps/oppo_real": -213.77337646484375, |
|
"logps/real": -204.11801147460938, |
|
"loss": -2.8077, |
|
"loss/gen": 0.35567396879196167, |
|
"loss/real": -3.218747138977051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -121.35994720458984, |
|
"rewards/margins": 131.01528930664062, |
|
"rewards/real": 9.655345916748047, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 61.933964171274795, |
|
"learning_rate": 4.7578347578347577e-07, |
|
"logits/generated": -2.5905487537384033, |
|
"logits/oppo_generated": -2.9693868160247803, |
|
"logits/oppo_real": -2.897064208984375, |
|
"logits/real": -2.695655345916748, |
|
"logps/generated": -178.83404541015625, |
|
"logps/oppo_gen": -65.07535552978516, |
|
"logps/oppo_real": -380.3414306640625, |
|
"logps/real": -379.77105712890625, |
|
"loss": -2.6882, |
|
"loss/gen": 0.38962453603744507, |
|
"loss/real": -3.060286521911621, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -113.75869750976562, |
|
"rewards/margins": 114.32907104492188, |
|
"rewards/real": 0.5703763961791992, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 66.47038447097135, |
|
"learning_rate": 4.754273504273504e-07, |
|
"logits/generated": -2.6704883575439453, |
|
"logits/oppo_generated": -2.8074076175689697, |
|
"logits/oppo_real": -2.9744620323181152, |
|
"logits/real": -2.5615124702453613, |
|
"logps/generated": -175.31643676757812, |
|
"logps/oppo_gen": -81.67523193359375, |
|
"logps/oppo_real": -332.10321044921875, |
|
"logps/real": -320.36962890625, |
|
"loss": -2.8096, |
|
"loss/gen": 1.0026687383651733, |
|
"loss/real": -3.2720324993133545, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -93.64120483398438, |
|
"rewards/margins": 105.37479400634766, |
|
"rewards/real": 11.733586311340332, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 54.16138220416485, |
|
"learning_rate": 4.7507122507122507e-07, |
|
"logits/generated": -2.586947441101074, |
|
"logits/oppo_generated": -2.8780970573425293, |
|
"logits/oppo_real": -2.880333185195923, |
|
"logits/real": -2.6156821250915527, |
|
"logps/generated": -207.31790161132812, |
|
"logps/oppo_gen": -83.72149658203125, |
|
"logps/oppo_real": -272.17291259765625, |
|
"logps/real": -258.2806701660156, |
|
"loss": -2.7218, |
|
"loss/gen": 0.3455054759979248, |
|
"loss/real": -3.568074941635132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -123.59640502929688, |
|
"rewards/margins": 137.48866271972656, |
|
"rewards/real": 13.892251968383789, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 64.72151128826876, |
|
"learning_rate": 4.747150997150997e-07, |
|
"logits/generated": -2.5787789821624756, |
|
"logits/oppo_generated": -2.8689210414886475, |
|
"logits/oppo_real": -3.036574602127075, |
|
"logits/real": -2.5574660301208496, |
|
"logps/generated": -197.68472290039062, |
|
"logps/oppo_gen": -61.806739807128906, |
|
"logps/oppo_real": -213.864013671875, |
|
"logps/real": -206.0244140625, |
|
"loss": -2.8435, |
|
"loss/gen": 0.28257864713668823, |
|
"loss/real": -3.154269218444824, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -135.8779754638672, |
|
"rewards/margins": 143.7175750732422, |
|
"rewards/real": 7.839590072631836, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 63.413190000311644, |
|
"learning_rate": 4.743589743589743e-07, |
|
"logits/generated": -2.537674903869629, |
|
"logits/oppo_generated": -2.847443103790283, |
|
"logits/oppo_real": -2.9110074043273926, |
|
"logits/real": -2.5497055053710938, |
|
"logps/generated": -195.18397521972656, |
|
"logps/oppo_gen": -68.70259857177734, |
|
"logps/oppo_real": -252.70947265625, |
|
"logps/real": -234.55947875976562, |
|
"loss": -2.7751, |
|
"loss/gen": 0.33247071504592896, |
|
"loss/real": -3.345210075378418, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -126.48136901855469, |
|
"rewards/margins": 144.63134765625, |
|
"rewards/real": 18.149982452392578, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 67.1754822100761, |
|
"learning_rate": 4.74002849002849e-07, |
|
"logits/generated": -2.6043078899383545, |
|
"logits/oppo_generated": -2.850525140762329, |
|
"logits/oppo_real": -2.9623799324035645, |
|
"logits/real": -2.5423567295074463, |
|
"logps/generated": -186.6025390625, |
|
"logps/oppo_gen": -70.65492248535156, |
|
"logps/oppo_real": -241.07968139648438, |
|
"logps/real": -243.4227294921875, |
|
"loss": -2.7129, |
|
"loss/gen": 0.38259631395339966, |
|
"loss/real": -2.913571357727051, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -115.94760131835938, |
|
"rewards/margins": 113.60454559326172, |
|
"rewards/real": -2.3430585861206055, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 64.89191358881898, |
|
"learning_rate": 4.7364672364672366e-07, |
|
"logits/generated": -2.326094150543213, |
|
"logits/oppo_generated": -2.760641574859619, |
|
"logits/oppo_real": -2.835960865020752, |
|
"logits/real": -2.4331917762756348, |
|
"logps/generated": -192.00738525390625, |
|
"logps/oppo_gen": -77.80702209472656, |
|
"logps/oppo_real": -309.97265625, |
|
"logps/real": -309.2704162597656, |
|
"loss": -2.821, |
|
"loss/gen": 0.539390504360199, |
|
"loss/real": -3.1048460006713867, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -114.20036315917969, |
|
"rewards/margins": 114.902587890625, |
|
"rewards/real": 0.7022085189819336, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 118.88704263396411, |
|
"learning_rate": 4.7329059829059823e-07, |
|
"logits/generated": -2.453880786895752, |
|
"logits/oppo_generated": -2.762300491333008, |
|
"logits/oppo_real": -2.91391658782959, |
|
"logits/real": -2.4560084342956543, |
|
"logps/generated": -197.6976318359375, |
|
"logps/oppo_gen": -79.30331420898438, |
|
"logps/oppo_real": -206.95407104492188, |
|
"logps/real": -221.3215789794922, |
|
"loss": -3.2051, |
|
"loss/gen": 0.37958478927612305, |
|
"loss/real": -2.6625490188598633, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -118.39431762695312, |
|
"rewards/margins": 104.02679443359375, |
|
"rewards/real": -14.367520332336426, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 118.88704263396411, |
|
"learning_rate": 4.7329059829059823e-07, |
|
"logits/generated": -2.224625825881958, |
|
"logits/oppo_generated": -2.8723740577697754, |
|
"logits/oppo_real": -2.730229139328003, |
|
"logits/real": -2.5184946060180664, |
|
"logps/generated": -221.04774475097656, |
|
"logps/oppo_gen": -68.4917984008789, |
|
"logps/oppo_real": -205.74790954589844, |
|
"logps/real": -212.26702880859375, |
|
"loss": -316.7941, |
|
"loss/gen": 0.3251597583293915, |
|
"loss/real": -2.7914090156555176, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -152.55593872070312, |
|
"rewards/margins": 146.0368194580078, |
|
"rewards/real": -6.51912784576416, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1343.0752491949213, |
|
"learning_rate": 4.729344729344729e-07, |
|
"logits/generated": -2.4904122352600098, |
|
"logits/oppo_generated": -2.833265781402588, |
|
"logits/oppo_real": -2.8581643104553223, |
|
"logits/real": -2.5306711196899414, |
|
"logps/generated": -204.9275360107422, |
|
"logps/oppo_gen": -72.44357299804688, |
|
"logps/oppo_real": -294.85699462890625, |
|
"logps/real": -290.6130065917969, |
|
"loss": -4.5958, |
|
"loss/gen": 0.28193220496177673, |
|
"loss/real": -3.1163246631622314, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -132.48397827148438, |
|
"rewards/margins": 136.72792053222656, |
|
"rewards/real": 4.243948936462402, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 664.7720944052932, |
|
"learning_rate": 4.725783475783476e-07, |
|
"logits/generated": -2.349301815032959, |
|
"logits/oppo_generated": -2.8131227493286133, |
|
"logits/oppo_real": -2.815453052520752, |
|
"logits/real": -2.5294294357299805, |
|
"logps/generated": -248.4262237548828, |
|
"logps/oppo_gen": -118.46414184570312, |
|
"logps/oppo_real": -350.6376953125, |
|
"logps/real": -330.9083557128906, |
|
"loss": -5.5005, |
|
"loss/gen": 0.29418256878852844, |
|
"loss/real": -3.4984450340270996, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -129.9620819091797, |
|
"rewards/margins": 149.69143676757812, |
|
"rewards/real": 19.729347229003906, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 40.89130414749342, |
|
"learning_rate": 4.722222222222222e-07, |
|
"logits/generated": -2.5400872230529785, |
|
"logits/oppo_generated": -2.868478775024414, |
|
"logits/oppo_real": -2.87443208694458, |
|
"logits/real": -2.5824098587036133, |
|
"logps/generated": -186.3829345703125, |
|
"logps/oppo_gen": -72.4801025390625, |
|
"logps/oppo_real": -315.2503356933594, |
|
"logps/real": -298.22100830078125, |
|
"loss": -2.7752, |
|
"loss/gen": 0.39562442898750305, |
|
"loss/real": -3.2849442958831787, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -113.90283203125, |
|
"rewards/margins": 130.93215942382812, |
|
"rewards/real": 17.02932357788086, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 94.72539979956538, |
|
"learning_rate": 4.7186609686609683e-07, |
|
"logits/generated": -2.2407426834106445, |
|
"logits/oppo_generated": -2.5010550022125244, |
|
"logits/oppo_real": -2.635188102722168, |
|
"logits/real": -2.2052998542785645, |
|
"logps/generated": -200.30931091308594, |
|
"logps/oppo_gen": -80.23007202148438, |
|
"logps/oppo_real": -347.019287109375, |
|
"logps/real": -327.673828125, |
|
"loss": -2.9739, |
|
"loss/gen": 0.40606454014778137, |
|
"loss/real": -4.382803916931152, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -120.0792465209961, |
|
"rewards/margins": 139.4246826171875, |
|
"rewards/real": 19.345449447631836, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 60.16715201536986, |
|
"learning_rate": 4.715099715099715e-07, |
|
"logits/generated": -2.1603076457977295, |
|
"logits/oppo_generated": -2.6126623153686523, |
|
"logits/oppo_real": -2.6145567893981934, |
|
"logits/real": -2.2864603996276855, |
|
"logps/generated": -194.54769897460938, |
|
"logps/oppo_gen": -73.5291748046875, |
|
"logps/oppo_real": -317.5265808105469, |
|
"logps/real": -296.9075012207031, |
|
"loss": -2.8726, |
|
"loss/gen": 0.3832360804080963, |
|
"loss/real": -3.425445795059204, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -121.01852416992188, |
|
"rewards/margins": 141.63758850097656, |
|
"rewards/real": 20.619068145751953, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 99.14064346926541, |
|
"learning_rate": 4.711538461538461e-07, |
|
"logits/generated": -2.6689248085021973, |
|
"logits/oppo_generated": -3.0297465324401855, |
|
"logits/oppo_real": -3.101362705230713, |
|
"logits/real": -2.7913174629211426, |
|
"logps/generated": -225.02159118652344, |
|
"logps/oppo_gen": -120.2161865234375, |
|
"logps/oppo_real": -532.0965576171875, |
|
"logps/real": -496.509033203125, |
|
"loss": -2.6576, |
|
"loss/gen": 0.5110812187194824, |
|
"loss/real": -3.9389498233795166, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -104.80540466308594, |
|
"rewards/margins": 140.39291381835938, |
|
"rewards/real": 35.58751678466797, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 70.39239557984051, |
|
"learning_rate": 4.707977207977208e-07, |
|
"logits/generated": -2.5018911361694336, |
|
"logits/oppo_generated": -2.4462087154388428, |
|
"logits/oppo_real": -2.882254123687744, |
|
"logits/real": -2.1281325817108154, |
|
"logps/generated": -162.50857543945312, |
|
"logps/oppo_gen": -74.71348571777344, |
|
"logps/oppo_real": -324.086669921875, |
|
"logps/real": -299.44586181640625, |
|
"loss": -2.7935, |
|
"loss/gen": 0.9241290092468262, |
|
"loss/real": -3.661430597305298, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -87.79508972167969, |
|
"rewards/margins": 112.4359130859375, |
|
"rewards/real": 24.640825271606445, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 19246.478742876578, |
|
"learning_rate": 4.7044159544159537e-07, |
|
"logits/generated": -2.6820337772369385, |
|
"logits/oppo_generated": -2.9427778720855713, |
|
"logits/oppo_real": -2.9869794845581055, |
|
"logits/real": -2.646888494491577, |
|
"logps/generated": -157.2148895263672, |
|
"logps/oppo_gen": -57.98387908935547, |
|
"logps/oppo_real": -299.8202209472656, |
|
"logps/real": -309.00274658203125, |
|
"loss": -58.9365, |
|
"loss/gen": 0.7235317230224609, |
|
"loss/real": -2.887176990509033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -99.23101806640625, |
|
"rewards/margins": 90.04846954345703, |
|
"rewards/real": -9.182544708251953, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 132.2171139995689, |
|
"learning_rate": 4.7008547008547005e-07, |
|
"logits/generated": -2.256178855895996, |
|
"logits/oppo_generated": -2.462200880050659, |
|
"logits/oppo_real": -2.7382378578186035, |
|
"logits/real": -2.1479060649871826, |
|
"logps/generated": -191.7882537841797, |
|
"logps/oppo_gen": -109.31198120117188, |
|
"logps/oppo_real": -333.22021484375, |
|
"logps/real": -315.614013671875, |
|
"loss": -3.274, |
|
"loss/gen": 1.3347947597503662, |
|
"loss/real": -3.325333595275879, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -82.47628021240234, |
|
"rewards/margins": 100.08245849609375, |
|
"rewards/real": 17.606182098388672, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 74.27982684269834, |
|
"learning_rate": 4.697293447293447e-07, |
|
"logits/generated": -2.584226131439209, |
|
"logits/oppo_generated": -2.9814329147338867, |
|
"logits/oppo_real": -2.8366198539733887, |
|
"logits/real": -2.7369401454925537, |
|
"logps/generated": -231.37564086914062, |
|
"logps/oppo_gen": -117.97686767578125, |
|
"logps/oppo_real": -333.4208679199219, |
|
"logps/real": -292.571044921875, |
|
"loss": -2.9361, |
|
"loss/gen": 0.7850100994110107, |
|
"loss/real": -4.297349452972412, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -113.39878845214844, |
|
"rewards/margins": 154.24859619140625, |
|
"rewards/real": 40.84980773925781, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 62.146904934749166, |
|
"learning_rate": 4.6937321937321934e-07, |
|
"logits/generated": -2.1917073726654053, |
|
"logits/oppo_generated": -2.6781723499298096, |
|
"logits/oppo_real": -2.516916513442993, |
|
"logits/real": -2.4100053310394287, |
|
"logps/generated": -191.9784393310547, |
|
"logps/oppo_gen": -60.19814682006836, |
|
"logps/oppo_real": -262.58551025390625, |
|
"logps/real": -243.27468872070312, |
|
"loss": -2.8222, |
|
"loss/gen": 0.7389452457427979, |
|
"loss/real": -3.3546838760375977, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -131.78028869628906, |
|
"rewards/margins": 151.09109497070312, |
|
"rewards/real": 19.31081771850586, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 62.146904934749166, |
|
"learning_rate": 4.6937321937321934e-07, |
|
"logits/generated": -2.438559055328369, |
|
"logits/oppo_generated": -2.8787498474121094, |
|
"logits/oppo_real": -2.805894374847412, |
|
"logits/real": -2.6161952018737793, |
|
"logps/generated": -236.9315185546875, |
|
"logps/oppo_gen": -124.28936767578125, |
|
"logps/oppo_real": -606.1627807617188, |
|
"logps/real": -575.891845703125, |
|
"loss": -30094.6035, |
|
"loss/gen": 0.43543320894241333, |
|
"loss/real": -4.007241725921631, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -112.64216613769531, |
|
"rewards/margins": 142.91311645507812, |
|
"rewards/real": 30.27095603942871, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 78.11314597568548, |
|
"learning_rate": 4.69017094017094e-07, |
|
"logits/generated": -2.526062488555908, |
|
"logits/oppo_generated": -2.765538454055786, |
|
"logits/oppo_real": -2.839543342590332, |
|
"logits/real": -2.4912123680114746, |
|
"logps/generated": -192.7285919189453, |
|
"logps/oppo_gen": -83.72669982910156, |
|
"logps/oppo_real": -361.6756591796875, |
|
"logps/real": -346.46002197265625, |
|
"loss": -3.0693, |
|
"loss/gen": 0.47312071919441223, |
|
"loss/real": -3.3567910194396973, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -109.00190734863281, |
|
"rewards/margins": 124.21757507324219, |
|
"rewards/real": 15.215664863586426, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 54.23171224736731, |
|
"learning_rate": 4.6866096866096864e-07, |
|
"logits/generated": -2.607853889465332, |
|
"logits/oppo_generated": -2.7416014671325684, |
|
"logits/oppo_real": -2.8941569328308105, |
|
"logits/real": -2.4866786003112793, |
|
"logps/generated": -161.49307250976562, |
|
"logps/oppo_gen": -51.659912109375, |
|
"logps/oppo_real": -267.5926513671875, |
|
"logps/real": -247.1339111328125, |
|
"loss": -2.9068, |
|
"loss/gen": 0.421144962310791, |
|
"loss/real": -3.4416115283966064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -109.8331527709961, |
|
"rewards/margins": 130.2919158935547, |
|
"rewards/real": 20.458759307861328, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 72.46861187459885, |
|
"learning_rate": 4.6830484330484326e-07, |
|
"logits/generated": -2.2220163345336914, |
|
"logits/oppo_generated": -2.609920024871826, |
|
"logits/oppo_real": -2.5399818420410156, |
|
"logits/real": -2.291043758392334, |
|
"logps/generated": -210.66543579101562, |
|
"logps/oppo_gen": -81.96345520019531, |
|
"logps/oppo_real": -258.99554443359375, |
|
"logps/real": -252.50823974609375, |
|
"loss": -3.0398, |
|
"loss/gen": 0.3264577388763428, |
|
"loss/real": -3.0477218627929688, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -128.70199584960938, |
|
"rewards/margins": 135.1892852783203, |
|
"rewards/real": 6.4872941970825195, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 72.46861187459885, |
|
"learning_rate": 4.6830484330484326e-07, |
|
"logits/generated": -2.443568706512451, |
|
"logits/oppo_generated": -2.89731502532959, |
|
"logits/oppo_real": -2.861166000366211, |
|
"logits/real": -2.563661575317383, |
|
"logps/generated": -180.6520538330078, |
|
"logps/oppo_gen": -61.10588073730469, |
|
"logps/oppo_real": -297.8720703125, |
|
"logps/real": -281.30902099609375, |
|
"loss": -231011.7969, |
|
"loss/gen": 0.3417486846446991, |
|
"loss/real": -3.499724864959717, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -119.54617309570312, |
|
"rewards/margins": 136.10922241210938, |
|
"rewards/real": 16.563053131103516, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 68.7764082802926, |
|
"learning_rate": 4.6794871794871794e-07, |
|
"logits/generated": -2.4255595207214355, |
|
"logits/oppo_generated": -2.8648695945739746, |
|
"logits/oppo_real": -2.711393356323242, |
|
"logits/real": -2.6028270721435547, |
|
"logps/generated": -223.78445434570312, |
|
"logps/oppo_gen": -111.59371948242188, |
|
"logps/oppo_real": -521.255859375, |
|
"logps/real": -493.6855773925781, |
|
"loss": -2.7501, |
|
"loss/gen": 0.418493390083313, |
|
"loss/real": -3.638298749923706, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -112.19073486328125, |
|
"rewards/margins": 139.76104736328125, |
|
"rewards/real": 27.570310592651367, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 60.205945382287624, |
|
"learning_rate": 4.675925925925926e-07, |
|
"logits/generated": -2.525608539581299, |
|
"logits/oppo_generated": -2.8064088821411133, |
|
"logits/oppo_real": -2.845989227294922, |
|
"logits/real": -2.5157408714294434, |
|
"logps/generated": -162.0449676513672, |
|
"logps/oppo_gen": -52.78784942626953, |
|
"logps/oppo_real": -172.55088806152344, |
|
"logps/real": -161.2114715576172, |
|
"loss": -2.904, |
|
"loss/gen": 0.4695492088794708, |
|
"loss/real": -3.171067237854004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -109.25712585449219, |
|
"rewards/margins": 120.59654235839844, |
|
"rewards/real": 11.339418411254883, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 70.03484065984587, |
|
"learning_rate": 4.672364672364672e-07, |
|
"logits/generated": -2.5758299827575684, |
|
"logits/oppo_generated": -3.0264251232147217, |
|
"logits/oppo_real": -2.836057186126709, |
|
"logits/real": -2.738698959350586, |
|
"logps/generated": -218.2009735107422, |
|
"logps/oppo_gen": -74.337158203125, |
|
"logps/oppo_real": -371.032470703125, |
|
"logps/real": -321.6204833984375, |
|
"loss": -2.9986, |
|
"loss/gen": 0.24696138501167297, |
|
"loss/real": -4.774725437164307, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -143.8638153076172, |
|
"rewards/margins": 193.27581787109375, |
|
"rewards/real": 49.41199493408203, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 7817.082320149891, |
|
"learning_rate": 4.6688034188034186e-07, |
|
"logits/generated": -2.6040773391723633, |
|
"logits/oppo_generated": -2.876476764678955, |
|
"logits/oppo_real": -2.912707805633545, |
|
"logits/real": -2.6422886848449707, |
|
"logps/generated": -201.4759063720703, |
|
"logps/oppo_gen": -90.53692626953125, |
|
"logps/oppo_real": -383.74615478515625, |
|
"logps/real": -350.41131591796875, |
|
"loss": -19.0115, |
|
"loss/gen": 0.4651464819908142, |
|
"loss/real": -3.848228931427002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -110.93898010253906, |
|
"rewards/margins": 144.27383422851562, |
|
"rewards/real": 33.33485794067383, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 105.7367382346363, |
|
"learning_rate": 4.6652421652421653e-07, |
|
"logits/generated": -2.674943447113037, |
|
"logits/oppo_generated": -2.9819746017456055, |
|
"logits/oppo_real": -3.1959123611450195, |
|
"logits/real": -2.714082717895508, |
|
"logps/generated": -279.74652099609375, |
|
"logps/oppo_gen": -152.70217895507812, |
|
"logps/oppo_real": -483.54266357421875, |
|
"logps/real": -443.05084228515625, |
|
"loss": -3.1497, |
|
"loss/gen": 0.3712007403373718, |
|
"loss/real": -4.19202184677124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -127.04434204101562, |
|
"rewards/margins": 167.5361328125, |
|
"rewards/real": 40.49180603027344, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 320.13193081134534, |
|
"learning_rate": 4.6616809116809116e-07, |
|
"logits/generated": -2.6575989723205566, |
|
"logits/oppo_generated": -2.7378830909729004, |
|
"logits/oppo_real": -3.110536813735962, |
|
"logits/real": -2.483811378479004, |
|
"logps/generated": -208.11447143554688, |
|
"logps/oppo_gen": -86.0918960571289, |
|
"logps/oppo_real": -447.7939147949219, |
|
"logps/real": -449.82757568359375, |
|
"loss": -3.1025, |
|
"loss/gen": 0.3562849164009094, |
|
"loss/real": -3.004971981048584, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -122.0225830078125, |
|
"rewards/margins": 119.98893737792969, |
|
"rewards/real": -2.033646583557129, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 56.81756199946017, |
|
"learning_rate": 4.658119658119658e-07, |
|
"logits/generated": -2.8450493812561035, |
|
"logits/oppo_generated": -2.7491419315338135, |
|
"logits/oppo_real": -3.191051483154297, |
|
"logits/real": -2.4600586891174316, |
|
"logps/generated": -215.31295776367188, |
|
"logps/oppo_gen": -96.26548767089844, |
|
"logps/oppo_real": -305.7531433105469, |
|
"logps/real": -280.7630615234375, |
|
"loss": -2.8775, |
|
"loss/gen": 0.5062470436096191, |
|
"loss/real": -3.553849220275879, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -119.04747009277344, |
|
"rewards/margins": 144.03756713867188, |
|
"rewards/real": 24.990097045898438, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 56.194760202520214, |
|
"learning_rate": 4.654558404558404e-07, |
|
"logits/generated": -2.451647996902466, |
|
"logits/oppo_generated": -2.8662476539611816, |
|
"logits/oppo_real": -2.7619881629943848, |
|
"logits/real": -2.580970287322998, |
|
"logps/generated": -190.9722900390625, |
|
"logps/oppo_gen": -76.39656066894531, |
|
"logps/oppo_real": -342.36138916015625, |
|
"logps/real": -320.16766357421875, |
|
"loss": -2.8042, |
|
"loss/gen": 0.40042293071746826, |
|
"loss/real": -3.5132551193237305, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -114.57573699951172, |
|
"rewards/margins": 136.7694854736328, |
|
"rewards/real": 22.193754196166992, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 54.320377107864644, |
|
"learning_rate": 4.650997150997151e-07, |
|
"logits/generated": -2.6203999519348145, |
|
"logits/oppo_generated": -2.973456859588623, |
|
"logits/oppo_real": -2.9541869163513184, |
|
"logits/real": -2.606893539428711, |
|
"logps/generated": -158.5798797607422, |
|
"logps/oppo_gen": -58.52758026123047, |
|
"logps/oppo_real": -196.6337127685547, |
|
"logps/real": -194.17990112304688, |
|
"loss": -3.0855, |
|
"loss/gen": 0.5960197448730469, |
|
"loss/real": -3.0248513221740723, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -100.05229949951172, |
|
"rewards/margins": 102.50611877441406, |
|
"rewards/real": 2.453828811645508, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 75.8143516622595, |
|
"learning_rate": 4.6474358974358975e-07, |
|
"logits/generated": -2.4562883377075195, |
|
"logits/oppo_generated": -2.9579458236694336, |
|
"logits/oppo_real": -2.8345115184783936, |
|
"logits/real": -2.6457347869873047, |
|
"logps/generated": -204.59548950195312, |
|
"logps/oppo_gen": -86.37559509277344, |
|
"logps/oppo_real": -329.4002685546875, |
|
"logps/real": -317.261962890625, |
|
"loss": -3.0377, |
|
"loss/gen": 0.4204404056072235, |
|
"loss/real": -3.355125904083252, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -118.21987915039062, |
|
"rewards/margins": 130.35821533203125, |
|
"rewards/real": 12.138343811035156, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 75.08773189056086, |
|
"learning_rate": 4.643874643874643e-07, |
|
"logits/generated": -2.1544671058654785, |
|
"logits/oppo_generated": -2.4297678470611572, |
|
"logits/oppo_real": -2.5349526405334473, |
|
"logits/real": -2.043349027633667, |
|
"logps/generated": -245.35202026367188, |
|
"logps/oppo_gen": -139.25880432128906, |
|
"logps/oppo_real": -366.9024658203125, |
|
"logps/real": -337.96356201171875, |
|
"loss": -2.9923, |
|
"loss/gen": 0.5029778480529785, |
|
"loss/real": -3.993164300918579, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -106.09322357177734, |
|
"rewards/margins": 135.03216552734375, |
|
"rewards/real": 28.938934326171875, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 576.2930027338524, |
|
"learning_rate": 4.64031339031339e-07, |
|
"logits/generated": -2.114830493927002, |
|
"logits/oppo_generated": -2.59027099609375, |
|
"logits/oppo_real": -2.5751681327819824, |
|
"logits/real": -2.2125301361083984, |
|
"logps/generated": -178.6730499267578, |
|
"logps/oppo_gen": -44.13750076293945, |
|
"logps/oppo_real": -126.39328002929688, |
|
"logps/real": -146.06198120117188, |
|
"loss": -4.0466, |
|
"loss/gen": 0.268838495016098, |
|
"loss/real": -2.595045566558838, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -134.53555297851562, |
|
"rewards/margins": 114.86683654785156, |
|
"rewards/real": -19.668712615966797, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 54.61241720782235, |
|
"learning_rate": 4.6367521367521367e-07, |
|
"logits/generated": -2.424686908721924, |
|
"logits/oppo_generated": -2.8061888217926025, |
|
"logits/oppo_real": -2.885352611541748, |
|
"logits/real": -2.449500560760498, |
|
"logps/generated": -222.48379516601562, |
|
"logps/oppo_gen": -82.9956283569336, |
|
"logps/oppo_real": -287.7582702636719, |
|
"logps/real": -284.30731201171875, |
|
"loss": -3.0876, |
|
"loss/gen": 0.32225707173347473, |
|
"loss/real": -2.8702611923217773, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -139.4881591796875, |
|
"rewards/margins": 142.93911743164062, |
|
"rewards/real": 3.450957775115967, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 85.11727319006701, |
|
"learning_rate": 4.633190883190883e-07, |
|
"logits/generated": -2.086930274963379, |
|
"logits/oppo_generated": -2.6804826259613037, |
|
"logits/oppo_real": -2.560675621032715, |
|
"logits/real": -2.307936668395996, |
|
"logps/generated": -248.29327392578125, |
|
"logps/oppo_gen": -125.20469665527344, |
|
"logps/oppo_real": -214.75454711914062, |
|
"logps/real": -237.43409729003906, |
|
"loss": -2.705, |
|
"loss/gen": 0.33005163073539734, |
|
"loss/real": -2.6607136726379395, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -123.08856201171875, |
|
"rewards/margins": 100.40898895263672, |
|
"rewards/real": -22.67957878112793, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 76.16983490857781, |
|
"learning_rate": 4.6296296296296297e-07, |
|
"logits/generated": -2.4300737380981445, |
|
"logits/oppo_generated": -2.8161306381225586, |
|
"logits/oppo_real": -2.873737096786499, |
|
"logits/real": -2.3974549770355225, |
|
"logps/generated": -211.3475341796875, |
|
"logps/oppo_gen": -39.4675178527832, |
|
"logps/oppo_real": -94.7720718383789, |
|
"logps/real": -112.44818115234375, |
|
"loss": -2.8425, |
|
"loss/gen": 0.24527525901794434, |
|
"loss/real": -2.5662083625793457, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -171.88002014160156, |
|
"rewards/margins": 154.2039031982422, |
|
"rewards/real": -17.676116943359375, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 52.66541092417774, |
|
"learning_rate": 4.626068376068376e-07, |
|
"logits/generated": -2.2791929244995117, |
|
"logits/oppo_generated": -2.754338026046753, |
|
"logits/oppo_real": -2.6611428260803223, |
|
"logits/real": -2.365473747253418, |
|
"logps/generated": -180.72125244140625, |
|
"logps/oppo_gen": -53.64311981201172, |
|
"logps/oppo_real": -189.60964965820312, |
|
"logps/real": -185.12130737304688, |
|
"loss": -2.8864, |
|
"loss/gen": 0.31177347898483276, |
|
"loss/real": -3.0672144889831543, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -127.07814025878906, |
|
"rewards/margins": 131.56646728515625, |
|
"rewards/real": 4.488343238830566, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 89.10240608160505, |
|
"learning_rate": 4.622507122507122e-07, |
|
"logits/generated": -2.525489568710327, |
|
"logits/oppo_generated": -2.8700437545776367, |
|
"logits/oppo_real": -3.012883186340332, |
|
"logits/real": -2.459331512451172, |
|
"logps/generated": -198.41644287109375, |
|
"logps/oppo_gen": -64.43563842773438, |
|
"logps/oppo_real": -366.68572998046875, |
|
"logps/real": -346.22503662109375, |
|
"loss": -3.2314, |
|
"loss/gen": 0.27841734886169434, |
|
"loss/real": -3.4703869819641113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -133.98080444335938, |
|
"rewards/margins": 154.4415283203125, |
|
"rewards/real": 20.46072006225586, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1254.2688109013952, |
|
"learning_rate": 4.618945868945869e-07, |
|
"logits/generated": -2.289478302001953, |
|
"logits/oppo_generated": -2.896176338195801, |
|
"logits/oppo_real": -2.7520911693573, |
|
"logits/real": -2.514561653137207, |
|
"logps/generated": -221.06515502929688, |
|
"logps/oppo_gen": -94.6259765625, |
|
"logps/oppo_real": -329.9571533203125, |
|
"logps/real": -310.5443115234375, |
|
"loss": -5.4989, |
|
"loss/gen": 0.3454495668411255, |
|
"loss/real": -3.5192689895629883, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -126.43919372558594, |
|
"rewards/margins": 145.85203552246094, |
|
"rewards/real": 19.412845611572266, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 49.926036602752426, |
|
"learning_rate": 4.6153846153846156e-07, |
|
"logits/generated": -2.4086711406707764, |
|
"logits/oppo_generated": -2.72526478767395, |
|
"logits/oppo_real": -2.760162591934204, |
|
"logits/real": -2.3816709518432617, |
|
"logps/generated": -189.68716430664062, |
|
"logps/oppo_gen": -70.71673583984375, |
|
"logps/oppo_real": -391.76458740234375, |
|
"logps/real": -400.5279846191406, |
|
"loss": -2.873, |
|
"loss/gen": 0.38557717204093933, |
|
"loss/real": -2.8944320678710938, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -118.97042846679688, |
|
"rewards/margins": 110.20704650878906, |
|
"rewards/real": -8.763385772705078, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 108.6453699255058, |
|
"learning_rate": 4.6118233618233613e-07, |
|
"logits/generated": -2.9071619510650635, |
|
"logits/oppo_generated": -2.979785919189453, |
|
"logits/oppo_real": -3.2641677856445312, |
|
"logits/real": -2.598475933074951, |
|
"logps/generated": -202.7903594970703, |
|
"logps/oppo_gen": -92.89317321777344, |
|
"logps/oppo_real": -330.3245849609375, |
|
"logps/real": -319.0426025390625, |
|
"loss": -3.0021, |
|
"loss/gen": 0.49392998218536377, |
|
"loss/real": -3.2759296894073486, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -109.89718627929688, |
|
"rewards/margins": 121.17916870117188, |
|
"rewards/real": 11.28197956085205, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 59.173970949148114, |
|
"learning_rate": 4.608262108262108e-07, |
|
"logits/generated": -2.1723835468292236, |
|
"logits/oppo_generated": -2.775574207305908, |
|
"logits/oppo_real": -2.598371744155884, |
|
"logits/real": -2.396592140197754, |
|
"logps/generated": -187.12681579589844, |
|
"logps/oppo_gen": -65.71693420410156, |
|
"logps/oppo_real": -220.19737243652344, |
|
"logps/real": -200.814453125, |
|
"loss": -3.0664, |
|
"loss/gen": 0.3779526352882385, |
|
"loss/real": -3.3624069690704346, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -121.40988159179688, |
|
"rewards/margins": 140.79281616210938, |
|
"rewards/real": 19.38292694091797, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 75.21973020757254, |
|
"learning_rate": 4.6047008547008543e-07, |
|
"logits/generated": -2.0699994564056396, |
|
"logits/oppo_generated": -2.6892812252044678, |
|
"logits/oppo_real": -2.527797222137451, |
|
"logits/real": -2.2784643173217773, |
|
"logps/generated": -172.57388305664062, |
|
"logps/oppo_gen": -56.507102966308594, |
|
"logps/oppo_real": -203.99942016601562, |
|
"logps/real": -220.61575317382812, |
|
"loss": -2.9974, |
|
"loss/gen": 0.3955667018890381, |
|
"loss/real": -2.5530004501342773, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -116.06678771972656, |
|
"rewards/margins": 99.45044708251953, |
|
"rewards/real": -16.6163387298584, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 69.13334884030283, |
|
"learning_rate": 4.601139601139601e-07, |
|
"logits/generated": -2.4354324340820312, |
|
"logits/oppo_generated": -2.892515182495117, |
|
"logits/oppo_real": -2.87583589553833, |
|
"logits/real": -2.509371280670166, |
|
"logps/generated": -193.70269775390625, |
|
"logps/oppo_gen": -70.63409423828125, |
|
"logps/oppo_real": -236.45480346679688, |
|
"logps/real": -236.11924743652344, |
|
"loss": -2.9182, |
|
"loss/gen": 0.3297494649887085, |
|
"loss/real": -3.017835855484009, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -123.068603515625, |
|
"rewards/margins": 123.4041748046875, |
|
"rewards/real": 0.3355722427368164, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 96.21606544647312, |
|
"learning_rate": 4.5975783475783473e-07, |
|
"logits/generated": -2.1324949264526367, |
|
"logits/oppo_generated": -2.2372124195098877, |
|
"logits/oppo_real": -2.6531500816345215, |
|
"logits/real": -1.7291717529296875, |
|
"logps/generated": -151.3114471435547, |
|
"logps/oppo_gen": -49.9699821472168, |
|
"logps/oppo_real": -257.7629699707031, |
|
"logps/real": -268.58935546875, |
|
"loss": -3.1964, |
|
"loss/gen": 1.1664835214614868, |
|
"loss/real": -2.592834234237671, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -101.34146118164062, |
|
"rewards/margins": 90.51508331298828, |
|
"rewards/real": -10.826382637023926, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1100.3644850684705, |
|
"learning_rate": 4.5940170940170935e-07, |
|
"logits/generated": -2.265676736831665, |
|
"logits/oppo_generated": -2.6594979763031006, |
|
"logits/oppo_real": -2.72336483001709, |
|
"logits/real": -2.2372395992279053, |
|
"logps/generated": -186.74288940429688, |
|
"logps/oppo_gen": -69.47285461425781, |
|
"logps/oppo_real": -203.925048828125, |
|
"logps/real": -171.45407104492188, |
|
"loss": -4.8218, |
|
"loss/gen": 0.37407732009887695, |
|
"loss/real": -3.8044371604919434, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -117.27003479003906, |
|
"rewards/margins": 149.74102783203125, |
|
"rewards/real": 32.470985412597656, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 48.85153938608921, |
|
"learning_rate": 4.59045584045584e-07, |
|
"logits/generated": -2.573637008666992, |
|
"logits/oppo_generated": -2.84741473197937, |
|
"logits/oppo_real": -2.9322423934936523, |
|
"logits/real": -2.466978073120117, |
|
"logps/generated": -193.22213745117188, |
|
"logps/oppo_gen": -72.28129577636719, |
|
"logps/oppo_real": -342.0706787109375, |
|
"logps/real": -368.0989990234375, |
|
"loss": -2.7908, |
|
"loss/gen": 0.3612110912799835, |
|
"loss/real": -2.5483293533325195, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -120.94083404541016, |
|
"rewards/margins": 94.91249084472656, |
|
"rewards/real": -26.028343200683594, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 61.90407697473366, |
|
"learning_rate": 4.586894586894587e-07, |
|
"logits/generated": -2.5092499256134033, |
|
"logits/oppo_generated": -2.8123486042022705, |
|
"logits/oppo_real": -2.9484448432922363, |
|
"logits/real": -2.4517569541931152, |
|
"logps/generated": -189.08139038085938, |
|
"logps/oppo_gen": -78.67784118652344, |
|
"logps/oppo_real": -224.94638061523438, |
|
"logps/real": -222.41128540039062, |
|
"loss": -2.8246, |
|
"loss/gen": 0.4234386086463928, |
|
"loss/real": -3.0529117584228516, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -110.4035415649414, |
|
"rewards/margins": 112.93864440917969, |
|
"rewards/real": 2.5351076126098633, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 162.38187413278305, |
|
"learning_rate": 4.5833333333333327e-07, |
|
"logits/generated": -2.1803882122039795, |
|
"logits/oppo_generated": -2.6430654525756836, |
|
"logits/oppo_real": -2.7417783737182617, |
|
"logits/real": -2.243424415588379, |
|
"logps/generated": -164.8372802734375, |
|
"logps/oppo_gen": -63.871150970458984, |
|
"logps/oppo_real": -224.14703369140625, |
|
"logps/real": -182.39511108398438, |
|
"loss": -3.7083, |
|
"loss/gen": 0.7786407470703125, |
|
"loss/real": -4.343791961669922, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -100.96613311767578, |
|
"rewards/margins": 142.7180633544922, |
|
"rewards/real": 41.75192642211914, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 49.259806784569555, |
|
"learning_rate": 4.5797720797720794e-07, |
|
"logits/generated": -2.4166438579559326, |
|
"logits/oppo_generated": -2.757966995239258, |
|
"logits/oppo_real": -2.906935691833496, |
|
"logits/real": -2.3613169193267822, |
|
"logps/generated": -156.58056640625, |
|
"logps/oppo_gen": -53.980133056640625, |
|
"logps/oppo_real": -168.99293518066406, |
|
"logps/real": -158.2866973876953, |
|
"loss": -2.9404, |
|
"loss/gen": 0.9939805269241333, |
|
"loss/real": -3.2384204864501953, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -102.6004409790039, |
|
"rewards/margins": 113.30667114257812, |
|
"rewards/real": 10.706242561340332, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 49.259806784569555, |
|
"learning_rate": 4.5797720797720794e-07, |
|
"logits/generated": -2.1523754596710205, |
|
"logits/oppo_generated": -2.34848690032959, |
|
"logits/oppo_real": -2.549453020095825, |
|
"logits/real": -1.9709889888763428, |
|
"logps/generated": -172.17507934570312, |
|
"logps/oppo_gen": -41.99907684326172, |
|
"logps/oppo_real": -137.05735778808594, |
|
"logps/real": -117.31524658203125, |
|
"loss": -36278.4766, |
|
"loss/gen": 0.308816134929657, |
|
"loss/real": -3.7620303630828857, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -130.17599487304688, |
|
"rewards/margins": 149.91812133789062, |
|
"rewards/real": 19.742107391357422, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 58.361834787518674, |
|
"learning_rate": 4.576210826210826e-07, |
|
"logits/generated": -2.2917189598083496, |
|
"logits/oppo_generated": -2.5094847679138184, |
|
"logits/oppo_real": -2.6891722679138184, |
|
"logits/real": -2.1305155754089355, |
|
"logps/generated": -260.20501708984375, |
|
"logps/oppo_gen": -68.40258026123047, |
|
"logps/oppo_real": -223.42794799804688, |
|
"logps/real": -207.68116760253906, |
|
"loss": -2.9255, |
|
"loss/gen": 0.7335460186004639, |
|
"loss/real": -3.7286908626556396, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -191.8024444580078, |
|
"rewards/margins": 207.54922485351562, |
|
"rewards/real": 15.746776580810547, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 170.55447260332988, |
|
"learning_rate": 4.5726495726495724e-07, |
|
"logits/generated": -2.4084904193878174, |
|
"logits/oppo_generated": -2.8935999870300293, |
|
"logits/oppo_real": -2.775484561920166, |
|
"logits/real": -2.5147581100463867, |
|
"logps/generated": -170.41473388671875, |
|
"logps/oppo_gen": -50.93283462524414, |
|
"logps/oppo_real": -316.0002136230469, |
|
"logps/real": -287.05810546875, |
|
"loss": -3.0655, |
|
"loss/gen": 0.39951732754707336, |
|
"loss/real": -3.662087917327881, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -119.48190307617188, |
|
"rewards/margins": 148.4240264892578, |
|
"rewards/real": 28.942119598388672, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 2776.5446151646834, |
|
"learning_rate": 4.569088319088319e-07, |
|
"logits/generated": -2.689021110534668, |
|
"logits/oppo_generated": -2.8526816368103027, |
|
"logits/oppo_real": -3.2386014461517334, |
|
"logits/real": -2.449385166168213, |
|
"logps/generated": -224.29080200195312, |
|
"logps/oppo_gen": -113.54923248291016, |
|
"logps/oppo_real": -351.7125549316406, |
|
"logps/real": -352.888427734375, |
|
"loss": -9.9952, |
|
"loss/gen": 0.5014157295227051, |
|
"loss/real": -2.938952922821045, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -110.7415771484375, |
|
"rewards/margins": 109.56568145751953, |
|
"rewards/real": -1.1758899688720703, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 74.08580527313414, |
|
"learning_rate": 4.5655270655270654e-07, |
|
"logits/generated": -2.553030252456665, |
|
"logits/oppo_generated": -2.9850940704345703, |
|
"logits/oppo_real": -3.0315611362457275, |
|
"logits/real": -2.5720577239990234, |
|
"logps/generated": -181.4952392578125, |
|
"logps/oppo_gen": -61.65489196777344, |
|
"logps/oppo_real": -151.10653686523438, |
|
"logps/real": -148.4203643798828, |
|
"loss": -3.289, |
|
"loss/gen": 0.38128042221069336, |
|
"loss/real": -2.9334371089935303, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -119.84036254882812, |
|
"rewards/margins": 122.52653503417969, |
|
"rewards/real": 2.686166763305664, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 63.58691225524518, |
|
"learning_rate": 4.5619658119658116e-07, |
|
"logits/generated": -2.707376480102539, |
|
"logits/oppo_generated": -2.891350746154785, |
|
"logits/oppo_real": -3.0990657806396484, |
|
"logits/real": -2.544325828552246, |
|
"logps/generated": -313.1190490722656, |
|
"logps/oppo_gen": -212.02532958984375, |
|
"logps/oppo_real": -549.8078002929688, |
|
"logps/real": -522.171142578125, |
|
"loss": -3.0226, |
|
"loss/gen": 0.6081419587135315, |
|
"loss/real": -3.838395118713379, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -101.09373474121094, |
|
"rewards/margins": 128.73036193847656, |
|
"rewards/real": 27.636632919311523, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 54.7705752933886, |
|
"learning_rate": 4.5584045584045584e-07, |
|
"logits/generated": -2.31124210357666, |
|
"logits/oppo_generated": -2.861656904220581, |
|
"logits/oppo_real": -2.749734878540039, |
|
"logits/real": -2.4704031944274902, |
|
"logps/generated": -180.6676025390625, |
|
"logps/oppo_gen": -52.08341598510742, |
|
"logps/oppo_real": -268.2560119628906, |
|
"logps/real": -232.18594360351562, |
|
"loss": -3.0632, |
|
"loss/gen": 0.29419511556625366, |
|
"loss/real": -3.9939093589782715, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -128.5841827392578, |
|
"rewards/margins": 164.6542510986328, |
|
"rewards/real": 36.070064544677734, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 96.52861966309526, |
|
"learning_rate": 4.5548433048433046e-07, |
|
"logits/generated": -2.3479509353637695, |
|
"logits/oppo_generated": -2.8331031799316406, |
|
"logits/oppo_real": -2.8462958335876465, |
|
"logits/real": -2.4217453002929688, |
|
"logps/generated": -206.61517333984375, |
|
"logps/oppo_gen": -78.92254638671875, |
|
"logps/oppo_real": -224.86373901367188, |
|
"logps/real": -216.6464385986328, |
|
"loss": -3.2511, |
|
"loss/gen": 0.3076120615005493, |
|
"loss/real": -3.089078903198242, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -127.69261169433594, |
|
"rewards/margins": 135.909912109375, |
|
"rewards/real": 8.217292785644531, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 56.16997780175812, |
|
"learning_rate": 4.551282051282051e-07, |
|
"logits/generated": -2.3228254318237305, |
|
"logits/oppo_generated": -2.879185199737549, |
|
"logits/oppo_real": -2.873112678527832, |
|
"logits/real": -2.4802536964416504, |
|
"logps/generated": -157.61184692382812, |
|
"logps/oppo_gen": -49.27460479736328, |
|
"logps/oppo_real": -375.43463134765625, |
|
"logps/real": -348.29833984375, |
|
"loss": -2.954, |
|
"loss/gen": 0.6059376001358032, |
|
"loss/real": -3.6162662506103516, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -108.33724212646484, |
|
"rewards/margins": 135.4735107421875, |
|
"rewards/real": 27.136272430419922, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 47.747249184508, |
|
"learning_rate": 4.5477207977207976e-07, |
|
"logits/generated": -2.5688347816467285, |
|
"logits/oppo_generated": -3.0462043285369873, |
|
"logits/oppo_real": -3.1089582443237305, |
|
"logits/real": -2.586811065673828, |
|
"logps/generated": -204.5193634033203, |
|
"logps/oppo_gen": -77.79332733154297, |
|
"logps/oppo_real": -319.2231750488281, |
|
"logps/real": -287.8105773925781, |
|
"loss": -2.9293, |
|
"loss/gen": 0.32107144594192505, |
|
"loss/real": -3.8302066326141357, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -126.72602844238281, |
|
"rewards/margins": 158.13864135742188, |
|
"rewards/real": 31.412609100341797, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 56.68338256821835, |
|
"learning_rate": 4.544159544159544e-07, |
|
"logits/generated": -2.477287530899048, |
|
"logits/oppo_generated": -2.815687656402588, |
|
"logits/oppo_real": -2.9501237869262695, |
|
"logits/real": -2.401658058166504, |
|
"logps/generated": -221.77561950683594, |
|
"logps/oppo_gen": -103.51431274414062, |
|
"logps/oppo_real": -308.8333435058594, |
|
"logps/real": -306.259521484375, |
|
"loss": -2.9378, |
|
"loss/gen": 0.3601089119911194, |
|
"loss/real": -3.115224599838257, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -118.26129913330078, |
|
"rewards/margins": 120.83515930175781, |
|
"rewards/real": 2.5738563537597656, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 132.54113453471717, |
|
"learning_rate": 4.5405982905982905e-07, |
|
"logits/generated": -2.3259053230285645, |
|
"logits/oppo_generated": -2.779146194458008, |
|
"logits/oppo_real": -2.8336267471313477, |
|
"logits/real": -2.3255388736724854, |
|
"logps/generated": -202.4166259765625, |
|
"logps/oppo_gen": -72.71639251708984, |
|
"logps/oppo_real": -196.57557678222656, |
|
"logps/real": -192.04830932617188, |
|
"loss": -3.1315, |
|
"loss/gen": 0.3035447299480438, |
|
"loss/real": -3.032370090484619, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -129.70025634765625, |
|
"rewards/margins": 134.2274932861328, |
|
"rewards/real": 4.52725076675415, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 51.83515977656961, |
|
"learning_rate": 4.537037037037037e-07, |
|
"logits/generated": -2.5221238136291504, |
|
"logits/oppo_generated": -2.8425636291503906, |
|
"logits/oppo_real": -2.9093685150146484, |
|
"logits/real": -2.4757275581359863, |
|
"logps/generated": -224.296142578125, |
|
"logps/oppo_gen": -95.93893432617188, |
|
"logps/oppo_real": -207.11392211914062, |
|
"logps/real": -186.50677490234375, |
|
"loss": -3.0553, |
|
"loss/gen": 0.6323412656784058, |
|
"loss/real": -3.4342591762542725, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -128.35723876953125, |
|
"rewards/margins": 148.96438598632812, |
|
"rewards/real": 20.607158660888672, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 57.13147870810802, |
|
"learning_rate": 4.533475783475783e-07, |
|
"logits/generated": -2.2876157760620117, |
|
"logits/oppo_generated": -2.8224010467529297, |
|
"logits/oppo_real": -2.778409957885742, |
|
"logits/real": -2.416560649871826, |
|
"logps/generated": -224.8665313720703, |
|
"logps/oppo_gen": -88.16463470458984, |
|
"logps/oppo_real": -239.9169921875, |
|
"logps/real": -240.70309448242188, |
|
"loss": -3.0029, |
|
"loss/gen": 0.25222891569137573, |
|
"loss/real": -3.012547016143799, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -136.701904296875, |
|
"rewards/margins": 135.91583251953125, |
|
"rewards/real": -0.7860813140869141, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 89.10157585762724, |
|
"learning_rate": 4.5299145299145297e-07, |
|
"logits/generated": -2.4986462593078613, |
|
"logits/oppo_generated": -2.9657952785491943, |
|
"logits/oppo_real": -2.9425137042999268, |
|
"logits/real": -2.509366035461426, |
|
"logps/generated": -201.0703125, |
|
"logps/oppo_gen": -76.42547607421875, |
|
"logps/oppo_real": -261.8043518066406, |
|
"logps/real": -241.64723205566406, |
|
"loss": -3.1181, |
|
"loss/gen": 0.3136594295501709, |
|
"loss/real": -3.37326717376709, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -124.64483642578125, |
|
"rewards/margins": 144.8019561767578, |
|
"rewards/real": 20.157115936279297, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 59.75890689365896, |
|
"learning_rate": 4.5263532763532765e-07, |
|
"logits/generated": -1.9646540880203247, |
|
"logits/oppo_generated": -2.6656646728515625, |
|
"logits/oppo_real": -2.512063980102539, |
|
"logits/real": -2.13295316696167, |
|
"logps/generated": -146.4997100830078, |
|
"logps/oppo_gen": -61.16596603393555, |
|
"logps/oppo_real": -89.70797729492188, |
|
"logps/real": -67.80735778808594, |
|
"loss": -3.1443, |
|
"loss/gen": 0.8715238571166992, |
|
"loss/real": -3.4107680320739746, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -85.333740234375, |
|
"rewards/margins": 107.23435974121094, |
|
"rewards/real": 21.90062141418457, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 9260.308850141015, |
|
"learning_rate": 4.522792022792022e-07, |
|
"logits/generated": -2.074495792388916, |
|
"logits/oppo_generated": -2.679591655731201, |
|
"logits/oppo_real": -2.5152084827423096, |
|
"logits/real": -2.2176570892333984, |
|
"logps/generated": -301.60516357421875, |
|
"logps/oppo_gen": -134.39280700683594, |
|
"logps/oppo_real": -353.8466491699219, |
|
"logps/real": -354.19549560546875, |
|
"loss": -41.3972, |
|
"loss/gen": 0.28073543310165405, |
|
"loss/real": -2.867943525314331, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -167.2123565673828, |
|
"rewards/margins": 166.86349487304688, |
|
"rewards/real": -0.34885168075561523, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 57.11031975962729, |
|
"learning_rate": 4.519230769230769e-07, |
|
"logits/generated": -2.515873908996582, |
|
"logits/oppo_generated": -2.8852622509002686, |
|
"logits/oppo_real": -2.9888343811035156, |
|
"logits/real": -2.4470033645629883, |
|
"logps/generated": -221.6116485595703, |
|
"logps/oppo_gen": -86.57408142089844, |
|
"logps/oppo_real": -353.78594970703125, |
|
"logps/real": -337.73291015625, |
|
"loss": -3.0984, |
|
"loss/gen": 0.3061344623565674, |
|
"loss/real": -3.636414051055908, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -135.03756713867188, |
|
"rewards/margins": 151.0906219482422, |
|
"rewards/real": 16.053056716918945, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 242.2639153640096, |
|
"learning_rate": 4.5156695156695157e-07, |
|
"logits/generated": -2.5487115383148193, |
|
"logits/oppo_generated": -2.894904136657715, |
|
"logits/oppo_real": -2.8833250999450684, |
|
"logits/real": -2.4917829036712646, |
|
"logps/generated": -208.41036987304688, |
|
"logps/oppo_gen": -97.552490234375, |
|
"logps/oppo_real": -446.60357666015625, |
|
"logps/real": -420.88616943359375, |
|
"loss": -3.9814, |
|
"loss/gen": 0.4838281273841858, |
|
"loss/real": -3.5608205795288086, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -110.85787963867188, |
|
"rewards/margins": 136.57525634765625, |
|
"rewards/real": 25.71738052368164, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 61.190406675725114, |
|
"learning_rate": 4.512108262108262e-07, |
|
"logits/generated": -2.471825122833252, |
|
"logits/oppo_generated": -2.9238195419311523, |
|
"logits/oppo_real": -2.928109645843506, |
|
"logits/real": -2.5521817207336426, |
|
"logps/generated": -248.10580444335938, |
|
"logps/oppo_gen": -99.34373474121094, |
|
"logps/oppo_real": -381.1275634765625, |
|
"logps/real": -371.27154541015625, |
|
"loss": -3.1159, |
|
"loss/gen": 0.34282225370407104, |
|
"loss/real": -3.2633209228515625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -148.7620849609375, |
|
"rewards/margins": 158.61810302734375, |
|
"rewards/real": 9.856016159057617, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 59.89950894519086, |
|
"learning_rate": 4.5085470085470087e-07, |
|
"logits/generated": -2.162811279296875, |
|
"logits/oppo_generated": -2.7080626487731934, |
|
"logits/oppo_real": -2.5767087936401367, |
|
"logits/real": -2.30268931388855, |
|
"logps/generated": -199.12115478515625, |
|
"logps/oppo_gen": -46.502037048339844, |
|
"logps/oppo_real": -149.05059814453125, |
|
"logps/real": -149.08099365234375, |
|
"loss": -2.8636, |
|
"loss/gen": 0.46407026052474976, |
|
"loss/real": -2.982802391052246, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -152.61912536621094, |
|
"rewards/margins": 152.58872985839844, |
|
"rewards/real": -0.030394554138183594, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 63.384206370258354, |
|
"learning_rate": 4.5049857549857543e-07, |
|
"logits/generated": -2.5746748447418213, |
|
"logits/oppo_generated": -2.9217922687530518, |
|
"logits/oppo_real": -3.0358145236968994, |
|
"logits/real": -2.5657949447631836, |
|
"logps/generated": -201.57012939453125, |
|
"logps/oppo_gen": -72.13301849365234, |
|
"logps/oppo_real": -295.51861572265625, |
|
"logps/real": -298.053955078125, |
|
"loss": -2.9233, |
|
"loss/gen": 0.28429698944091797, |
|
"loss/real": -3.108995199203491, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -129.4371337890625, |
|
"rewards/margins": 126.90178680419922, |
|
"rewards/real": -2.5353341102600098, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 94.99060911644108, |
|
"learning_rate": 4.501424501424501e-07, |
|
"logits/generated": -2.3787384033203125, |
|
"logits/oppo_generated": -2.7406344413757324, |
|
"logits/oppo_real": -2.799593925476074, |
|
"logits/real": -2.3933238983154297, |
|
"logps/generated": -222.70245361328125, |
|
"logps/oppo_gen": -102.60955810546875, |
|
"logps/oppo_real": -305.8299255371094, |
|
"logps/real": -273.30157470703125, |
|
"loss": -3.2776, |
|
"loss/gen": 0.36470329761505127, |
|
"loss/real": -3.8918604850769043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -120.0928955078125, |
|
"rewards/margins": 152.62124633789062, |
|
"rewards/real": 32.52833938598633, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 350.9167124297445, |
|
"learning_rate": 4.497863247863248e-07, |
|
"logits/generated": -2.6413869857788086, |
|
"logits/oppo_generated": -2.8220396041870117, |
|
"logits/oppo_real": -3.0663821697235107, |
|
"logits/real": -2.490924596786499, |
|
"logps/generated": -178.68812561035156, |
|
"logps/oppo_gen": -80.95722961425781, |
|
"logps/oppo_real": -339.0364074707031, |
|
"logps/real": -321.1693115234375, |
|
"loss": -3.7641, |
|
"loss/gen": 0.9792780876159668, |
|
"loss/real": -3.2799935340881348, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -97.73089599609375, |
|
"rewards/margins": 115.59801483154297, |
|
"rewards/real": 17.867115020751953, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 568.1687689410603, |
|
"learning_rate": 4.494301994301994e-07, |
|
"logits/generated": -2.53959321975708, |
|
"logits/oppo_generated": -2.8528313636779785, |
|
"logits/oppo_real": -2.9469070434570312, |
|
"logits/real": -2.4770290851593018, |
|
"logps/generated": -186.40338134765625, |
|
"logps/oppo_gen": -55.95906066894531, |
|
"logps/oppo_real": -228.37322998046875, |
|
"logps/real": -216.5171661376953, |
|
"loss": -4.4415, |
|
"loss/gen": 0.2824003994464874, |
|
"loss/real": -3.352588653564453, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -130.44430541992188, |
|
"rewards/margins": 142.30039978027344, |
|
"rewards/real": 11.856078147888184, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 82.08388787354983, |
|
"learning_rate": 4.4907407407407403e-07, |
|
"logits/generated": -2.3993959426879883, |
|
"logits/oppo_generated": -2.759657859802246, |
|
"logits/oppo_real": -2.7739434242248535, |
|
"logits/real": -2.4300918579101562, |
|
"logps/generated": -195.13946533203125, |
|
"logps/oppo_gen": -55.900001525878906, |
|
"logps/oppo_real": -240.51673889160156, |
|
"logps/real": -254.91921997070312, |
|
"loss": -3.0531, |
|
"loss/gen": 0.31808772683143616, |
|
"loss/real": -2.974119186401367, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -139.23947143554688, |
|
"rewards/margins": 124.83699035644531, |
|
"rewards/real": -14.402481079101562, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 59.35182524704906, |
|
"learning_rate": 4.487179487179487e-07, |
|
"logits/generated": -2.422110080718994, |
|
"logits/oppo_generated": -2.714049816131592, |
|
"logits/oppo_real": -2.821863889694214, |
|
"logits/real": -2.3553073406219482, |
|
"logps/generated": -191.3336181640625, |
|
"logps/oppo_gen": -61.66150665283203, |
|
"logps/oppo_real": -281.81561279296875, |
|
"logps/real": -268.0882873535156, |
|
"loss": -2.9157, |
|
"loss/gen": 0.2871388792991638, |
|
"loss/real": -3.4819259643554688, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -129.67208862304688, |
|
"rewards/margins": 143.39939880371094, |
|
"rewards/real": 13.727313995361328, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 61.67208283154745, |
|
"learning_rate": 4.4836182336182333e-07, |
|
"logits/generated": -2.2326247692108154, |
|
"logits/oppo_generated": -2.7336645126342773, |
|
"logits/oppo_real": -2.6636435985565186, |
|
"logits/real": -2.38464617729187, |
|
"logps/generated": -191.60995483398438, |
|
"logps/oppo_gen": -66.04891204833984, |
|
"logps/oppo_real": -343.6158447265625, |
|
"logps/real": -311.3708801269531, |
|
"loss": -3.0524, |
|
"loss/gen": 0.30880045890808105, |
|
"loss/real": -4.188716888427734, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -125.56105041503906, |
|
"rewards/margins": 157.8060302734375, |
|
"rewards/real": 32.2449836730957, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 79.39509465129127, |
|
"learning_rate": 4.48005698005698e-07, |
|
"logits/generated": -2.459395408630371, |
|
"logits/oppo_generated": -3.0542874336242676, |
|
"logits/oppo_real": -2.803119659423828, |
|
"logits/real": -2.7458314895629883, |
|
"logps/generated": -202.55088806152344, |
|
"logps/oppo_gen": -81.553955078125, |
|
"logps/oppo_real": -376.17071533203125, |
|
"logps/real": -320.265869140625, |
|
"loss": -3.204, |
|
"loss/gen": 0.3818941116333008, |
|
"loss/real": -5.556370735168457, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -120.99693298339844, |
|
"rewards/margins": 176.90176391601562, |
|
"rewards/real": 55.90484619140625, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 64.34859516119019, |
|
"learning_rate": 4.476495726495726e-07, |
|
"logits/generated": -2.551626205444336, |
|
"logits/oppo_generated": -2.791293144226074, |
|
"logits/oppo_real": -2.8689441680908203, |
|
"logits/real": -2.4949615001678467, |
|
"logps/generated": -214.268310546875, |
|
"logps/oppo_gen": -90.10079956054688, |
|
"logps/oppo_real": -387.6597900390625, |
|
"logps/real": -355.855712890625, |
|
"loss": -2.9635, |
|
"loss/gen": 0.7095820903778076, |
|
"loss/real": -3.8604226112365723, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -124.16752624511719, |
|
"rewards/margins": 155.97164916992188, |
|
"rewards/real": 31.804113388061523, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 55.47510967063292, |
|
"learning_rate": 4.4729344729344725e-07, |
|
"logits/generated": -2.5076422691345215, |
|
"logits/oppo_generated": -2.8356850147247314, |
|
"logits/oppo_real": -2.917833089828491, |
|
"logits/real": -2.4860076904296875, |
|
"logps/generated": -208.80723571777344, |
|
"logps/oppo_gen": -76.40264892578125, |
|
"logps/oppo_real": -278.172607421875, |
|
"logps/real": -251.12782287597656, |
|
"loss": -3.1359, |
|
"loss/gen": 0.3080252408981323, |
|
"loss/real": -3.7138681411743164, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -132.4045867919922, |
|
"rewards/margins": 159.44937133789062, |
|
"rewards/real": 27.04478645324707, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 55.47510967063292, |
|
"learning_rate": 4.4729344729344725e-07, |
|
"logits/generated": -2.7056777477264404, |
|
"logits/oppo_generated": -3.0011539459228516, |
|
"logits/oppo_real": -3.069876194000244, |
|
"logits/real": -2.696037530899048, |
|
"logps/generated": -191.9580078125, |
|
"logps/oppo_gen": -69.13575744628906, |
|
"logps/oppo_real": -340.70343017578125, |
|
"logps/real": -328.8914794921875, |
|
"loss": -1322.4869, |
|
"loss/gen": 0.4668487310409546, |
|
"loss/real": -3.3937647342681885, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -122.82225036621094, |
|
"rewards/margins": 134.6342010498047, |
|
"rewards/real": 11.811951637268066, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 76.39824855854027, |
|
"learning_rate": 4.469373219373219e-07, |
|
"logits/generated": -2.6483988761901855, |
|
"logits/oppo_generated": -2.821411609649658, |
|
"logits/oppo_real": -2.9697532653808594, |
|
"logits/real": -2.5232529640197754, |
|
"logps/generated": -208.79844665527344, |
|
"logps/oppo_gen": -94.25292205810547, |
|
"logps/oppo_real": -449.1705322265625, |
|
"logps/real": -411.73590087890625, |
|
"loss": -3.1662, |
|
"loss/gen": 0.6144514083862305, |
|
"loss/real": -4.066596984863281, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -114.54552459716797, |
|
"rewards/margins": 151.98013305664062, |
|
"rewards/real": 37.434608459472656, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 80.08028198314489, |
|
"learning_rate": 4.465811965811966e-07, |
|
"logits/generated": -2.521160125732422, |
|
"logits/oppo_generated": -2.9498441219329834, |
|
"logits/oppo_real": -2.889374017715454, |
|
"logits/real": -2.6443803310394287, |
|
"logps/generated": -218.7113037109375, |
|
"logps/oppo_gen": -93.28401184082031, |
|
"logps/oppo_real": -446.9027099609375, |
|
"logps/real": -420.687744140625, |
|
"loss": -3.1837, |
|
"loss/gen": 0.3799129128456116, |
|
"loss/real": -3.5870652198791504, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -125.42729187011719, |
|
"rewards/margins": 151.6422119140625, |
|
"rewards/real": 26.214933395385742, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 85.4263034452065, |
|
"learning_rate": 4.4622507122507117e-07, |
|
"logits/generated": -2.0849194526672363, |
|
"logits/oppo_generated": -2.5877699851989746, |
|
"logits/oppo_real": -2.4145617485046387, |
|
"logits/real": -2.3253278732299805, |
|
"logps/generated": -175.31253051757812, |
|
"logps/oppo_gen": -58.147544860839844, |
|
"logps/oppo_real": -256.63494873046875, |
|
"logps/real": -251.91702270507812, |
|
"loss": -3.1424, |
|
"loss/gen": 0.633264422416687, |
|
"loss/real": -2.9826745986938477, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -117.16497802734375, |
|
"rewards/margins": 121.88292694091797, |
|
"rewards/real": 4.7179460525512695, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 61.56183771851885, |
|
"learning_rate": 4.4586894586894584e-07, |
|
"logits/generated": -2.5706653594970703, |
|
"logits/oppo_generated": -2.825096607208252, |
|
"logits/oppo_real": -2.919394016265869, |
|
"logits/real": -2.5202863216400146, |
|
"logps/generated": -173.32174682617188, |
|
"logps/oppo_gen": -62.71122360229492, |
|
"logps/oppo_real": -234.44354248046875, |
|
"logps/real": -211.79794311523438, |
|
"loss": -3.0867, |
|
"loss/gen": 0.7330012321472168, |
|
"loss/real": -3.4876270294189453, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -110.61052703857422, |
|
"rewards/margins": 133.25613403320312, |
|
"rewards/real": 22.645606994628906, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 55.0575072225623, |
|
"learning_rate": 4.455128205128205e-07, |
|
"logits/generated": -2.589749336242676, |
|
"logits/oppo_generated": -2.681910276412964, |
|
"logits/oppo_real": -2.8930723667144775, |
|
"logits/real": -2.393826484680176, |
|
"logps/generated": -184.09539794921875, |
|
"logps/oppo_gen": -69.35714721679688, |
|
"logps/oppo_real": -321.68878173828125, |
|
"logps/real": -300.22052001953125, |
|
"loss": -2.9885, |
|
"loss/gen": 0.6509556770324707, |
|
"loss/real": -3.5244717597961426, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -114.73826599121094, |
|
"rewards/margins": 136.20651245117188, |
|
"rewards/real": 21.46826171875, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 205.9144144875487, |
|
"learning_rate": 4.4515669515669514e-07, |
|
"logits/generated": -2.5176801681518555, |
|
"logits/oppo_generated": -2.910146951675415, |
|
"logits/oppo_real": -2.842686653137207, |
|
"logits/real": -2.6160125732421875, |
|
"logps/generated": -192.1990966796875, |
|
"logps/oppo_gen": -55.29602813720703, |
|
"logps/oppo_real": -188.457763671875, |
|
"logps/real": -170.13516235351562, |
|
"loss": -3.8694, |
|
"loss/gen": 0.29164981842041016, |
|
"loss/real": -3.5206615924835205, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -136.903076171875, |
|
"rewards/margins": 155.22567749023438, |
|
"rewards/real": 18.32259750366211, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 72.13692086277665, |
|
"learning_rate": 4.448005698005698e-07, |
|
"logits/generated": -2.6976194381713867, |
|
"logits/oppo_generated": -2.9482345581054688, |
|
"logits/oppo_real": -3.0109448432922363, |
|
"logits/real": -2.652653455734253, |
|
"logps/generated": -196.82077026367188, |
|
"logps/oppo_gen": -70.6409912109375, |
|
"logps/oppo_real": -375.189697265625, |
|
"logps/real": -351.4737243652344, |
|
"loss": -3.0215, |
|
"loss/gen": 0.35152187943458557, |
|
"loss/real": -3.49497127532959, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -126.1797866821289, |
|
"rewards/margins": 149.89573669433594, |
|
"rewards/real": 23.7159423828125, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 61.61398545074811, |
|
"learning_rate": 4.444444444444444e-07, |
|
"logits/generated": -2.5743110179901123, |
|
"logits/oppo_generated": -2.7811834812164307, |
|
"logits/oppo_real": -2.923962116241455, |
|
"logits/real": -2.454921245574951, |
|
"logps/generated": -196.2818603515625, |
|
"logps/oppo_gen": -71.71026611328125, |
|
"logps/oppo_real": -353.846923828125, |
|
"logps/real": -345.35919189453125, |
|
"loss": -3.0779, |
|
"loss/gen": 0.32238900661468506, |
|
"loss/real": -3.2966091632843018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -124.57159423828125, |
|
"rewards/margins": 133.05931091308594, |
|
"rewards/real": 8.487724304199219, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 57.15490447432092, |
|
"learning_rate": 4.4408831908831906e-07, |
|
"logits/generated": -2.782914161682129, |
|
"logits/oppo_generated": -2.8043360710144043, |
|
"logits/oppo_real": -3.0211949348449707, |
|
"logits/real": -2.5231986045837402, |
|
"logps/generated": -194.0042724609375, |
|
"logps/oppo_gen": -77.71004486083984, |
|
"logps/oppo_real": -389.77301025390625, |
|
"logps/real": -358.2027282714844, |
|
"loss": -3.0323, |
|
"loss/gen": 0.423714816570282, |
|
"loss/real": -3.7564921379089355, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -116.29424285888672, |
|
"rewards/margins": 147.8645477294922, |
|
"rewards/real": 31.570310592651367, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 59.74080162590932, |
|
"learning_rate": 4.4373219373219373e-07, |
|
"logits/generated": -2.357008934020996, |
|
"logits/oppo_generated": -2.7760987281799316, |
|
"logits/oppo_real": -2.740163803100586, |
|
"logits/real": -2.439347505569458, |
|
"logps/generated": -205.67852783203125, |
|
"logps/oppo_gen": -88.69313049316406, |
|
"logps/oppo_real": -338.8006591796875, |
|
"logps/real": -314.58441162109375, |
|
"loss": -3.0479, |
|
"loss/gen": 0.4672941565513611, |
|
"loss/real": -3.506350040435791, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -116.98541259765625, |
|
"rewards/margins": 141.20166015625, |
|
"rewards/real": 24.21624755859375, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1848.1626687064102, |
|
"learning_rate": 4.4337606837606836e-07, |
|
"logits/generated": -2.4638514518737793, |
|
"logits/oppo_generated": -2.7127938270568848, |
|
"logits/oppo_real": -2.803234577178955, |
|
"logits/real": -2.38244366645813, |
|
"logps/generated": -237.40353393554688, |
|
"logps/oppo_gen": -85.75541687011719, |
|
"logps/oppo_real": -242.4071807861328, |
|
"logps/real": -237.742431640625, |
|
"loss": -5.9552, |
|
"loss/gen": 0.2063872367143631, |
|
"loss/real": -3.0251030921936035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -151.64813232421875, |
|
"rewards/margins": 156.31288146972656, |
|
"rewards/real": 4.664756774902344, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 400.5685997741211, |
|
"learning_rate": 4.43019943019943e-07, |
|
"logits/generated": -2.5298070907592773, |
|
"logits/oppo_generated": -2.995426654815674, |
|
"logits/oppo_real": -2.8803281784057617, |
|
"logits/real": -2.6857643127441406, |
|
"logps/generated": -184.82400512695312, |
|
"logps/oppo_gen": -68.82854461669922, |
|
"logps/oppo_real": -337.844482421875, |
|
"logps/real": -296.872314453125, |
|
"loss": -4.4013, |
|
"loss/gen": 0.568313717842102, |
|
"loss/real": -4.289045333862305, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -115.99545288085938, |
|
"rewards/margins": 156.96762084960938, |
|
"rewards/real": 40.97218322753906, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 40892.68061646241, |
|
"learning_rate": 4.4266381766381765e-07, |
|
"logits/generated": -2.705117702484131, |
|
"logits/oppo_generated": -2.6126418113708496, |
|
"logits/oppo_real": -3.0222294330596924, |
|
"logits/real": -2.3141441345214844, |
|
"logps/generated": -171.36810302734375, |
|
"logps/oppo_gen": -56.36054992675781, |
|
"logps/oppo_real": -325.3075256347656, |
|
"logps/real": -308.48114013671875, |
|
"loss": -129.61, |
|
"loss/gen": 0.39068758487701416, |
|
"loss/real": -3.74429988861084, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -115.00755310058594, |
|
"rewards/margins": 131.83392333984375, |
|
"rewards/real": 16.826370239257812, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 3590.9546084572, |
|
"learning_rate": 4.423076923076923e-07, |
|
"logits/generated": -2.696446418762207, |
|
"logits/oppo_generated": -3.026592254638672, |
|
"logits/oppo_real": -2.9974026679992676, |
|
"logits/real": -2.68188214302063, |
|
"logps/generated": -209.5288543701172, |
|
"logps/oppo_gen": -81.62860107421875, |
|
"logps/oppo_real": -354.01513671875, |
|
"logps/real": -335.030517578125, |
|
"loss": -13.5441, |
|
"loss/gen": 0.3259393572807312, |
|
"loss/real": -3.325887680053711, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -127.90025329589844, |
|
"rewards/margins": 146.8848876953125, |
|
"rewards/real": 18.9846248626709, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 62.96425038203723, |
|
"learning_rate": 4.4195156695156695e-07, |
|
"logits/generated": -2.4695000648498535, |
|
"logits/oppo_generated": -2.86299991607666, |
|
"logits/oppo_real": -2.897392749786377, |
|
"logits/real": -2.52815580368042, |
|
"logps/generated": -159.04405212402344, |
|
"logps/oppo_gen": -55.654396057128906, |
|
"logps/oppo_real": -286.4037170410156, |
|
"logps/real": -273.60174560546875, |
|
"loss": -3.259, |
|
"loss/gen": 0.6177021265029907, |
|
"loss/real": -3.30937123298645, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -103.38966369628906, |
|
"rewards/margins": 116.19161224365234, |
|
"rewards/real": 12.801952362060547, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 57.797468485057806, |
|
"learning_rate": 4.4159544159544157e-07, |
|
"logits/generated": -2.4351806640625, |
|
"logits/oppo_generated": -2.8678367137908936, |
|
"logits/oppo_real": -2.797013759613037, |
|
"logits/real": -2.479971408843994, |
|
"logps/generated": -265.5663757324219, |
|
"logps/oppo_gen": -154.916748046875, |
|
"logps/oppo_real": -268.4582824707031, |
|
"logps/real": -245.14251708984375, |
|
"loss": -3.1633, |
|
"loss/gen": 0.5528236627578735, |
|
"loss/real": -3.513732433319092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -110.6496353149414, |
|
"rewards/margins": 133.96539306640625, |
|
"rewards/real": 23.315759658813477, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 80.14004161173895, |
|
"learning_rate": 4.412393162393162e-07, |
|
"logits/generated": -2.6762161254882812, |
|
"logits/oppo_generated": -2.879833221435547, |
|
"logits/oppo_real": -3.0112786293029785, |
|
"logits/real": -2.480961799621582, |
|
"logps/generated": -193.92112731933594, |
|
"logps/oppo_gen": -96.10844421386719, |
|
"logps/oppo_real": -492.59039306640625, |
|
"logps/real": -480.1392517089844, |
|
"loss": -2.9444, |
|
"loss/gen": 1.0747777223587036, |
|
"loss/real": -3.2505507469177246, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -97.81267547607422, |
|
"rewards/margins": 110.26382446289062, |
|
"rewards/real": 12.45114803314209, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 27165.010436221077, |
|
"learning_rate": 4.4088319088319087e-07, |
|
"logits/generated": -2.661689281463623, |
|
"logits/oppo_generated": -2.855457305908203, |
|
"logits/oppo_real": -3.161579132080078, |
|
"logits/real": -2.475346565246582, |
|
"logps/generated": -173.70448303222656, |
|
"logps/oppo_gen": -79.04156494140625, |
|
"logps/oppo_real": -508.73779296875, |
|
"logps/real": -485.790283203125, |
|
"loss": -63.2568, |
|
"loss/gen": 0.8462377786636353, |
|
"loss/real": -3.7985730171203613, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -94.66291809082031, |
|
"rewards/margins": 117.61046600341797, |
|
"rewards/real": 22.947547912597656, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 79.71609474703807, |
|
"learning_rate": 4.4052706552706555e-07, |
|
"logits/generated": -2.6024856567382812, |
|
"logits/oppo_generated": -2.8270015716552734, |
|
"logits/oppo_real": -2.9884450435638428, |
|
"logits/real": -2.381761074066162, |
|
"logps/generated": -193.1739959716797, |
|
"logps/oppo_gen": -79.96229553222656, |
|
"logps/oppo_real": -295.296630859375, |
|
"logps/real": -280.0252685546875, |
|
"loss": -3.1061, |
|
"loss/gen": 0.8399382829666138, |
|
"loss/real": -3.309168815612793, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -113.2116928100586, |
|
"rewards/margins": 128.48306274414062, |
|
"rewards/real": 15.27135944366455, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 117.95946155486003, |
|
"learning_rate": 4.4017094017094017e-07, |
|
"logits/generated": -2.274564266204834, |
|
"logits/oppo_generated": -2.7040886878967285, |
|
"logits/oppo_real": -2.816561698913574, |
|
"logits/real": -2.2658865451812744, |
|
"logps/generated": -187.32923889160156, |
|
"logps/oppo_gen": -55.71031188964844, |
|
"logps/oppo_real": -202.95962524414062, |
|
"logps/real": -166.00265502929688, |
|
"loss": -3.2002, |
|
"loss/gen": 0.33614322543144226, |
|
"loss/real": -4.0761213302612305, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -131.61892700195312, |
|
"rewards/margins": 168.57589721679688, |
|
"rewards/real": 36.956966400146484, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 88.19558620351276, |
|
"learning_rate": 4.398148148148148e-07, |
|
"logits/generated": -1.9364006519317627, |
|
"logits/oppo_generated": -2.385345458984375, |
|
"logits/oppo_real": -2.4835422039031982, |
|
"logits/real": -1.808083415031433, |
|
"logps/generated": -179.90750122070312, |
|
"logps/oppo_gen": -75.58077239990234, |
|
"logps/oppo_real": -339.3034973144531, |
|
"logps/real": -272.0712585449219, |
|
"loss": -3.2605, |
|
"loss/gen": 0.8947268724441528, |
|
"loss/real": -8.812257766723633, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -104.32673645019531, |
|
"rewards/margins": 171.55897521972656, |
|
"rewards/real": 67.23223876953125, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 5908.862834759325, |
|
"learning_rate": 4.394586894586894e-07, |
|
"logits/generated": -2.601797580718994, |
|
"logits/oppo_generated": -3.011491060256958, |
|
"logits/oppo_real": -3.0487937927246094, |
|
"logits/real": -2.562220573425293, |
|
"logps/generated": -246.65899658203125, |
|
"logps/oppo_gen": -131.22396850585938, |
|
"logps/oppo_real": -400.33868408203125, |
|
"logps/real": -376.3664245605469, |
|
"loss": -13.8663, |
|
"loss/gen": 0.42373475432395935, |
|
"loss/real": -3.5093624591827393, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -115.43504333496094, |
|
"rewards/margins": 139.40728759765625, |
|
"rewards/real": 23.972253799438477, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 95.4678242590141, |
|
"learning_rate": 4.391025641025641e-07, |
|
"logits/generated": -2.3840436935424805, |
|
"logits/oppo_generated": -2.755108118057251, |
|
"logits/oppo_real": -2.8694067001342773, |
|
"logits/real": -2.2530529499053955, |
|
"logps/generated": -177.37356567382812, |
|
"logps/oppo_gen": -61.73572540283203, |
|
"logps/oppo_real": -230.838134765625, |
|
"logps/real": -215.96424865722656, |
|
"loss": -3.0607, |
|
"loss/gen": 0.6986711025238037, |
|
"loss/real": -3.6220858097076416, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -115.63784790039062, |
|
"rewards/margins": 130.51173400878906, |
|
"rewards/real": 14.873891830444336, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 87.51268602233479, |
|
"learning_rate": 4.3874643874643876e-07, |
|
"logits/generated": -2.3197760581970215, |
|
"logits/oppo_generated": -2.8574419021606445, |
|
"logits/oppo_real": -2.923137903213501, |
|
"logits/real": -2.3333818912506104, |
|
"logps/generated": -231.46023559570312, |
|
"logps/oppo_gen": -82.77210998535156, |
|
"logps/oppo_real": -252.58892822265625, |
|
"logps/real": -270.0004577636719, |
|
"loss": -2.98, |
|
"loss/gen": 0.21608535945415497, |
|
"loss/real": -2.6946630477905273, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -148.6881103515625, |
|
"rewards/margins": 131.2765655517578, |
|
"rewards/real": -17.411537170410156, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 87.51268602233479, |
|
"learning_rate": 4.3874643874643876e-07, |
|
"logits/generated": -2.242218017578125, |
|
"logits/oppo_generated": -2.994565010070801, |
|
"logits/oppo_real": -2.8149280548095703, |
|
"logits/real": -2.4609484672546387, |
|
"logps/generated": -181.10708618164062, |
|
"logps/oppo_gen": -48.2861213684082, |
|
"logps/oppo_real": -137.37625122070312, |
|
"logps/real": -162.7267608642578, |
|
"loss": -7932.2959, |
|
"loss/gen": 0.7526332139968872, |
|
"loss/real": -2.542538642883301, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -132.82098388671875, |
|
"rewards/margins": 107.47045135498047, |
|
"rewards/real": -25.350521087646484, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 67.6441819490967, |
|
"learning_rate": 4.3839031339031333e-07, |
|
"logits/generated": -2.315286636352539, |
|
"logits/oppo_generated": -2.816603422164917, |
|
"logits/oppo_real": -2.9343314170837402, |
|
"logits/real": -2.3111538887023926, |
|
"logps/generated": -155.87979125976562, |
|
"logps/oppo_gen": -30.44548988342285, |
|
"logps/oppo_real": -174.9966278076172, |
|
"logps/real": -169.0306854248047, |
|
"loss": -2.8919, |
|
"loss/gen": 0.31436973810195923, |
|
"loss/real": -3.049879550933838, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -125.43431091308594, |
|
"rewards/margins": 131.40023803710938, |
|
"rewards/real": 5.965947151184082, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 61.01237787454485, |
|
"learning_rate": 4.38034188034188e-07, |
|
"logits/generated": -2.3058667182922363, |
|
"logits/oppo_generated": -2.6415185928344727, |
|
"logits/oppo_real": -3.0115818977355957, |
|
"logits/real": -2.004304885864258, |
|
"logps/generated": -215.7899627685547, |
|
"logps/oppo_gen": -93.466064453125, |
|
"logps/oppo_real": -340.529296875, |
|
"logps/real": -337.6749572753906, |
|
"loss": -3.1087, |
|
"loss/gen": 0.5611802339553833, |
|
"loss/real": -2.9437613487243652, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -122.32388305664062, |
|
"rewards/margins": 125.1782455444336, |
|
"rewards/real": 2.8543548583984375, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 68.83477018148241, |
|
"learning_rate": 4.376780626780627e-07, |
|
"logits/generated": -2.3567757606506348, |
|
"logits/oppo_generated": -2.7984108924865723, |
|
"logits/oppo_real": -2.9754528999328613, |
|
"logits/real": -2.275757312774658, |
|
"logps/generated": -208.89105224609375, |
|
"logps/oppo_gen": -69.67858123779297, |
|
"logps/oppo_real": -268.7974853515625, |
|
"logps/real": -266.00445556640625, |
|
"loss": -3.1351, |
|
"loss/gen": 0.24598746001720428, |
|
"loss/real": -3.0755763053894043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -139.21246337890625, |
|
"rewards/margins": 142.00546264648438, |
|
"rewards/real": 2.7930030822753906, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 103.64887767723296, |
|
"learning_rate": 4.373219373219373e-07, |
|
"logits/generated": -2.2009589672088623, |
|
"logits/oppo_generated": -2.7994847297668457, |
|
"logits/oppo_real": -2.687981605529785, |
|
"logits/real": -2.264253616333008, |
|
"logps/generated": -200.60202026367188, |
|
"logps/oppo_gen": -76.17577362060547, |
|
"logps/oppo_real": -381.5020751953125, |
|
"logps/real": -350.08245849609375, |
|
"loss": -3.0609, |
|
"loss/gen": 0.34163713455200195, |
|
"loss/real": -4.447661399841309, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -124.42623138427734, |
|
"rewards/margins": 155.8458251953125, |
|
"rewards/real": 31.419601440429688, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 4236.734355609282, |
|
"learning_rate": 4.3696581196581193e-07, |
|
"logits/generated": -2.313900947570801, |
|
"logits/oppo_generated": -2.8429031372070312, |
|
"logits/oppo_real": -3.0224597454071045, |
|
"logits/real": -2.2679154872894287, |
|
"logps/generated": -197.47596740722656, |
|
"logps/oppo_gen": -78.5534439086914, |
|
"logps/oppo_real": -246.5026397705078, |
|
"logps/real": -232.70751953125, |
|
"loss": -7.8903, |
|
"loss/gen": 0.3893076479434967, |
|
"loss/real": -3.408906936645508, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -118.92252349853516, |
|
"rewards/margins": 132.71763610839844, |
|
"rewards/real": 13.795119285583496, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 65.91049876655653, |
|
"learning_rate": 4.366096866096866e-07, |
|
"logits/generated": -2.122530221939087, |
|
"logits/oppo_generated": -2.5529236793518066, |
|
"logits/oppo_real": -2.7146146297454834, |
|
"logits/real": -1.90482759475708, |
|
"logps/generated": -217.27114868164062, |
|
"logps/oppo_gen": -79.70944213867188, |
|
"logps/oppo_real": -106.01055145263672, |
|
"logps/real": -128.87461853027344, |
|
"loss": -3.0589, |
|
"loss/gen": 0.2452090084552765, |
|
"loss/real": -2.452601909637451, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -137.56170654296875, |
|
"rewards/margins": 114.6976547241211, |
|
"rewards/real": -22.864065170288086, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 100.04292047666341, |
|
"learning_rate": 4.362535612535612e-07, |
|
"logits/generated": -1.987313985824585, |
|
"logits/oppo_generated": -2.5894346237182617, |
|
"logits/oppo_real": -2.6849865913391113, |
|
"logits/real": -2.029129981994629, |
|
"logps/generated": -221.10101318359375, |
|
"logps/oppo_gen": -67.09019470214844, |
|
"logps/oppo_real": -256.4427185058594, |
|
"logps/real": -237.23843383789062, |
|
"loss": -3.213, |
|
"loss/gen": 0.4131355285644531, |
|
"loss/real": -3.562318801879883, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -154.0108184814453, |
|
"rewards/margins": 173.215087890625, |
|
"rewards/real": 19.204273223876953, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 79.75284391970766, |
|
"learning_rate": 4.358974358974359e-07, |
|
"logits/generated": -2.341341495513916, |
|
"logits/oppo_generated": -2.959817886352539, |
|
"logits/oppo_real": -2.9362192153930664, |
|
"logits/real": -2.416731119155884, |
|
"logps/generated": -216.04949951171875, |
|
"logps/oppo_gen": -82.48292541503906, |
|
"logps/oppo_real": -458.88818359375, |
|
"logps/real": -485.8726806640625, |
|
"loss": -2.8748, |
|
"loss/gen": 0.28640565276145935, |
|
"loss/real": -2.7164149284362793, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -133.5665740966797, |
|
"rewards/margins": 106.58207702636719, |
|
"rewards/real": -26.9844970703125, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 263.329314974615, |
|
"learning_rate": 4.355413105413105e-07, |
|
"logits/generated": -2.236466884613037, |
|
"logits/oppo_generated": -2.7284858226776123, |
|
"logits/oppo_real": -2.8326492309570312, |
|
"logits/real": -2.176626205444336, |
|
"logps/generated": -187.85845947265625, |
|
"logps/oppo_gen": -60.89936828613281, |
|
"logps/oppo_real": -245.58233642578125, |
|
"logps/real": -235.84445190429688, |
|
"loss": -3.2515, |
|
"loss/gen": 0.3334371745586395, |
|
"loss/real": -3.059086322784424, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -126.9590835571289, |
|
"rewards/margins": 136.69699096679688, |
|
"rewards/real": 9.737905502319336, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 130.99742811168088, |
|
"learning_rate": 4.3518518518518514e-07, |
|
"logits/generated": -2.2553353309631348, |
|
"logits/oppo_generated": -2.884782075881958, |
|
"logits/oppo_real": -3.007986545562744, |
|
"logits/real": -2.3814938068389893, |
|
"logps/generated": -198.6071319580078, |
|
"logps/oppo_gen": -64.29571533203125, |
|
"logps/oppo_real": -445.2386169433594, |
|
"logps/real": -404.5937805175781, |
|
"loss": -3.0983, |
|
"loss/gen": 0.26874154806137085, |
|
"loss/real": -4.306643962860107, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -134.31141662597656, |
|
"rewards/margins": 174.95623779296875, |
|
"rewards/real": 40.64482498168945, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 70.3636098964248, |
|
"learning_rate": 4.348290598290598e-07, |
|
"logits/generated": -2.1750454902648926, |
|
"logits/oppo_generated": -2.8430304527282715, |
|
"logits/oppo_real": -2.873483657836914, |
|
"logits/real": -2.3181915283203125, |
|
"logps/generated": -196.6973876953125, |
|
"logps/oppo_gen": -68.79239654541016, |
|
"logps/oppo_real": -391.89910888671875, |
|
"logps/real": -368.2919616699219, |
|
"loss": -2.8684, |
|
"loss/gen": 0.313241183757782, |
|
"loss/real": -3.5294508934020996, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -127.90498352050781, |
|
"rewards/margins": 151.51217651367188, |
|
"rewards/real": 23.607187271118164, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 69.64450397263053, |
|
"learning_rate": 4.3447293447293444e-07, |
|
"logits/generated": -2.3948874473571777, |
|
"logits/oppo_generated": -2.8508265018463135, |
|
"logits/oppo_real": -2.9677348136901855, |
|
"logits/real": -2.3284974098205566, |
|
"logps/generated": -211.0993194580078, |
|
"logps/oppo_gen": -88.43344116210938, |
|
"logps/oppo_real": -438.55322265625, |
|
"logps/real": -395.72943115234375, |
|
"loss": -3.1273, |
|
"loss/gen": 0.3554914891719818, |
|
"loss/real": -4.292209625244141, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -122.66587829589844, |
|
"rewards/margins": 165.48968505859375, |
|
"rewards/real": 42.82379913330078, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 287.0907957423923, |
|
"learning_rate": 4.341168091168091e-07, |
|
"logits/generated": -2.3292388916015625, |
|
"logits/oppo_generated": -2.816070079803467, |
|
"logits/oppo_real": -3.012850761413574, |
|
"logits/real": -2.2773959636688232, |
|
"logps/generated": -189.18850708007812, |
|
"logps/oppo_gen": -55.2912483215332, |
|
"logps/oppo_real": -255.20977783203125, |
|
"logps/real": -236.0473175048828, |
|
"loss": -3.7901, |
|
"loss/gen": 0.27096259593963623, |
|
"loss/real": -3.297393321990967, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -133.89724731445312, |
|
"rewards/margins": 153.05970764160156, |
|
"rewards/real": 19.162452697753906, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 68.14058737960438, |
|
"learning_rate": 4.3376068376068374e-07, |
|
"logits/generated": -2.3537933826446533, |
|
"logits/oppo_generated": -2.701869487762451, |
|
"logits/oppo_real": -2.963564872741699, |
|
"logits/real": -2.1307592391967773, |
|
"logps/generated": -207.35293579101562, |
|
"logps/oppo_gen": -83.03327941894531, |
|
"logps/oppo_real": -312.4057312011719, |
|
"logps/real": -294.8611755371094, |
|
"loss": -3.0868, |
|
"loss/gen": 0.3351461589336395, |
|
"loss/real": -3.4385178089141846, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -124.31964111328125, |
|
"rewards/margins": 141.8642120361328, |
|
"rewards/real": 17.544559478759766, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 66.31466478558872, |
|
"learning_rate": 4.3340455840455836e-07, |
|
"logits/generated": -2.291761636734009, |
|
"logits/oppo_generated": -2.8546152114868164, |
|
"logits/oppo_real": -3.036848545074463, |
|
"logits/real": -2.2752645015716553, |
|
"logps/generated": -213.76364135742188, |
|
"logps/oppo_gen": -75.19477844238281, |
|
"logps/oppo_real": -314.191162109375, |
|
"logps/real": -290.7506103515625, |
|
"loss": -3.1633, |
|
"loss/gen": 0.2661153972148895, |
|
"loss/real": -3.5240395069122314, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -138.56887817382812, |
|
"rewards/margins": 162.0093994140625, |
|
"rewards/real": 23.440532684326172, |
|
"step": 239 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1434, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|