Upload trainer_state.json with huggingface_hub
Browse files- trainer_state.json +2697 -0
trainer_state.json
ADDED
@@ -0,0 +1,2697 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9997167941093175,
|
5 |
+
"eval_steps": 100000,
|
6 |
+
"global_step": 1765,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.0005664117813650524,
|
13 |
+
"grad_norm": 8.920203173210156,
|
14 |
+
"learning_rate": 2.824858757062147e-09,
|
15 |
+
"logits/chosen": 0.09167595952749252,
|
16 |
+
"logits/rejected": 0.08131548762321472,
|
17 |
+
"logps/chosen": -324.3444519042969,
|
18 |
+
"logps/rejected": -319.4935607910156,
|
19 |
+
"loss": 0.6931,
|
20 |
+
"rewards/accuracies": 0.0,
|
21 |
+
"rewards/chosen": 0.0,
|
22 |
+
"rewards/margins": 0.0,
|
23 |
+
"rewards/rejected": 0.0,
|
24 |
+
"step": 1
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"epoch": 0.005664117813650524,
|
28 |
+
"grad_norm": 9.052636069881236,
|
29 |
+
"learning_rate": 2.8248587570621467e-08,
|
30 |
+
"logits/chosen": -0.11684032529592514,
|
31 |
+
"logits/rejected": -0.11305296421051025,
|
32 |
+
"logps/chosen": -549.3125,
|
33 |
+
"logps/rejected": -521.6323852539062,
|
34 |
+
"loss": 0.6931,
|
35 |
+
"rewards/accuracies": 0.4722222089767456,
|
36 |
+
"rewards/chosen": 0.00030700574279762805,
|
37 |
+
"rewards/margins": 0.00035680277505889535,
|
38 |
+
"rewards/rejected": -4.9797094106907025e-05,
|
39 |
+
"step": 10
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 0.011328235627301049,
|
43 |
+
"grad_norm": 9.28894369536706,
|
44 |
+
"learning_rate": 5.6497175141242935e-08,
|
45 |
+
"logits/chosen": 0.052977461367845535,
|
46 |
+
"logits/rejected": 0.03228786215186119,
|
47 |
+
"logps/chosen": -330.4373779296875,
|
48 |
+
"logps/rejected": -324.8442077636719,
|
49 |
+
"loss": 0.6934,
|
50 |
+
"rewards/accuracies": 0.44999998807907104,
|
51 |
+
"rewards/chosen": -0.0014236660208553076,
|
52 |
+
"rewards/margins": -0.0023983772844076157,
|
53 |
+
"rewards/rejected": 0.000974711321759969,
|
54 |
+
"step": 20
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"epoch": 0.016992353440951572,
|
58 |
+
"grad_norm": 8.995558692774207,
|
59 |
+
"learning_rate": 8.47457627118644e-08,
|
60 |
+
"logits/chosen": -0.1682974398136139,
|
61 |
+
"logits/rejected": -0.17686393857002258,
|
62 |
+
"logps/chosen": -577.6813354492188,
|
63 |
+
"logps/rejected": -540.6315307617188,
|
64 |
+
"loss": 0.6933,
|
65 |
+
"rewards/accuracies": 0.4000000059604645,
|
66 |
+
"rewards/chosen": 0.00039245429798029363,
|
67 |
+
"rewards/margins": -0.0011196346022188663,
|
68 |
+
"rewards/rejected": 0.0015120886964723468,
|
69 |
+
"step": 30
|
70 |
+
},
|
71 |
+
{
|
72 |
+
"epoch": 0.022656471254602097,
|
73 |
+
"grad_norm": 7.350404748510338,
|
74 |
+
"learning_rate": 1.1299435028248587e-07,
|
75 |
+
"logits/chosen": -0.004499862901866436,
|
76 |
+
"logits/rejected": 0.0006781384581699967,
|
77 |
+
"logps/chosen": -402.2951354980469,
|
78 |
+
"logps/rejected": -402.3536071777344,
|
79 |
+
"loss": 0.693,
|
80 |
+
"rewards/accuracies": 0.42500001192092896,
|
81 |
+
"rewards/chosen": 0.001362536335363984,
|
82 |
+
"rewards/margins": 2.0598527044057846e-05,
|
83 |
+
"rewards/rejected": 0.0013419378083199263,
|
84 |
+
"step": 40
|
85 |
+
},
|
86 |
+
{
|
87 |
+
"epoch": 0.02832058906825262,
|
88 |
+
"grad_norm": 9.877657993004474,
|
89 |
+
"learning_rate": 1.4124293785310734e-07,
|
90 |
+
"logits/chosen": -0.17724382877349854,
|
91 |
+
"logits/rejected": -0.14357277750968933,
|
92 |
+
"logps/chosen": -485.1160583496094,
|
93 |
+
"logps/rejected": -462.67364501953125,
|
94 |
+
"loss": 0.6925,
|
95 |
+
"rewards/accuracies": 0.42500001192092896,
|
96 |
+
"rewards/chosen": 0.004250611178576946,
|
97 |
+
"rewards/margins": -0.0006281146197579801,
|
98 |
+
"rewards/rejected": 0.0048787253908813,
|
99 |
+
"step": 50
|
100 |
+
},
|
101 |
+
{
|
102 |
+
"epoch": 0.033984706881903144,
|
103 |
+
"grad_norm": 9.281132819647825,
|
104 |
+
"learning_rate": 1.694915254237288e-07,
|
105 |
+
"logits/chosen": -0.042291849851608276,
|
106 |
+
"logits/rejected": -0.05441279336810112,
|
107 |
+
"logps/chosen": -504.1480407714844,
|
108 |
+
"logps/rejected": -505.86505126953125,
|
109 |
+
"loss": 0.6922,
|
110 |
+
"rewards/accuracies": 0.5249999761581421,
|
111 |
+
"rewards/chosen": 0.011762259528040886,
|
112 |
+
"rewards/margins": 0.00283249793574214,
|
113 |
+
"rewards/rejected": 0.008929761126637459,
|
114 |
+
"step": 60
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.03964882469555367,
|
118 |
+
"grad_norm": 9.053471169113529,
|
119 |
+
"learning_rate": 1.9774011299435027e-07,
|
120 |
+
"logits/chosen": -0.13191935420036316,
|
121 |
+
"logits/rejected": -0.09768908470869064,
|
122 |
+
"logps/chosen": -506.79461669921875,
|
123 |
+
"logps/rejected": -472.0877380371094,
|
124 |
+
"loss": 0.6919,
|
125 |
+
"rewards/accuracies": 0.5249999761581421,
|
126 |
+
"rewards/chosen": 0.019224129617214203,
|
127 |
+
"rewards/margins": 0.0018368273740634322,
|
128 |
+
"rewards/rejected": 0.017387302592396736,
|
129 |
+
"step": 70
|
130 |
+
},
|
131 |
+
{
|
132 |
+
"epoch": 0.045312942509204195,
|
133 |
+
"grad_norm": 8.967160357325453,
|
134 |
+
"learning_rate": 2.2598870056497174e-07,
|
135 |
+
"logits/chosen": -0.011566092260181904,
|
136 |
+
"logits/rejected": -0.0008640438318252563,
|
137 |
+
"logps/chosen": -416.4815979003906,
|
138 |
+
"logps/rejected": -403.4819030761719,
|
139 |
+
"loss": 0.693,
|
140 |
+
"rewards/accuracies": 0.42500001192092896,
|
141 |
+
"rewards/chosen": 0.02151104062795639,
|
142 |
+
"rewards/margins": -0.0011001474922522902,
|
143 |
+
"rewards/rejected": 0.022611189633607864,
|
144 |
+
"step": 80
|
145 |
+
},
|
146 |
+
{
|
147 |
+
"epoch": 0.05097706032285471,
|
148 |
+
"grad_norm": 11.210156815835907,
|
149 |
+
"learning_rate": 2.542372881355932e-07,
|
150 |
+
"logits/chosen": -0.09484784305095673,
|
151 |
+
"logits/rejected": -0.10812617838382721,
|
152 |
+
"logps/chosen": -490.9479064941406,
|
153 |
+
"logps/rejected": -515.5724487304688,
|
154 |
+
"loss": 0.6904,
|
155 |
+
"rewards/accuracies": 0.5,
|
156 |
+
"rewards/chosen": 0.03245864063501358,
|
157 |
+
"rewards/margins": -0.0015371677000075579,
|
158 |
+
"rewards/rejected": 0.03399580717086792,
|
159 |
+
"step": 90
|
160 |
+
},
|
161 |
+
{
|
162 |
+
"epoch": 0.05664117813650524,
|
163 |
+
"grad_norm": 8.92543373085589,
|
164 |
+
"learning_rate": 2.824858757062147e-07,
|
165 |
+
"logits/chosen": -0.03574278578162193,
|
166 |
+
"logits/rejected": -0.04354934021830559,
|
167 |
+
"logps/chosen": -447.6690979003906,
|
168 |
+
"logps/rejected": -444.5801696777344,
|
169 |
+
"loss": 0.6912,
|
170 |
+
"rewards/accuracies": 0.550000011920929,
|
171 |
+
"rewards/chosen": 0.05052490904927254,
|
172 |
+
"rewards/margins": 0.0031251353211700916,
|
173 |
+
"rewards/rejected": 0.04739977791905403,
|
174 |
+
"step": 100
|
175 |
+
},
|
176 |
+
{
|
177 |
+
"epoch": 0.06230529595015576,
|
178 |
+
"grad_norm": 8.246981680201595,
|
179 |
+
"learning_rate": 3.1073446327683617e-07,
|
180 |
+
"logits/chosen": 0.07561548054218292,
|
181 |
+
"logits/rejected": 0.05347290635108948,
|
182 |
+
"logps/chosen": -462.77081298828125,
|
183 |
+
"logps/rejected": -458.2320251464844,
|
184 |
+
"loss": 0.6903,
|
185 |
+
"rewards/accuracies": 0.625,
|
186 |
+
"rewards/chosen": 0.07939761877059937,
|
187 |
+
"rewards/margins": -0.00035358889726921916,
|
188 |
+
"rewards/rejected": 0.07975120842456818,
|
189 |
+
"step": 110
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"epoch": 0.06796941376380629,
|
193 |
+
"grad_norm": 7.813418682487144,
|
194 |
+
"learning_rate": 3.389830508474576e-07,
|
195 |
+
"logits/chosen": 0.0744490697979927,
|
196 |
+
"logits/rejected": 0.06200702115893364,
|
197 |
+
"logps/chosen": -463.47918701171875,
|
198 |
+
"logps/rejected": -434.5580139160156,
|
199 |
+
"loss": 0.6885,
|
200 |
+
"rewards/accuracies": 0.625,
|
201 |
+
"rewards/chosen": 0.10037367045879364,
|
202 |
+
"rewards/margins": 0.022612569853663445,
|
203 |
+
"rewards/rejected": 0.07776109874248505,
|
204 |
+
"step": 120
|
205 |
+
},
|
206 |
+
{
|
207 |
+
"epoch": 0.0736335315774568,
|
208 |
+
"grad_norm": 8.352060171431797,
|
209 |
+
"learning_rate": 3.672316384180791e-07,
|
210 |
+
"logits/chosen": 0.09073454886674881,
|
211 |
+
"logits/rejected": 0.05861488729715347,
|
212 |
+
"logps/chosen": -421.78839111328125,
|
213 |
+
"logps/rejected": -408.28045654296875,
|
214 |
+
"loss": 0.6921,
|
215 |
+
"rewards/accuracies": 0.574999988079071,
|
216 |
+
"rewards/chosen": 0.10209091752767563,
|
217 |
+
"rewards/margins": 0.013570049777626991,
|
218 |
+
"rewards/rejected": 0.08852086216211319,
|
219 |
+
"step": 130
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.07929764939110734,
|
223 |
+
"grad_norm": 7.350219776962672,
|
224 |
+
"learning_rate": 3.9548022598870054e-07,
|
225 |
+
"logits/chosen": 0.0739302709698677,
|
226 |
+
"logits/rejected": 0.08551234006881714,
|
227 |
+
"logps/chosen": -451.78851318359375,
|
228 |
+
"logps/rejected": -444.747802734375,
|
229 |
+
"loss": 0.685,
|
230 |
+
"rewards/accuracies": 0.675000011920929,
|
231 |
+
"rewards/chosen": 0.13255102932453156,
|
232 |
+
"rewards/margins": 0.01697494462132454,
|
233 |
+
"rewards/rejected": 0.11557607352733612,
|
234 |
+
"step": 140
|
235 |
+
},
|
236 |
+
{
|
237 |
+
"epoch": 0.08496176720475786,
|
238 |
+
"grad_norm": 7.368496064412902,
|
239 |
+
"learning_rate": 4.23728813559322e-07,
|
240 |
+
"logits/chosen": 0.01890203356742859,
|
241 |
+
"logits/rejected": 0.05110060051083565,
|
242 |
+
"logps/chosen": -414.914306640625,
|
243 |
+
"logps/rejected": -387.0083312988281,
|
244 |
+
"loss": 0.6864,
|
245 |
+
"rewards/accuracies": 0.5249999761581421,
|
246 |
+
"rewards/chosen": 0.13907325267791748,
|
247 |
+
"rewards/margins": 0.002160780131816864,
|
248 |
+
"rewards/rejected": 0.1369124799966812,
|
249 |
+
"step": 150
|
250 |
+
},
|
251 |
+
{
|
252 |
+
"epoch": 0.09062588501840839,
|
253 |
+
"grad_norm": 7.548722136067434,
|
254 |
+
"learning_rate": 4.519774011299435e-07,
|
255 |
+
"logits/chosen": -0.04512980952858925,
|
256 |
+
"logits/rejected": -0.008629368618130684,
|
257 |
+
"logps/chosen": -533.8591918945312,
|
258 |
+
"logps/rejected": -540.7676391601562,
|
259 |
+
"loss": 0.6834,
|
260 |
+
"rewards/accuracies": 0.574999988079071,
|
261 |
+
"rewards/chosen": 0.18351757526397705,
|
262 |
+
"rewards/margins": 0.02642633020877838,
|
263 |
+
"rewards/rejected": 0.15709123015403748,
|
264 |
+
"step": 160
|
265 |
+
},
|
266 |
+
{
|
267 |
+
"epoch": 0.09629000283205891,
|
268 |
+
"grad_norm": 7.70075563520064,
|
269 |
+
"learning_rate": 4.80225988700565e-07,
|
270 |
+
"logits/chosen": 0.03039904311299324,
|
271 |
+
"logits/rejected": 0.04146042466163635,
|
272 |
+
"logps/chosen": -396.4923400878906,
|
273 |
+
"logps/rejected": -347.1358642578125,
|
274 |
+
"loss": 0.6793,
|
275 |
+
"rewards/accuracies": 0.6000000238418579,
|
276 |
+
"rewards/chosen": 0.165240079164505,
|
277 |
+
"rewards/margins": 0.061240702867507935,
|
278 |
+
"rewards/rejected": 0.10399937629699707,
|
279 |
+
"step": 170
|
280 |
+
},
|
281 |
+
{
|
282 |
+
"epoch": 0.10195412064570943,
|
283 |
+
"grad_norm": 5.761017969269167,
|
284 |
+
"learning_rate": 4.999955969867048e-07,
|
285 |
+
"logits/chosen": -0.025510674342513084,
|
286 |
+
"logits/rejected": -0.03141719102859497,
|
287 |
+
"logps/chosen": -445.02813720703125,
|
288 |
+
"logps/rejected": -441.5028381347656,
|
289 |
+
"loss": 0.6769,
|
290 |
+
"rewards/accuracies": 0.675000011920929,
|
291 |
+
"rewards/chosen": 0.2438725233078003,
|
292 |
+
"rewards/margins": 0.04211033508181572,
|
293 |
+
"rewards/rejected": 0.20176219940185547,
|
294 |
+
"step": 180
|
295 |
+
},
|
296 |
+
{
|
297 |
+
"epoch": 0.10761823845935996,
|
298 |
+
"grad_norm": 8.147395627823983,
|
299 |
+
"learning_rate": 4.999173255092139e-07,
|
300 |
+
"logits/chosen": -0.12435302883386612,
|
301 |
+
"logits/rejected": -0.05531524866819382,
|
302 |
+
"logps/chosen": -551.278076171875,
|
303 |
+
"logps/rejected": -506.13812255859375,
|
304 |
+
"loss": 0.6769,
|
305 |
+
"rewards/accuracies": 0.574999988079071,
|
306 |
+
"rewards/chosen": 0.288453072309494,
|
307 |
+
"rewards/margins": 0.029089733958244324,
|
308 |
+
"rewards/rejected": 0.2593633830547333,
|
309 |
+
"step": 190
|
310 |
+
},
|
311 |
+
{
|
312 |
+
"epoch": 0.11328235627301048,
|
313 |
+
"grad_norm": 6.8306403873848875,
|
314 |
+
"learning_rate": 4.997412445518907e-07,
|
315 |
+
"logits/chosen": 0.037917762994766235,
|
316 |
+
"logits/rejected": 0.05893224477767944,
|
317 |
+
"logps/chosen": -392.0666198730469,
|
318 |
+
"logps/rejected": -373.0283203125,
|
319 |
+
"loss": 0.6819,
|
320 |
+
"rewards/accuracies": 0.5249999761581421,
|
321 |
+
"rewards/chosen": 0.21145746111869812,
|
322 |
+
"rewards/margins": 0.06931839883327484,
|
323 |
+
"rewards/rejected": 0.14213906228542328,
|
324 |
+
"step": 200
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.11894647408666101,
|
328 |
+
"grad_norm": 6.633087457971285,
|
329 |
+
"learning_rate": 4.994674230270714e-07,
|
330 |
+
"logits/chosen": -0.04183816909790039,
|
331 |
+
"logits/rejected": -0.008494583889842033,
|
332 |
+
"logps/chosen": -475.88885498046875,
|
333 |
+
"logps/rejected": -419.31805419921875,
|
334 |
+
"loss": 0.6809,
|
335 |
+
"rewards/accuracies": 0.625,
|
336 |
+
"rewards/chosen": 0.2550015449523926,
|
337 |
+
"rewards/margins": 0.057939767837524414,
|
338 |
+
"rewards/rejected": 0.19706180691719055,
|
339 |
+
"step": 210
|
340 |
+
},
|
341 |
+
{
|
342 |
+
"epoch": 0.12461059190031153,
|
343 |
+
"grad_norm": 6.222645381570541,
|
344 |
+
"learning_rate": 4.990959680995591e-07,
|
345 |
+
"logits/chosen": 0.019654836505651474,
|
346 |
+
"logits/rejected": -0.009270086884498596,
|
347 |
+
"logps/chosen": -439.2235412597656,
|
348 |
+
"logps/rejected": -480.212158203125,
|
349 |
+
"loss": 0.674,
|
350 |
+
"rewards/accuracies": 0.6499999761581421,
|
351 |
+
"rewards/chosen": 0.29933345317840576,
|
352 |
+
"rewards/margins": 0.08341382443904877,
|
353 |
+
"rewards/rejected": 0.21591965854167938,
|
354 |
+
"step": 220
|
355 |
+
},
|
356 |
+
{
|
357 |
+
"epoch": 0.13027470971396204,
|
358 |
+
"grad_norm": 7.247831481359495,
|
359 |
+
"learning_rate": 4.986270251446819e-07,
|
360 |
+
"logits/chosen": -0.016120824962854385,
|
361 |
+
"logits/rejected": 0.020067866891622543,
|
362 |
+
"logps/chosen": -443.7124938964844,
|
363 |
+
"logps/rejected": -426.8396911621094,
|
364 |
+
"loss": 0.6708,
|
365 |
+
"rewards/accuracies": 0.7250000238418579,
|
366 |
+
"rewards/chosen": 0.24699635803699493,
|
367 |
+
"rewards/margins": 0.05648481845855713,
|
368 |
+
"rewards/rejected": 0.190511554479599,
|
369 |
+
"step": 230
|
370 |
+
},
|
371 |
+
{
|
372 |
+
"epoch": 0.13593882752761258,
|
373 |
+
"grad_norm": 7.731105957944728,
|
374 |
+
"learning_rate": 4.980607776913984e-07,
|
375 |
+
"logits/chosen": -0.01023712195456028,
|
376 |
+
"logits/rejected": 0.029122397303581238,
|
377 |
+
"logps/chosen": -424.93963623046875,
|
378 |
+
"logps/rejected": -387.46197509765625,
|
379 |
+
"loss": 0.6677,
|
380 |
+
"rewards/accuracies": 0.625,
|
381 |
+
"rewards/chosen": 0.2441348135471344,
|
382 |
+
"rewards/margins": 0.052381645888090134,
|
383 |
+
"rewards/rejected": 0.19175319373607635,
|
384 |
+
"step": 240
|
385 |
+
},
|
386 |
+
{
|
387 |
+
"epoch": 0.1416029453412631,
|
388 |
+
"grad_norm": 7.76875586139277,
|
389 |
+
"learning_rate": 4.973974473504705e-07,
|
390 |
+
"logits/chosen": -0.1571817398071289,
|
391 |
+
"logits/rejected": -0.13499276340007782,
|
392 |
+
"logps/chosen": -498.03692626953125,
|
393 |
+
"logps/rejected": -460.9757385253906,
|
394 |
+
"loss": 0.6908,
|
395 |
+
"rewards/accuracies": 0.550000011920929,
|
396 |
+
"rewards/chosen": 0.2809702754020691,
|
397 |
+
"rewards/margins": 0.04384630173444748,
|
398 |
+
"rewards/rejected": 0.23712392151355743,
|
399 |
+
"step": 250
|
400 |
+
},
|
401 |
+
{
|
402 |
+
"epoch": 0.1472670631549136,
|
403 |
+
"grad_norm": 7.046439253317865,
|
404 |
+
"learning_rate": 4.966372937277314e-07,
|
405 |
+
"logits/chosen": 0.008683884516358376,
|
406 |
+
"logits/rejected": 0.03034578636288643,
|
407 |
+
"logps/chosen": -427.47283935546875,
|
408 |
+
"logps/rejected": -423.66033935546875,
|
409 |
+
"loss": 0.6672,
|
410 |
+
"rewards/accuracies": 0.6000000238418579,
|
411 |
+
"rewards/chosen": 0.23050042986869812,
|
412 |
+
"rewards/margins": 0.05516275018453598,
|
413 |
+
"rewards/rejected": 0.17533767223358154,
|
414 |
+
"step": 260
|
415 |
+
},
|
416 |
+
{
|
417 |
+
"epoch": 0.15293118096856415,
|
418 |
+
"grad_norm": 8.273099722403256,
|
419 |
+
"learning_rate": 4.957806143224855e-07,
|
420 |
+
"logits/chosen": -0.02415129914879799,
|
421 |
+
"logits/rejected": -0.027190949767827988,
|
422 |
+
"logps/chosen": -503.2647399902344,
|
423 |
+
"logps/rejected": -487.62213134765625,
|
424 |
+
"loss": 0.6875,
|
425 |
+
"rewards/accuracies": 0.6499999761581421,
|
426 |
+
"rewards/chosen": 0.2844032049179077,
|
427 |
+
"rewards/margins": 0.020670583471655846,
|
428 |
+
"rewards/rejected": 0.2637326121330261,
|
429 |
+
"step": 270
|
430 |
+
},
|
431 |
+
{
|
432 |
+
"epoch": 0.15859529878221468,
|
433 |
+
"grad_norm": 6.632746838428199,
|
434 |
+
"learning_rate": 4.94827744411076e-07,
|
435 |
+
"logits/chosen": -0.049058981239795685,
|
436 |
+
"logits/rejected": -0.08267603814601898,
|
437 |
+
"logps/chosen": -398.5274658203125,
|
438 |
+
"logps/rejected": -403.6279296875,
|
439 |
+
"loss": 0.6657,
|
440 |
+
"rewards/accuracies": 0.625,
|
441 |
+
"rewards/chosen": 0.21822825074195862,
|
442 |
+
"rewards/margins": 0.038919974118471146,
|
443 |
+
"rewards/rejected": 0.17930825054645538,
|
444 |
+
"step": 280
|
445 |
+
},
|
446 |
+
{
|
447 |
+
"epoch": 0.16425941659586518,
|
448 |
+
"grad_norm": 7.218507745358829,
|
449 |
+
"learning_rate": 4.937790569156689e-07,
|
450 |
+
"logits/chosen": 0.029155444353818893,
|
451 |
+
"logits/rejected": 0.05234605073928833,
|
452 |
+
"logps/chosen": -451.5755920410156,
|
453 |
+
"logps/rejected": -424.02484130859375,
|
454 |
+
"loss": 0.6792,
|
455 |
+
"rewards/accuracies": 0.6000000238418579,
|
456 |
+
"rewards/chosen": 0.23795238137245178,
|
457 |
+
"rewards/margins": 0.07340480387210846,
|
458 |
+
"rewards/rejected": 0.16454759240150452,
|
459 |
+
"step": 290
|
460 |
+
},
|
461 |
+
{
|
462 |
+
"epoch": 0.16992353440951571,
|
463 |
+
"grad_norm": 7.6926913086077935,
|
464 |
+
"learning_rate": 4.926349622583038e-07,
|
465 |
+
"logits/chosen": -0.05263520032167435,
|
466 |
+
"logits/rejected": -0.0020814030431210995,
|
467 |
+
"logps/chosen": -406.317626953125,
|
468 |
+
"logps/rejected": -372.07769775390625,
|
469 |
+
"loss": 0.6738,
|
470 |
+
"rewards/accuracies": 0.6000000238418579,
|
471 |
+
"rewards/chosen": 0.19559504091739655,
|
472 |
+
"rewards/margins": 0.06914862990379333,
|
473 |
+
"rewards/rejected": 0.1264464110136032,
|
474 |
+
"step": 300
|
475 |
+
},
|
476 |
+
{
|
477 |
+
"epoch": 0.17558765222316625,
|
478 |
+
"grad_norm": 6.5222551125823935,
|
479 |
+
"learning_rate": 4.913959082002677e-07,
|
480 |
+
"logits/chosen": -0.0497591607272625,
|
481 |
+
"logits/rejected": 0.0054474459029734135,
|
482 |
+
"logps/chosen": -370.96685791015625,
|
483 |
+
"logps/rejected": -358.66912841796875,
|
484 |
+
"loss": 0.6678,
|
485 |
+
"rewards/accuracies": 0.550000011920929,
|
486 |
+
"rewards/chosen": 0.2062123566865921,
|
487 |
+
"rewards/margins": 0.04517778381705284,
|
488 |
+
"rewards/rejected": 0.16103455424308777,
|
489 |
+
"step": 310
|
490 |
+
},
|
491 |
+
{
|
492 |
+
"epoch": 0.18125177003681678,
|
493 |
+
"grad_norm": 6.40916314482675,
|
494 |
+
"learning_rate": 4.900623796668559e-07,
|
495 |
+
"logits/chosen": -0.12707683444023132,
|
496 |
+
"logits/rejected": -0.13572077453136444,
|
497 |
+
"logps/chosen": -479.86346435546875,
|
498 |
+
"logps/rejected": -423.7693786621094,
|
499 |
+
"loss": 0.6626,
|
500 |
+
"rewards/accuracies": 0.675000011920929,
|
501 |
+
"rewards/chosen": 0.21128025650978088,
|
502 |
+
"rewards/margins": 0.09720635414123535,
|
503 |
+
"rewards/rejected": 0.11407390981912613,
|
504 |
+
"step": 320
|
505 |
+
},
|
506 |
+
{
|
507 |
+
"epoch": 0.18691588785046728,
|
508 |
+
"grad_norm": 6.645268185687199,
|
509 |
+
"learning_rate": 4.886348985575884e-07,
|
510 |
+
"logits/chosen": -0.10542762279510498,
|
511 |
+
"logits/rejected": -0.12311786413192749,
|
512 |
+
"logps/chosen": -457.22589111328125,
|
513 |
+
"logps/rejected": -476.0166015625,
|
514 |
+
"loss": 0.6589,
|
515 |
+
"rewards/accuracies": 0.6499999761581421,
|
516 |
+
"rewards/chosen": 0.19297997653484344,
|
517 |
+
"rewards/margins": 0.037894029170274734,
|
518 |
+
"rewards/rejected": 0.15508592128753662,
|
519 |
+
"step": 330
|
520 |
+
},
|
521 |
+
{
|
522 |
+
"epoch": 0.19258000566411781,
|
523 |
+
"grad_norm": 8.79307170795037,
|
524 |
+
"learning_rate": 4.871140235419551e-07,
|
525 |
+
"logits/chosen": 0.011003658175468445,
|
526 |
+
"logits/rejected": -0.03487353399395943,
|
527 |
+
"logps/chosen": -303.2231750488281,
|
528 |
+
"logps/rejected": -321.04071044921875,
|
529 |
+
"loss": 0.6687,
|
530 |
+
"rewards/accuracies": 0.6499999761581421,
|
531 |
+
"rewards/chosen": 0.1460239738225937,
|
532 |
+
"rewards/margins": 0.10739554464817047,
|
533 |
+
"rewards/rejected": 0.03862842172384262,
|
534 |
+
"step": 340
|
535 |
+
},
|
536 |
+
{
|
537 |
+
"epoch": 0.19824412347776835,
|
538 |
+
"grad_norm": 6.624600376940614,
|
539 |
+
"learning_rate": 4.85500349840771e-07,
|
540 |
+
"logits/chosen": 0.048854436725378036,
|
541 |
+
"logits/rejected": 0.012037856504321098,
|
542 |
+
"logps/chosen": -453.3700256347656,
|
543 |
+
"logps/rejected": -483.99005126953125,
|
544 |
+
"loss": 0.6697,
|
545 |
+
"rewards/accuracies": 0.699999988079071,
|
546 |
+
"rewards/chosen": 0.20664839446544647,
|
547 |
+
"rewards/margins": 0.08074460178613663,
|
548 |
+
"rewards/rejected": 0.12590381503105164,
|
549 |
+
"step": 350
|
550 |
+
},
|
551 |
+
{
|
552 |
+
"epoch": 0.20390824129141885,
|
553 |
+
"grad_norm": 6.596193590739345,
|
554 |
+
"learning_rate": 4.837945089932261e-07,
|
555 |
+
"logits/chosen": 0.11997655779123306,
|
556 |
+
"logits/rejected": 0.18299253284931183,
|
557 |
+
"logps/chosen": -460.0072326660156,
|
558 |
+
"logps/rejected": -421.7837829589844,
|
559 |
+
"loss": 0.6679,
|
560 |
+
"rewards/accuracies": 0.6000000238418579,
|
561 |
+
"rewards/chosen": 0.24212124943733215,
|
562 |
+
"rewards/margins": 0.09361882507801056,
|
563 |
+
"rewards/rejected": 0.1485023945569992,
|
564 |
+
"step": 360
|
565 |
+
},
|
566 |
+
{
|
567 |
+
"epoch": 0.20957235910506938,
|
568 |
+
"grad_norm": 7.905418678010064,
|
569 |
+
"learning_rate": 4.819971686097217e-07,
|
570 |
+
"logits/chosen": 0.026029860600829124,
|
571 |
+
"logits/rejected": 0.06209796667098999,
|
572 |
+
"logps/chosen": -451.7420349121094,
|
573 |
+
"logps/rejected": -457.9381408691406,
|
574 |
+
"loss": 0.6714,
|
575 |
+
"rewards/accuracies": 0.5249999761581421,
|
576 |
+
"rewards/chosen": 0.13577082753181458,
|
577 |
+
"rewards/margins": 0.06413926184177399,
|
578 |
+
"rewards/rejected": 0.07163156569004059,
|
579 |
+
"step": 370
|
580 |
+
},
|
581 |
+
{
|
582 |
+
"epoch": 0.21523647691871992,
|
583 |
+
"grad_norm": 8.79182824488632,
|
584 |
+
"learning_rate": 4.801090321105896e-07,
|
585 |
+
"logits/chosen": -0.1300145834684372,
|
586 |
+
"logits/rejected": -0.11859152466058731,
|
587 |
+
"logps/chosen": -547.6380004882812,
|
588 |
+
"logps/rejected": -494.87689208984375,
|
589 |
+
"loss": 0.6534,
|
590 |
+
"rewards/accuracies": 0.7250000238418579,
|
591 |
+
"rewards/chosen": 0.23817840218544006,
|
592 |
+
"rewards/margins": 0.1310252845287323,
|
593 |
+
"rewards/rejected": 0.10715309530496597,
|
594 |
+
"step": 380
|
595 |
+
},
|
596 |
+
{
|
597 |
+
"epoch": 0.22090059473237042,
|
598 |
+
"grad_norm": 7.386220719656124,
|
599 |
+
"learning_rate": 4.781308384507959e-07,
|
600 |
+
"logits/chosen": -0.09379091113805771,
|
601 |
+
"logits/rejected": -0.034805141389369965,
|
602 |
+
"logps/chosen": -480.873779296875,
|
603 |
+
"logps/rejected": -466.55517578125,
|
604 |
+
"loss": 0.6588,
|
605 |
+
"rewards/accuracies": 0.824999988079071,
|
606 |
+
"rewards/chosen": 0.2688544988632202,
|
607 |
+
"rewards/margins": 0.1596641093492508,
|
608 |
+
"rewards/rejected": 0.10919040441513062,
|
609 |
+
"step": 390
|
610 |
+
},
|
611 |
+
{
|
612 |
+
"epoch": 0.22656471254602095,
|
613 |
+
"grad_norm": 8.951091336105508,
|
614 |
+
"learning_rate": 4.760633618307386e-07,
|
615 |
+
"logits/chosen": -0.03577841818332672,
|
616 |
+
"logits/rejected": -0.03505768999457359,
|
617 |
+
"logps/chosen": -410.76617431640625,
|
618 |
+
"logps/rejected": -404.79296875,
|
619 |
+
"loss": 0.6672,
|
620 |
+
"rewards/accuracies": 0.550000011920929,
|
621 |
+
"rewards/chosen": 0.12271402776241302,
|
622 |
+
"rewards/margins": 0.09359300136566162,
|
623 |
+
"rewards/rejected": 0.02912103570997715,
|
624 |
+
"step": 400
|
625 |
+
},
|
626 |
+
{
|
627 |
+
"epoch": 0.23222883035967148,
|
628 |
+
"grad_norm": 7.867464061099172,
|
629 |
+
"learning_rate": 4.7390741139325063e-07,
|
630 |
+
"logits/chosen": -0.13187697529792786,
|
631 |
+
"logits/rejected": -0.1505255401134491,
|
632 |
+
"logps/chosen": -442.73736572265625,
|
633 |
+
"logps/rejected": -453.91497802734375,
|
634 |
+
"loss": 0.6551,
|
635 |
+
"rewards/accuracies": 0.625,
|
636 |
+
"rewards/chosen": 0.1331796944141388,
|
637 |
+
"rewards/margins": 0.108833447098732,
|
638 |
+
"rewards/rejected": 0.024346251040697098,
|
639 |
+
"step": 410
|
640 |
+
},
|
641 |
+
{
|
642 |
+
"epoch": 0.23789294817332202,
|
643 |
+
"grad_norm": 8.897905773307174,
|
644 |
+
"learning_rate": 4.7166383090692797e-07,
|
645 |
+
"logits/chosen": -0.05865805596113205,
|
646 |
+
"logits/rejected": -0.04420238733291626,
|
647 |
+
"logps/chosen": -510.46112060546875,
|
648 |
+
"logps/rejected": -491.46368408203125,
|
649 |
+
"loss": 0.6571,
|
650 |
+
"rewards/accuracies": 0.75,
|
651 |
+
"rewards/chosen": 0.13560162484645844,
|
652 |
+
"rewards/margins": 0.12260621786117554,
|
653 |
+
"rewards/rejected": 0.01299543958157301,
|
654 |
+
"step": 420
|
655 |
+
},
|
656 |
+
{
|
657 |
+
"epoch": 0.24355706598697252,
|
658 |
+
"grad_norm": 8.480862384495287,
|
659 |
+
"learning_rate": 4.693334984359059e-07,
|
660 |
+
"logits/chosen": -0.03462984040379524,
|
661 |
+
"logits/rejected": -0.004300132393836975,
|
662 |
+
"logps/chosen": -276.5245666503906,
|
663 |
+
"logps/rejected": -281.5501708984375,
|
664 |
+
"loss": 0.6735,
|
665 |
+
"rewards/accuracies": 0.625,
|
666 |
+
"rewards/chosen": 0.024655651301145554,
|
667 |
+
"rewards/margins": 0.03693979233503342,
|
668 |
+
"rewards/rejected": -0.012284127995371819,
|
669 |
+
"step": 430
|
670 |
+
},
|
671 |
+
{
|
672 |
+
"epoch": 0.24922118380062305,
|
673 |
+
"grad_norm": 8.050427726048714,
|
674 |
+
"learning_rate": 4.6691732599621365e-07,
|
675 |
+
"logits/chosen": -0.10734639316797256,
|
676 |
+
"logits/rejected": -0.11525185406208038,
|
677 |
+
"logps/chosen": -359.0762023925781,
|
678 |
+
"logps/rejected": -346.8539733886719,
|
679 |
+
"loss": 0.6723,
|
680 |
+
"rewards/accuracies": 0.625,
|
681 |
+
"rewards/chosen": 0.06632296741008759,
|
682 |
+
"rewards/margins": 0.0617077462375164,
|
683 |
+
"rewards/rejected": 0.004615230951458216,
|
684 |
+
"step": 440
|
685 |
+
},
|
686 |
+
{
|
687 |
+
"epoch": 0.2548853016142736,
|
688 |
+
"grad_norm": 7.895428149064327,
|
689 |
+
"learning_rate": 4.6441625919884083e-07,
|
690 |
+
"logits/chosen": -0.19662366807460785,
|
691 |
+
"logits/rejected": -0.15727293491363525,
|
692 |
+
"logps/chosen": -459.8854064941406,
|
693 |
+
"logps/rejected": -464.7665100097656,
|
694 |
+
"loss": 0.6619,
|
695 |
+
"rewards/accuracies": 0.550000011920929,
|
696 |
+
"rewards/chosen": 0.1074344739317894,
|
697 |
+
"rewards/margins": 0.06199081987142563,
|
698 |
+
"rewards/rejected": 0.04544364660978317,
|
699 |
+
"step": 450
|
700 |
+
},
|
701 |
+
{
|
702 |
+
"epoch": 0.2605494194279241,
|
703 |
+
"grad_norm": 9.122379390128415,
|
704 |
+
"learning_rate": 4.6183127687965634e-07,
|
705 |
+
"logits/chosen": -0.242090106010437,
|
706 |
+
"logits/rejected": -0.23871174454689026,
|
707 |
+
"logps/chosen": -543.6893920898438,
|
708 |
+
"logps/rejected": -521.69677734375,
|
709 |
+
"loss": 0.6572,
|
710 |
+
"rewards/accuracies": 0.6499999761581421,
|
711 |
+
"rewards/chosen": 0.136667862534523,
|
712 |
+
"rewards/margins": 0.09476637840270996,
|
713 |
+
"rewards/rejected": 0.04190149903297424,
|
714 |
+
"step": 460
|
715 |
+
},
|
716 |
+
{
|
717 |
+
"epoch": 0.26621353724157465,
|
718 |
+
"grad_norm": 8.854419843076306,
|
719 |
+
"learning_rate": 4.5916339071632407e-07,
|
720 |
+
"logits/chosen": -0.1867067664861679,
|
721 |
+
"logits/rejected": -0.18054267764091492,
|
722 |
+
"logps/chosen": -450.6224670410156,
|
723 |
+
"logps/rejected": -452.8955078125,
|
724 |
+
"loss": 0.6538,
|
725 |
+
"rewards/accuracies": 0.6499999761581421,
|
726 |
+
"rewards/chosen": 0.014217564836144447,
|
727 |
+
"rewards/margins": 0.020388774573802948,
|
728 |
+
"rewards/rejected": -0.0061712078750133514,
|
729 |
+
"step": 470
|
730 |
+
},
|
731 |
+
{
|
732 |
+
"epoch": 0.27187765505522515,
|
733 |
+
"grad_norm": 7.9942888923716495,
|
734 |
+
"learning_rate": 4.564136448323651e-07,
|
735 |
+
"logits/chosen": -0.18171334266662598,
|
736 |
+
"logits/rejected": -0.2345585823059082,
|
737 |
+
"logps/chosen": -452.36871337890625,
|
738 |
+
"logps/rejected": -476.38970947265625,
|
739 |
+
"loss": 0.6448,
|
740 |
+
"rewards/accuracies": 0.75,
|
741 |
+
"rewards/chosen": 0.09399638324975967,
|
742 |
+
"rewards/margins": 0.21560397744178772,
|
743 |
+
"rewards/rejected": -0.12160757929086685,
|
744 |
+
"step": 480
|
745 |
+
},
|
746 |
+
{
|
747 |
+
"epoch": 0.27754177286887566,
|
748 |
+
"grad_norm": 7.356252784701507,
|
749 |
+
"learning_rate": 4.535831153885219e-07,
|
750 |
+
"logits/chosen": -0.13978612422943115,
|
751 |
+
"logits/rejected": -0.05468880012631416,
|
752 |
+
"logps/chosen": -514.1116943359375,
|
753 |
+
"logps/rejected": -457.27496337890625,
|
754 |
+
"loss": 0.6582,
|
755 |
+
"rewards/accuracies": 0.625,
|
756 |
+
"rewards/chosen": -0.011979525908827782,
|
757 |
+
"rewards/margins": 0.09640632569789886,
|
758 |
+
"rewards/rejected": -0.10838586091995239,
|
759 |
+
"step": 490
|
760 |
+
},
|
761 |
+
{
|
762 |
+
"epoch": 0.2832058906825262,
|
763 |
+
"grad_norm": 8.844130628791795,
|
764 |
+
"learning_rate": 4.5067291016158415e-07,
|
765 |
+
"logits/chosen": -0.19480007886886597,
|
766 |
+
"logits/rejected": -0.19345858693122864,
|
767 |
+
"logps/chosen": -500.5552673339844,
|
768 |
+
"logps/rejected": -494.234130859375,
|
769 |
+
"loss": 0.6411,
|
770 |
+
"rewards/accuracies": 0.8500000238418579,
|
771 |
+
"rewards/chosen": -0.0037498758174479008,
|
772 |
+
"rewards/margins": 0.25029653310775757,
|
773 |
+
"rewards/rejected": -0.2540464401245117,
|
774 |
+
"step": 500
|
775 |
+
},
|
776 |
+
{
|
777 |
+
"epoch": 0.2888700084961767,
|
778 |
+
"grad_norm": 8.61728399806357,
|
779 |
+
"learning_rate": 4.476841681108412e-07,
|
780 |
+
"logits/chosen": 0.1435726284980774,
|
781 |
+
"logits/rejected": 0.17776526510715485,
|
782 |
+
"logps/chosen": -373.3977355957031,
|
783 |
+
"logps/rejected": -370.46435546875,
|
784 |
+
"loss": 0.6397,
|
785 |
+
"rewards/accuracies": 0.75,
|
786 |
+
"rewards/chosen": -0.09569720178842545,
|
787 |
+
"rewards/margins": 0.2144606113433838,
|
788 |
+
"rewards/rejected": -0.31015780568122864,
|
789 |
+
"step": 510
|
790 |
+
},
|
791 |
+
{
|
792 |
+
"epoch": 0.2945341263098272,
|
793 |
+
"grad_norm": 8.533003411384605,
|
794 |
+
"learning_rate": 4.4461805893233056e-07,
|
795 |
+
"logits/chosen": -0.07490365207195282,
|
796 |
+
"logits/rejected": -0.0622992217540741,
|
797 |
+
"logps/chosen": -473.82781982421875,
|
798 |
+
"logps/rejected": -462.300537109375,
|
799 |
+
"loss": 0.6701,
|
800 |
+
"rewards/accuracies": 0.625,
|
801 |
+
"rewards/chosen": -0.19030603766441345,
|
802 |
+
"rewards/margins": 0.06755149364471436,
|
803 |
+
"rewards/rejected": -0.2578575015068054,
|
804 |
+
"step": 520
|
805 |
+
},
|
806 |
+
{
|
807 |
+
"epoch": 0.3001982441234778,
|
808 |
+
"grad_norm": 7.979882700781325,
|
809 |
+
"learning_rate": 4.414757826010569e-07,
|
810 |
+
"logits/chosen": -0.32479414343833923,
|
811 |
+
"logits/rejected": -0.295619934797287,
|
812 |
+
"logps/chosen": -658.0303955078125,
|
813 |
+
"logps/rejected": -624.2590942382812,
|
814 |
+
"loss": 0.6598,
|
815 |
+
"rewards/accuracies": 0.550000011920929,
|
816 |
+
"rewards/chosen": -0.11945202201604843,
|
817 |
+
"rewards/margins": 0.11993242800235748,
|
818 |
+
"rewards/rejected": -0.2393844574689865,
|
819 |
+
"step": 530
|
820 |
+
},
|
821 |
+
{
|
822 |
+
"epoch": 0.3058623619371283,
|
823 |
+
"grad_norm": 8.080193283084418,
|
824 |
+
"learning_rate": 4.3825856890136127e-07,
|
825 |
+
"logits/chosen": -0.06019747257232666,
|
826 |
+
"logits/rejected": -0.047830767929553986,
|
827 |
+
"logps/chosen": -387.4117736816406,
|
828 |
+
"logps/rejected": -392.5433349609375,
|
829 |
+
"loss": 0.6472,
|
830 |
+
"rewards/accuracies": 0.574999988079071,
|
831 |
+
"rewards/chosen": -0.19570650160312653,
|
832 |
+
"rewards/margins": 0.10248363018035889,
|
833 |
+
"rewards/rejected": -0.2981901466846466,
|
834 |
+
"step": 540
|
835 |
+
},
|
836 |
+
{
|
837 |
+
"epoch": 0.3115264797507788,
|
838 |
+
"grad_norm": 7.907791715789903,
|
839 |
+
"learning_rate": 4.3496767694562337e-07,
|
840 |
+
"logits/chosen": -0.16734859347343445,
|
841 |
+
"logits/rejected": -0.15815582871437073,
|
842 |
+
"logps/chosen": -517.5618896484375,
|
843 |
+
"logps/rejected": -519.4200439453125,
|
844 |
+
"loss": 0.6358,
|
845 |
+
"rewards/accuracies": 0.6499999761581421,
|
846 |
+
"rewards/chosen": -0.18276867270469666,
|
847 |
+
"rewards/margins": 0.1622096300125122,
|
848 |
+
"rewards/rejected": -0.34497830271720886,
|
849 |
+
"step": 550
|
850 |
+
},
|
851 |
+
{
|
852 |
+
"epoch": 0.31719059756442936,
|
853 |
+
"grad_norm": 12.308275450379226,
|
854 |
+
"learning_rate": 4.316043946814865e-07,
|
855 |
+
"logits/chosen": -0.1519699990749359,
|
856 |
+
"logits/rejected": -0.07414064556360245,
|
857 |
+
"logps/chosen": -554.9448852539062,
|
858 |
+
"logps/rejected": -555.5247802734375,
|
859 |
+
"loss": 0.67,
|
860 |
+
"rewards/accuracies": 0.6499999761581421,
|
861 |
+
"rewards/chosen": -0.12103432416915894,
|
862 |
+
"rewards/margins": 0.14699925482273102,
|
863 |
+
"rewards/rejected": -0.26803356409072876,
|
864 |
+
"step": 560
|
865 |
+
},
|
866 |
+
{
|
867 |
+
"epoch": 0.32285471537807986,
|
868 |
+
"grad_norm": 9.151118233428134,
|
869 |
+
"learning_rate": 4.281700383877963e-07,
|
870 |
+
"logits/chosen": -0.07585703581571579,
|
871 |
+
"logits/rejected": -0.07688557356595993,
|
872 |
+
"logps/chosen": -523.4305419921875,
|
873 |
+
"logps/rejected": -514.0325927734375,
|
874 |
+
"loss": 0.6523,
|
875 |
+
"rewards/accuracies": 0.699999988079071,
|
876 |
+
"rewards/chosen": -0.2873981297016144,
|
877 |
+
"rewards/margins": 0.24022600054740906,
|
878 |
+
"rewards/rejected": -0.5276241302490234,
|
879 |
+
"step": 570
|
880 |
+
},
|
881 |
+
{
|
882 |
+
"epoch": 0.32851883319173036,
|
883 |
+
"grad_norm": 10.100605813404021,
|
884 |
+
"learning_rate": 4.2466595215945304e-07,
|
885 |
+
"logits/chosen": -0.11299272626638412,
|
886 |
+
"logits/rejected": -0.11114968359470367,
|
887 |
+
"logps/chosen": -460.5335998535156,
|
888 |
+
"logps/rejected": -500.4085388183594,
|
889 |
+
"loss": 0.6558,
|
890 |
+
"rewards/accuracies": 0.6499999761581421,
|
891 |
+
"rewards/chosen": -0.37247395515441895,
|
892 |
+
"rewards/margins": 0.21001112461090088,
|
893 |
+
"rewards/rejected": -0.5824850797653198,
|
894 |
+
"step": 580
|
895 |
+
},
|
896 |
+
{
|
897 |
+
"epoch": 0.3341829510053809,
|
898 |
+
"grad_norm": 7.561427128198726,
|
899 |
+
"learning_rate": 4.21093507381376e-07,
|
900 |
+
"logits/chosen": -0.15097267925739288,
|
901 |
+
"logits/rejected": -0.17012974619865417,
|
902 |
+
"logps/chosen": -486.6136169433594,
|
903 |
+
"logps/rejected": -479.129150390625,
|
904 |
+
"loss": 0.6412,
|
905 |
+
"rewards/accuracies": 0.699999988079071,
|
906 |
+
"rewards/chosen": -0.2694224417209625,
|
907 |
+
"rewards/margins": 0.13754698634147644,
|
908 |
+
"rewards/rejected": -0.4069693982601166,
|
909 |
+
"step": 590
|
910 |
+
},
|
911 |
+
{
|
912 |
+
"epoch": 0.33984706881903143,
|
913 |
+
"grad_norm": 9.803340241849913,
|
914 |
+
"learning_rate": 4.1745410219178846e-07,
|
915 |
+
"logits/chosen": -0.16447195410728455,
|
916 |
+
"logits/rejected": -0.1477648913860321,
|
917 |
+
"logps/chosen": -575.5458984375,
|
918 |
+
"logps/rejected": -575.5802001953125,
|
919 |
+
"loss": 0.6354,
|
920 |
+
"rewards/accuracies": 0.699999988079071,
|
921 |
+
"rewards/chosen": -0.302044153213501,
|
922 |
+
"rewards/margins": 0.1591712236404419,
|
923 |
+
"rewards/rejected": -0.4612153470516205,
|
924 |
+
"step": 600
|
925 |
+
},
|
926 |
+
{
|
927 |
+
"epoch": 0.34551118663268193,
|
928 |
+
"grad_norm": 8.394709669140541,
|
929 |
+
"learning_rate": 4.137491609350322e-07,
|
930 |
+
"logits/chosen": -0.2764771282672882,
|
931 |
+
"logits/rejected": -0.25419965386390686,
|
932 |
+
"logps/chosen": -449.9400329589844,
|
933 |
+
"logps/rejected": -436.87664794921875,
|
934 |
+
"loss": 0.6401,
|
935 |
+
"rewards/accuracies": 0.550000011920929,
|
936 |
+
"rewards/chosen": -0.33376139402389526,
|
937 |
+
"rewards/margins": 0.040792159736156464,
|
938 |
+
"rewards/rejected": -0.3745535612106323,
|
939 |
+
"step": 610
|
940 |
+
},
|
941 |
+
{
|
942 |
+
"epoch": 0.3511753044463325,
|
943 |
+
"grad_norm": 10.478475585898996,
|
944 |
+
"learning_rate": 4.099801336041255e-07,
|
945 |
+
"logits/chosen": -0.09448835998773575,
|
946 |
+
"logits/rejected": -0.1009138971567154,
|
947 |
+
"logps/chosen": -545.4580078125,
|
948 |
+
"logps/rejected": -574.4044799804688,
|
949 |
+
"loss": 0.6436,
|
950 |
+
"rewards/accuracies": 0.675000011920929,
|
951 |
+
"rewards/chosen": -0.4826622009277344,
|
952 |
+
"rewards/margins": 0.22007235884666443,
|
953 |
+
"rewards/rejected": -0.7027345895767212,
|
954 |
+
"step": 620
|
955 |
+
},
|
956 |
+
{
|
957 |
+
"epoch": 0.356839422259983,
|
958 |
+
"grad_norm": 9.229820112984584,
|
959 |
+
"learning_rate": 4.0614849527328334e-07,
|
960 |
+
"logits/chosen": -0.1743849813938141,
|
961 |
+
"logits/rejected": -0.11722008883953094,
|
962 |
+
"logps/chosen": -528.2041625976562,
|
963 |
+
"logps/rejected": -509.2850646972656,
|
964 |
+
"loss": 0.6289,
|
965 |
+
"rewards/accuracies": 0.550000011920929,
|
966 |
+
"rewards/chosen": -0.43195396661758423,
|
967 |
+
"rewards/margins": 0.06749050319194794,
|
968 |
+
"rewards/rejected": -0.4994444251060486,
|
969 |
+
"step": 630
|
970 |
+
},
|
971 |
+
{
|
972 |
+
"epoch": 0.36250354007363356,
|
973 |
+
"grad_norm": 14.872985983217255,
|
974 |
+
"learning_rate": 4.022557455206211e-07,
|
975 |
+
"logits/chosen": -0.1340872049331665,
|
976 |
+
"logits/rejected": -0.18022653460502625,
|
977 |
+
"logps/chosen": -468.09320068359375,
|
978 |
+
"logps/rejected": -506.20281982421875,
|
979 |
+
"loss": 0.6604,
|
980 |
+
"rewards/accuracies": 0.699999988079071,
|
981 |
+
"rewards/chosen": -0.4429725110530853,
|
982 |
+
"rewards/margins": 0.22569486498832703,
|
983 |
+
"rewards/rejected": -0.6686673760414124,
|
984 |
+
"step": 640
|
985 |
+
},
|
986 |
+
{
|
987 |
+
"epoch": 0.36816765788728406,
|
988 |
+
"grad_norm": 9.563384752062458,
|
989 |
+
"learning_rate": 3.9830340784126935e-07,
|
990 |
+
"logits/chosen": 0.0594959631562233,
|
991 |
+
"logits/rejected": 0.06747709214687347,
|
992 |
+
"logps/chosen": -519.6448974609375,
|
993 |
+
"logps/rejected": -527.3829345703125,
|
994 |
+
"loss": 0.6339,
|
995 |
+
"rewards/accuracies": 0.75,
|
996 |
+
"rewards/chosen": -0.602764368057251,
|
997 |
+
"rewards/margins": 0.2741518020629883,
|
998 |
+
"rewards/rejected": -0.8769161105155945,
|
999 |
+
"step": 650
|
1000 |
+
},
|
1001 |
+
{
|
1002 |
+
"epoch": 0.37383177570093457,
|
1003 |
+
"grad_norm": 9.321968230497752,
|
1004 |
+
"learning_rate": 3.942930290511272e-07,
|
1005 |
+
"logits/chosen": -0.1886819303035736,
|
1006 |
+
"logits/rejected": -0.15237857401371002,
|
1007 |
+
"logps/chosen": -434.3822326660156,
|
1008 |
+
"logps/rejected": -435.7581481933594,
|
1009 |
+
"loss": 0.6297,
|
1010 |
+
"rewards/accuracies": 0.75,
|
1011 |
+
"rewards/chosen": -0.34810423851013184,
|
1012 |
+
"rewards/margins": 0.15688088536262512,
|
1013 |
+
"rewards/rejected": -0.5049852132797241,
|
1014 |
+
"step": 660
|
1015 |
+
},
|
1016 |
+
{
|
1017 |
+
"epoch": 0.3794958935145851,
|
1018 |
+
"grad_norm": 10.82596879655571,
|
1019 |
+
"learning_rate": 3.902261786814889e-07,
|
1020 |
+
"logits/chosen": 0.005353009793907404,
|
1021 |
+
"logits/rejected": 0.0019110903376713395,
|
1022 |
+
"logps/chosen": -452.5858459472656,
|
1023 |
+
"logps/rejected": -470.3968811035156,
|
1024 |
+
"loss": 0.656,
|
1025 |
+
"rewards/accuracies": 0.699999988079071,
|
1026 |
+
"rewards/chosen": -0.4791645109653473,
|
1027 |
+
"rewards/margins": 0.18721643090248108,
|
1028 |
+
"rewards/rejected": -0.6663809418678284,
|
1029 |
+
"step": 670
|
1030 |
+
},
|
1031 |
+
{
|
1032 |
+
"epoch": 0.38516001132823563,
|
1033 |
+
"grad_norm": 11.466215492392923,
|
1034 |
+
"learning_rate": 3.8610444836478097e-07,
|
1035 |
+
"logits/chosen": -0.06002754718065262,
|
1036 |
+
"logits/rejected": -0.024013454094529152,
|
1037 |
+
"logps/chosen": -430.5911560058594,
|
1038 |
+
"logps/rejected": -400.8475646972656,
|
1039 |
+
"loss": 0.6485,
|
1040 |
+
"rewards/accuracies": 0.7250000238418579,
|
1041 |
+
"rewards/chosen": -0.42653408646583557,
|
1042 |
+
"rewards/margins": 0.1883515566587448,
|
1043 |
+
"rewards/rejected": -0.6148856282234192,
|
1044 |
+
"step": 680
|
1045 |
+
},
|
1046 |
+
{
|
1047 |
+
"epoch": 0.39082412914188613,
|
1048 |
+
"grad_norm": 9.942937076630836,
|
1049 |
+
"learning_rate": 3.8192945121164886e-07,
|
1050 |
+
"logits/chosen": -0.12901607155799866,
|
1051 |
+
"logits/rejected": -0.12658382952213287,
|
1052 |
+
"logps/chosen": -471.71307373046875,
|
1053 |
+
"logps/rejected": -470.94598388671875,
|
1054 |
+
"loss": 0.6208,
|
1055 |
+
"rewards/accuracies": 0.675000011920929,
|
1056 |
+
"rewards/chosen": -0.38932669162750244,
|
1057 |
+
"rewards/margins": 0.18685248494148254,
|
1058 |
+
"rewards/rejected": -0.5761792063713074,
|
1059 |
+
"step": 690
|
1060 |
+
},
|
1061 |
+
{
|
1062 |
+
"epoch": 0.3964882469555367,
|
1063 |
+
"grad_norm": 10.327977471567648,
|
1064 |
+
"learning_rate": 3.777028211796386e-07,
|
1065 |
+
"logits/chosen": -0.1806946098804474,
|
1066 |
+
"logits/rejected": -0.1935458481311798,
|
1067 |
+
"logps/chosen": -635.4529418945312,
|
1068 |
+
"logps/rejected": -643.0109252929688,
|
1069 |
+
"loss": 0.656,
|
1070 |
+
"rewards/accuracies": 0.6499999761581421,
|
1071 |
+
"rewards/chosen": -0.6373857855796814,
|
1072 |
+
"rewards/margins": 0.19065091013908386,
|
1073 |
+
"rewards/rejected": -0.8280366659164429,
|
1074 |
+
"step": 700
|
1075 |
+
},
|
1076 |
+
{
|
1077 |
+
"epoch": 0.4021523647691872,
|
1078 |
+
"grad_norm": 9.863883005146906,
|
1079 |
+
"learning_rate": 3.734262124337185e-07,
|
1080 |
+
"logits/chosen": -0.036530423909425735,
|
1081 |
+
"logits/rejected": 0.05539344623684883,
|
1082 |
+
"logps/chosen": -425.04248046875,
|
1083 |
+
"logps/rejected": -418.19989013671875,
|
1084 |
+
"loss": 0.6271,
|
1085 |
+
"rewards/accuracies": 0.625,
|
1086 |
+
"rewards/chosen": -0.5510110259056091,
|
1087 |
+
"rewards/margins": 0.2016453742980957,
|
1088 |
+
"rewards/rejected": -0.7526563405990601,
|
1089 |
+
"step": 710
|
1090 |
+
},
|
1091 |
+
{
|
1092 |
+
"epoch": 0.4078164825828377,
|
1093 |
+
"grad_norm": 10.91303076089921,
|
1094 |
+
"learning_rate": 3.691012986988936e-07,
|
1095 |
+
"logits/chosen": -0.14848558604717255,
|
1096 |
+
"logits/rejected": -0.0716543048620224,
|
1097 |
+
"logps/chosen": -499.07293701171875,
|
1098 |
+
"logps/rejected": -508.61669921875,
|
1099 |
+
"loss": 0.6297,
|
1100 |
+
"rewards/accuracies": 0.6000000238418579,
|
1101 |
+
"rewards/chosen": -0.6473081707954407,
|
1102 |
+
"rewards/margins": 0.08113422244787216,
|
1103 |
+
"rewards/rejected": -0.7284424304962158,
|
1104 |
+
"step": 720
|
1105 |
+
},
|
1106 |
+
{
|
1107 |
+
"epoch": 0.41348060039648826,
|
1108 |
+
"grad_norm": 10.638718740555106,
|
1109 |
+
"learning_rate": 3.647297726051641e-07,
|
1110 |
+
"logits/chosen": -0.16046440601348877,
|
1111 |
+
"logits/rejected": -0.1986566036939621,
|
1112 |
+
"logps/chosen": -564.6318359375,
|
1113 |
+
"logps/rejected": -551.3790283203125,
|
1114 |
+
"loss": 0.6292,
|
1115 |
+
"rewards/accuracies": 0.675000011920929,
|
1116 |
+
"rewards/chosen": -0.7726220488548279,
|
1117 |
+
"rewards/margins": 0.1552344411611557,
|
1118 |
+
"rewards/rejected": -0.9278565645217896,
|
1119 |
+
"step": 730
|
1120 |
+
},
|
1121 |
+
{
|
1122 |
+
"epoch": 0.41914471821013877,
|
1123 |
+
"grad_norm": 9.82408592787301,
|
1124 |
+
"learning_rate": 3.6031334502508524e-07,
|
1125 |
+
"logits/chosen": -0.22338561713695526,
|
1126 |
+
"logits/rejected": -0.2281237542629242,
|
1127 |
+
"logps/chosen": -592.9271240234375,
|
1128 |
+
"logps/rejected": -604.35009765625,
|
1129 |
+
"loss": 0.6244,
|
1130 |
+
"rewards/accuracies": 0.574999988079071,
|
1131 |
+
"rewards/chosen": -0.7858235239982605,
|
1132 |
+
"rewards/margins": 0.24623067677021027,
|
1133 |
+
"rewards/rejected": -1.0320541858673096,
|
1134 |
+
"step": 740
|
1135 |
+
},
|
1136 |
+
{
|
1137 |
+
"epoch": 0.42480883602378927,
|
1138 |
+
"grad_norm": 11.946249923246347,
|
1139 |
+
"learning_rate": 3.558537444041879e-07,
|
1140 |
+
"logits/chosen": -0.35307231545448303,
|
1141 |
+
"logits/rejected": -0.28875821828842163,
|
1142 |
+
"logps/chosen": -557.7034912109375,
|
1143 |
+
"logps/rejected": -546.9818725585938,
|
1144 |
+
"loss": 0.6343,
|
1145 |
+
"rewards/accuracies": 0.7250000238418579,
|
1146 |
+
"rewards/chosen": -0.8830374479293823,
|
1147 |
+
"rewards/margins": 0.13727910816669464,
|
1148 |
+
"rewards/rejected": -1.020316481590271,
|
1149 |
+
"step": 750
|
1150 |
+
},
|
1151 |
+
{
|
1152 |
+
"epoch": 0.43047295383743983,
|
1153 |
+
"grad_norm": 12.375783232448724,
|
1154 |
+
"learning_rate": 3.513527160845209e-07,
|
1155 |
+
"logits/chosen": -0.17869731783866882,
|
1156 |
+
"logits/rejected": -0.2512727975845337,
|
1157 |
+
"logps/chosen": -587.8796997070312,
|
1158 |
+
"logps/rejected": -617.7659912109375,
|
1159 |
+
"loss": 0.6246,
|
1160 |
+
"rewards/accuracies": 0.675000011920929,
|
1161 |
+
"rewards/chosen": -0.8949899673461914,
|
1162 |
+
"rewards/margins": 0.2545369267463684,
|
1163 |
+
"rewards/rejected": -1.1495269536972046,
|
1164 |
+
"step": 760
|
1165 |
+
},
|
1166 |
+
{
|
1167 |
+
"epoch": 0.43613707165109034,
|
1168 |
+
"grad_norm": 9.798605023058345,
|
1169 |
+
"learning_rate": 3.4681202162158173e-07,
|
1170 |
+
"logits/chosen": -0.07858623564243317,
|
1171 |
+
"logits/rejected": -0.08539044857025146,
|
1172 |
+
"logps/chosen": -487.92791748046875,
|
1173 |
+
"logps/rejected": -499.284912109375,
|
1174 |
+
"loss": 0.6411,
|
1175 |
+
"rewards/accuracies": 0.6499999761581421,
|
1176 |
+
"rewards/chosen": -0.9030693173408508,
|
1177 |
+
"rewards/margins": 0.18724389374256134,
|
1178 |
+
"rewards/rejected": -1.090313196182251,
|
1179 |
+
"step": 770
|
1180 |
+
},
|
1181 |
+
{
|
1182 |
+
"epoch": 0.44180118946474084,
|
1183 |
+
"grad_norm": 12.27412569981436,
|
1184 |
+
"learning_rate": 3.4223343809490103e-07,
|
1185 |
+
"logits/chosen": -0.052330244332551956,
|
1186 |
+
"logits/rejected": -0.11102048307657242,
|
1187 |
+
"logps/chosen": -478.4798889160156,
|
1188 |
+
"logps/rejected": -495.06396484375,
|
1189 |
+
"loss": 0.635,
|
1190 |
+
"rewards/accuracies": 0.675000011920929,
|
1191 |
+
"rewards/chosen": -0.7351385951042175,
|
1192 |
+
"rewards/margins": 0.2151581346988678,
|
1193 |
+
"rewards/rejected": -0.9502967000007629,
|
1194 |
+
"step": 780
|
1195 |
+
},
|
1196 |
+
{
|
1197 |
+
"epoch": 0.4474653072783914,
|
1198 |
+
"grad_norm": 12.52089221779439,
|
1199 |
+
"learning_rate": 3.3761875741255155e-07,
|
1200 |
+
"logits/chosen": -0.09298163652420044,
|
1201 |
+
"logits/rejected": -0.08369234949350357,
|
1202 |
+
"logps/chosen": -604.6654663085938,
|
1203 |
+
"logps/rejected": -636.544677734375,
|
1204 |
+
"loss": 0.6409,
|
1205 |
+
"rewards/accuracies": 0.4749999940395355,
|
1206 |
+
"rewards/chosen": -1.117897868156433,
|
1207 |
+
"rewards/margins": 0.055749792605638504,
|
1208 |
+
"rewards/rejected": -1.1736476421356201,
|
1209 |
+
"step": 790
|
1210 |
+
},
|
1211 |
+
{
|
1212 |
+
"epoch": 0.4531294250920419,
|
1213 |
+
"grad_norm": 12.055368493983458,
|
1214 |
+
"learning_rate": 3.32969785609854e-07,
|
1215 |
+
"logits/chosen": -0.14993992447853088,
|
1216 |
+
"logits/rejected": -0.1439143717288971,
|
1217 |
+
"logps/chosen": -581.8445434570312,
|
1218 |
+
"logps/rejected": -543.5735473632812,
|
1219 |
+
"loss": 0.6345,
|
1220 |
+
"rewards/accuracies": 0.6499999761581421,
|
1221 |
+
"rewards/chosen": -1.0062134265899658,
|
1222 |
+
"rewards/margins": 0.28742164373397827,
|
1223 |
+
"rewards/rejected": -1.2936350107192993,
|
1224 |
+
"step": 800
|
1225 |
+
},
|
1226 |
+
{
|
1227 |
+
"epoch": 0.45879354290569246,
|
1228 |
+
"grad_norm": 12.482925557658405,
|
1229 |
+
"learning_rate": 3.2828834214255396e-07,
|
1230 |
+
"logits/chosen": -0.12979525327682495,
|
1231 |
+
"logits/rejected": -0.08011293411254883,
|
1232 |
+
"logps/chosen": -733.0359497070312,
|
1233 |
+
"logps/rejected": -733.6185302734375,
|
1234 |
+
"loss": 0.6143,
|
1235 |
+
"rewards/accuracies": 0.625,
|
1236 |
+
"rewards/chosen": -1.321715235710144,
|
1237 |
+
"rewards/margins": 0.10633653402328491,
|
1238 |
+
"rewards/rejected": -1.4280518293380737,
|
1239 |
+
"step": 810
|
1240 |
+
},
|
1241 |
+
{
|
1242 |
+
"epoch": 0.46445766071934297,
|
1243 |
+
"grad_norm": 11.273282654052506,
|
1244 |
+
"learning_rate": 3.235762591747458e-07,
|
1245 |
+
"logits/chosen": -0.16923761367797852,
|
1246 |
+
"logits/rejected": -0.161187082529068,
|
1247 |
+
"logps/chosen": -494.4414978027344,
|
1248 |
+
"logps/rejected": -488.517578125,
|
1249 |
+
"loss": 0.6139,
|
1250 |
+
"rewards/accuracies": 0.625,
|
1251 |
+
"rewards/chosen": -1.0632091760635376,
|
1252 |
+
"rewards/margins": 0.09481721371412277,
|
1253 |
+
"rewards/rejected": -1.1580263376235962,
|
1254 |
+
"step": 820
|
1255 |
+
},
|
1256 |
+
{
|
1257 |
+
"epoch": 0.4701217785329935,
|
1258 |
+
"grad_norm": 12.585012235836388,
|
1259 |
+
"learning_rate": 3.188353808618241e-07,
|
1260 |
+
"logits/chosen": -0.09800489246845245,
|
1261 |
+
"logits/rejected": -0.09487877786159515,
|
1262 |
+
"logps/chosen": -541.4532470703125,
|
1263 |
+
"logps/rejected": -552.1514892578125,
|
1264 |
+
"loss": 0.6201,
|
1265 |
+
"rewards/accuracies": 0.5249999761581421,
|
1266 |
+
"rewards/chosen": -1.165722131729126,
|
1267 |
+
"rewards/margins": 0.1106475368142128,
|
1268 |
+
"rewards/rejected": -1.2763696908950806,
|
1269 |
+
"step": 830
|
1270 |
+
},
|
1271 |
+
{
|
1272 |
+
"epoch": 0.47578589634664403,
|
1273 |
+
"grad_norm": 12.896022597342322,
|
1274 |
+
"learning_rate": 3.1406756262874097e-07,
|
1275 |
+
"logits/chosen": -0.12023751437664032,
|
1276 |
+
"logits/rejected": -0.19186559319496155,
|
1277 |
+
"logps/chosen": -506.29705810546875,
|
1278 |
+
"logps/rejected": -528.5136108398438,
|
1279 |
+
"loss": 0.6289,
|
1280 |
+
"rewards/accuracies": 0.699999988079071,
|
1281 |
+
"rewards/chosen": -1.0864802598953247,
|
1282 |
+
"rewards/margins": 0.25540152192115784,
|
1283 |
+
"rewards/rejected": -1.3418817520141602,
|
1284 |
+
"step": 840
|
1285 |
+
},
|
1286 |
+
{
|
1287 |
+
"epoch": 0.48145001416029454,
|
1288 |
+
"grad_norm": 14.37685541871016,
|
1289 |
+
"learning_rate": 3.0927467044385364e-07,
|
1290 |
+
"logits/chosen": -0.1970689296722412,
|
1291 |
+
"logits/rejected": -0.20069988071918488,
|
1292 |
+
"logps/chosen": -616.935791015625,
|
1293 |
+
"logps/rejected": -660.5993041992188,
|
1294 |
+
"loss": 0.6094,
|
1295 |
+
"rewards/accuracies": 0.574999988079071,
|
1296 |
+
"rewards/chosen": -1.5820796489715576,
|
1297 |
+
"rewards/margins": 0.2973397374153137,
|
1298 |
+
"rewards/rejected": -1.8794193267822266,
|
1299 |
+
"step": 850
|
1300 |
+
},
|
1301 |
+
{
|
1302 |
+
"epoch": 0.48711413197394504,
|
1303 |
+
"grad_norm": 13.003776590794605,
|
1304 |
+
"learning_rate": 3.044585800886452e-07,
|
1305 |
+
"logits/chosen": -0.2083606719970703,
|
1306 |
+
"logits/rejected": -0.21461403369903564,
|
1307 |
+
"logps/chosen": -601.05859375,
|
1308 |
+
"logps/rejected": -628.2822265625,
|
1309 |
+
"loss": 0.6325,
|
1310 |
+
"rewards/accuracies": 0.5249999761581421,
|
1311 |
+
"rewards/chosen": -1.4588840007781982,
|
1312 |
+
"rewards/margins": 0.1004204973578453,
|
1313 |
+
"rewards/rejected": -1.5593047142028809,
|
1314 |
+
"step": 860
|
1315 |
+
},
|
1316 |
+
{
|
1317 |
+
"epoch": 0.4927782497875956,
|
1318 |
+
"grad_norm": 15.852933713937096,
|
1319 |
+
"learning_rate": 2.996211764236051e-07,
|
1320 |
+
"logits/chosen": -0.198094442486763,
|
1321 |
+
"logits/rejected": -0.15960830450057983,
|
1322 |
+
"logps/chosen": -638.4063110351562,
|
1323 |
+
"logps/rejected": -647.378173828125,
|
1324 |
+
"loss": 0.6139,
|
1325 |
+
"rewards/accuracies": 0.675000011920929,
|
1326 |
+
"rewards/chosen": -1.6155242919921875,
|
1327 |
+
"rewards/margins": 0.23866060376167297,
|
1328 |
+
"rewards/rejected": -1.8541847467422485,
|
1329 |
+
"step": 870
|
1330 |
+
},
|
1331 |
+
{
|
1332 |
+
"epoch": 0.4984423676012461,
|
1333 |
+
"grad_norm": 12.409431772391303,
|
1334 |
+
"learning_rate": 2.947643526505562e-07,
|
1335 |
+
"logits/chosen": -0.2549227476119995,
|
1336 |
+
"logits/rejected": -0.2773335576057434,
|
1337 |
+
"logps/chosen": -583.142578125,
|
1338 |
+
"logps/rejected": -609.6130981445312,
|
1339 |
+
"loss": 0.6237,
|
1340 |
+
"rewards/accuracies": 0.625,
|
1341 |
+
"rewards/chosen": -1.3404490947723389,
|
1342 |
+
"rewards/margins": 0.16823282837867737,
|
1343 |
+
"rewards/rejected": -1.5086817741394043,
|
1344 |
+
"step": 880
|
1345 |
+
},
|
1346 |
+
{
|
1347 |
+
"epoch": 0.5041064854148967,
|
1348 |
+
"grad_norm": 11.633974731071602,
|
1349 |
+
"learning_rate": 2.8989000957171727e-07,
|
1350 |
+
"logits/chosen": -0.10305686295032501,
|
1351 |
+
"logits/rejected": -0.1576327383518219,
|
1352 |
+
"logps/chosen": -641.8757934570312,
|
1353 |
+
"logps/rejected": -665.0620727539062,
|
1354 |
+
"loss": 0.5978,
|
1355 |
+
"rewards/accuracies": 0.7250000238418579,
|
1356 |
+
"rewards/chosen": -1.3931224346160889,
|
1357 |
+
"rewards/margins": 0.32287827134132385,
|
1358 |
+
"rewards/rejected": -1.7160007953643799,
|
1359 |
+
"step": 890
|
1360 |
+
},
|
1361 |
+
{
|
1362 |
+
"epoch": 0.5097706032285472,
|
1363 |
+
"grad_norm": 16.050034972192993,
|
1364 |
+
"learning_rate": 2.850000548457917e-07,
|
1365 |
+
"logits/chosen": 0.14430885016918182,
|
1366 |
+
"logits/rejected": 0.1290302276611328,
|
1367 |
+
"logps/chosen": -486.8666076660156,
|
1368 |
+
"logps/rejected": -488.3501892089844,
|
1369 |
+
"loss": 0.6073,
|
1370 |
+
"rewards/accuracies": 0.7250000238418579,
|
1371 |
+
"rewards/chosen": -1.0954620838165283,
|
1372 |
+
"rewards/margins": 0.26246118545532227,
|
1373 |
+
"rewards/rejected": -1.3579232692718506,
|
1374 |
+
"step": 900
|
1375 |
+
},
|
1376 |
+
{
|
1377 |
+
"epoch": 0.5154347210421977,
|
1378 |
+
"grad_norm": 19.840618375098327,
|
1379 |
+
"learning_rate": 2.8009640224137114e-07,
|
1380 |
+
"logits/chosen": -0.22643284499645233,
|
1381 |
+
"logits/rejected": -0.25430920720100403,
|
1382 |
+
"logps/chosen": -601.1445922851562,
|
1383 |
+
"logps/rejected": -614.4564208984375,
|
1384 |
+
"loss": 0.6495,
|
1385 |
+
"rewards/accuracies": 0.675000011920929,
|
1386 |
+
"rewards/chosen": -1.603725790977478,
|
1387 |
+
"rewards/margins": 0.3380531966686249,
|
1388 |
+
"rewards/rejected": -1.9417788982391357,
|
1389 |
+
"step": 910
|
1390 |
+
},
|
1391 |
+
{
|
1392 |
+
"epoch": 0.5210988388558482,
|
1393 |
+
"grad_norm": 17.51073505222395,
|
1394 |
+
"learning_rate": 2.751809708879502e-07,
|
1395 |
+
"logits/chosen": -0.21161291003227234,
|
1396 |
+
"logits/rejected": -0.17318451404571533,
|
1397 |
+
"logps/chosen": -563.7437133789062,
|
1398 |
+
"logps/rejected": -568.7862548828125,
|
1399 |
+
"loss": 0.6246,
|
1400 |
+
"rewards/accuracies": 0.574999988079071,
|
1401 |
+
"rewards/chosen": -1.2936795949935913,
|
1402 |
+
"rewards/margins": 0.2570227384567261,
|
1403 |
+
"rewards/rejected": -1.550702452659607,
|
1404 |
+
"step": 920
|
1405 |
+
},
|
1406 |
+
{
|
1407 |
+
"epoch": 0.5267629566694987,
|
1408 |
+
"grad_norm": 12.714429256653643,
|
1409 |
+
"learning_rate": 2.7025568452484067e-07,
|
1410 |
+
"logits/chosen": -0.047567375004291534,
|
1411 |
+
"logits/rejected": -0.04497741162776947,
|
1412 |
+
"logps/chosen": -527.0328979492188,
|
1413 |
+
"logps/rejected": -553.7757568359375,
|
1414 |
+
"loss": 0.6113,
|
1415 |
+
"rewards/accuracies": 0.574999988079071,
|
1416 |
+
"rewards/chosen": -1.0018929243087769,
|
1417 |
+
"rewards/margins": 0.2291610985994339,
|
1418 |
+
"rewards/rejected": -1.2310539484024048,
|
1419 |
+
"step": 930
|
1420 |
+
},
|
1421 |
+
{
|
1422 |
+
"epoch": 0.5324270744831493,
|
1423 |
+
"grad_norm": 15.205234986430579,
|
1424 |
+
"learning_rate": 2.653224707482835e-07,
|
1425 |
+
"logits/chosen": -0.07980841398239136,
|
1426 |
+
"logits/rejected": -0.024791846051812172,
|
1427 |
+
"logps/chosen": -734.6407470703125,
|
1428 |
+
"logps/rejected": -734.0439453125,
|
1429 |
+
"loss": 0.6008,
|
1430 |
+
"rewards/accuracies": 0.7250000238418579,
|
1431 |
+
"rewards/chosen": -1.6005531549453735,
|
1432 |
+
"rewards/margins": 0.21221895515918732,
|
1433 |
+
"rewards/rejected": -1.8127720355987549,
|
1434 |
+
"step": 940
|
1435 |
+
},
|
1436 |
+
{
|
1437 |
+
"epoch": 0.5380911922967998,
|
1438 |
+
"grad_norm": 13.78792024492432,
|
1439 |
+
"learning_rate": 2.603832602570505e-07,
|
1440 |
+
"logits/chosen": -0.19060659408569336,
|
1441 |
+
"logits/rejected": -0.08821268379688263,
|
1442 |
+
"logps/chosen": -565.0804443359375,
|
1443 |
+
"logps/rejected": -584.7576293945312,
|
1444 |
+
"loss": 0.6236,
|
1445 |
+
"rewards/accuracies": 0.7250000238418579,
|
1446 |
+
"rewards/chosen": -1.112985372543335,
|
1447 |
+
"rewards/margins": 0.2864472568035126,
|
1448 |
+
"rewards/rejected": -1.39943265914917,
|
1449 |
+
"step": 950
|
1450 |
+
},
|
1451 |
+
{
|
1452 |
+
"epoch": 0.5437553101104503,
|
1453 |
+
"grad_norm": 14.265593088211887,
|
1454 |
+
"learning_rate": 2.554399860968316e-07,
|
1455 |
+
"logits/chosen": -0.0522037036716938,
|
1456 |
+
"logits/rejected": -0.00027574002160690725,
|
1457 |
+
"logps/chosen": -639.2886962890625,
|
1458 |
+
"logps/rejected": -651.7282104492188,
|
1459 |
+
"loss": 0.5918,
|
1460 |
+
"rewards/accuracies": 0.550000011920929,
|
1461 |
+
"rewards/chosen": -1.2711105346679688,
|
1462 |
+
"rewards/margins": 0.2477395236492157,
|
1463 |
+
"rewards/rejected": -1.5188500881195068,
|
1464 |
+
"step": 960
|
1465 |
+
},
|
1466 |
+
{
|
1467 |
+
"epoch": 0.5494194279241008,
|
1468 |
+
"grad_norm": 11.543996534625997,
|
1469 |
+
"learning_rate": 2.504945829037042e-07,
|
1470 |
+
"logits/chosen": -0.1183534637093544,
|
1471 |
+
"logits/rejected": -0.1250295788049698,
|
1472 |
+
"logps/chosen": -552.4560546875,
|
1473 |
+
"logps/rejected": -575.94775390625,
|
1474 |
+
"loss": 0.5996,
|
1475 |
+
"rewards/accuracies": 0.7250000238418579,
|
1476 |
+
"rewards/chosen": -1.3370610475540161,
|
1477 |
+
"rewards/margins": 0.22345292568206787,
|
1478 |
+
"rewards/rejected": -1.5605138540267944,
|
1479 |
+
"step": 970
|
1480 |
+
},
|
1481 |
+
{
|
1482 |
+
"epoch": 0.5550835457377513,
|
1483 |
+
"grad_norm": 14.687067318658848,
|
1484 |
+
"learning_rate": 2.4554898614697943e-07,
|
1485 |
+
"logits/chosen": -0.09519994258880615,
|
1486 |
+
"logits/rejected": 0.008313467726111412,
|
1487 |
+
"logps/chosen": -533.7589111328125,
|
1488 |
+
"logps/rejected": -561.341064453125,
|
1489 |
+
"loss": 0.609,
|
1490 |
+
"rewards/accuracies": 0.7250000238418579,
|
1491 |
+
"rewards/chosen": -1.5500489473342896,
|
1492 |
+
"rewards/margins": 0.150363028049469,
|
1493 |
+
"rewards/rejected": -1.7004121541976929,
|
1494 |
+
"step": 980
|
1495 |
+
},
|
1496 |
+
{
|
1497 |
+
"epoch": 0.5607476635514018,
|
1498 |
+
"grad_norm": 14.406590393647319,
|
1499 |
+
"learning_rate": 2.406051313717232e-07,
|
1500 |
+
"logits/chosen": -0.15122143924236298,
|
1501 |
+
"logits/rejected": -0.18891175091266632,
|
1502 |
+
"logps/chosen": -624.895263671875,
|
1503 |
+
"logps/rejected": -634.48193359375,
|
1504 |
+
"loss": 0.6132,
|
1505 |
+
"rewards/accuracies": 0.625,
|
1506 |
+
"rewards/chosen": -1.6897090673446655,
|
1507 |
+
"rewards/margins": 0.36004889011383057,
|
1508 |
+
"rewards/rejected": -2.049757957458496,
|
1509 |
+
"step": 990
|
1510 |
+
},
|
1511 |
+
{
|
1512 |
+
"epoch": 0.5664117813650524,
|
1513 |
+
"grad_norm": 15.706647427077176,
|
1514 |
+
"learning_rate": 2.3566495344124662e-07,
|
1515 |
+
"logits/chosen": -0.13275066018104553,
|
1516 |
+
"logits/rejected": -0.1235559806227684,
|
1517 |
+
"logps/chosen": -751.436279296875,
|
1518 |
+
"logps/rejected": -777.4478759765625,
|
1519 |
+
"loss": 0.6097,
|
1520 |
+
"rewards/accuracies": 0.7250000238418579,
|
1521 |
+
"rewards/chosen": -2.308816909790039,
|
1522 |
+
"rewards/margins": 0.3510530889034271,
|
1523 |
+
"rewards/rejected": -2.659869909286499,
|
1524 |
+
"step": 1000
|
1525 |
+
},
|
1526 |
+
{
|
1527 |
+
"epoch": 0.5720758991787029,
|
1528 |
+
"grad_norm": 15.828215079444101,
|
1529 |
+
"learning_rate": 2.3073038577986357e-07,
|
1530 |
+
"logits/chosen": -0.07961639761924744,
|
1531 |
+
"logits/rejected": -0.16084156930446625,
|
1532 |
+
"logps/chosen": -487.7679138183594,
|
1533 |
+
"logps/rejected": -514.5160522460938,
|
1534 |
+
"loss": 0.613,
|
1535 |
+
"rewards/accuracies": 0.550000011920929,
|
1536 |
+
"rewards/chosen": -1.5107285976409912,
|
1537 |
+
"rewards/margins": 0.1231117695569992,
|
1538 |
+
"rewards/rejected": -1.633840560913086,
|
1539 |
+
"step": 1010
|
1540 |
+
},
|
1541 |
+
{
|
1542 |
+
"epoch": 0.5777400169923534,
|
1543 |
+
"grad_norm": 16.98606079743348,
|
1544 |
+
"learning_rate": 2.2580335961621235e-07,
|
1545 |
+
"logits/chosen": -0.029454564675688744,
|
1546 |
+
"logits/rejected": -0.06869341433048248,
|
1547 |
+
"logps/chosen": -622.4491577148438,
|
1548 |
+
"logps/rejected": -640.3287353515625,
|
1549 |
+
"loss": 0.6172,
|
1550 |
+
"rewards/accuracies": 0.6499999761581421,
|
1551 |
+
"rewards/chosen": -1.6914516687393188,
|
1552 |
+
"rewards/margins": 0.5090458989143372,
|
1553 |
+
"rewards/rejected": -2.2004973888397217,
|
1554 |
+
"step": 1020
|
1555 |
+
},
|
1556 |
+
{
|
1557 |
+
"epoch": 0.583404134806004,
|
1558 |
+
"grad_norm": 17.188915741370625,
|
1559 |
+
"learning_rate": 2.20885803227435e-07,
|
1560 |
+
"logits/chosen": -0.11491873115301132,
|
1561 |
+
"logits/rejected": -0.11692114919424057,
|
1562 |
+
"logps/chosen": -667.051513671875,
|
1563 |
+
"logps/rejected": -692.4650268554688,
|
1564 |
+
"loss": 0.6128,
|
1565 |
+
"rewards/accuracies": 0.550000011920929,
|
1566 |
+
"rewards/chosen": -1.9581496715545654,
|
1567 |
+
"rewards/margins": 0.2536167800426483,
|
1568 |
+
"rewards/rejected": -2.2117667198181152,
|
1569 |
+
"step": 1030
|
1570 |
+
},
|
1571 |
+
{
|
1572 |
+
"epoch": 0.5890682526196545,
|
1573 |
+
"grad_norm": 18.64862832145123,
|
1574 |
+
"learning_rate": 2.159796411845128e-07,
|
1575 |
+
"logits/chosen": -0.2661534249782562,
|
1576 |
+
"logits/rejected": -0.26083916425704956,
|
1577 |
+
"logps/chosen": -598.6723022460938,
|
1578 |
+
"logps/rejected": -628.9426879882812,
|
1579 |
+
"loss": 0.5988,
|
1580 |
+
"rewards/accuracies": 0.75,
|
1581 |
+
"rewards/chosen": -1.5122573375701904,
|
1582 |
+
"rewards/margins": 0.391846239566803,
|
1583 |
+
"rewards/rejected": -1.9041036367416382,
|
1584 |
+
"step": 1040
|
1585 |
+
},
|
1586 |
+
{
|
1587 |
+
"epoch": 0.594732370433305,
|
1588 |
+
"grad_norm": 34.61759080017131,
|
1589 |
+
"learning_rate": 2.110867935990524e-07,
|
1590 |
+
"logits/chosen": -0.14623607695102692,
|
1591 |
+
"logits/rejected": -0.08916531503200531,
|
1592 |
+
"logps/chosen": -743.6148681640625,
|
1593 |
+
"logps/rejected": -772.8529052734375,
|
1594 |
+
"loss": 0.6145,
|
1595 |
+
"rewards/accuracies": 0.699999988079071,
|
1596 |
+
"rewards/chosen": -1.8820927143096924,
|
1597 |
+
"rewards/margins": 0.48489370942115784,
|
1598 |
+
"rewards/rejected": -2.366986036300659,
|
1599 |
+
"step": 1050
|
1600 |
+
},
|
1601 |
+
{
|
1602 |
+
"epoch": 0.6003964882469556,
|
1603 |
+
"grad_norm": 15.508947196938045,
|
1604 |
+
"learning_rate": 2.0620917537181646e-07,
|
1605 |
+
"logits/chosen": -0.10365153849124908,
|
1606 |
+
"logits/rejected": -0.0691133588552475,
|
1607 |
+
"logps/chosen": -673.80224609375,
|
1608 |
+
"logps/rejected": -674.360595703125,
|
1609 |
+
"loss": 0.6345,
|
1610 |
+
"rewards/accuracies": 0.699999988079071,
|
1611 |
+
"rewards/chosen": -1.8425861597061157,
|
1612 |
+
"rewards/margins": 0.2413545399904251,
|
1613 |
+
"rewards/rejected": -2.0839409828186035,
|
1614 |
+
"step": 1060
|
1615 |
+
},
|
1616 |
+
{
|
1617 |
+
"epoch": 0.6060606060606061,
|
1618 |
+
"grad_norm": 16.329701704182607,
|
1619 |
+
"learning_rate": 2.013486954432943e-07,
|
1620 |
+
"logits/chosen": -0.10045067965984344,
|
1621 |
+
"logits/rejected": -0.051240742206573486,
|
1622 |
+
"logps/chosen": -639.800537109375,
|
1623 |
+
"logps/rejected": -659.7952880859375,
|
1624 |
+
"loss": 0.6148,
|
1625 |
+
"rewards/accuracies": 0.7250000238418579,
|
1626 |
+
"rewards/chosen": -1.61115300655365,
|
1627 |
+
"rewards/margins": 0.4051777720451355,
|
1628 |
+
"rewards/rejected": -2.0163307189941406,
|
1629 |
+
"step": 1070
|
1630 |
+
},
|
1631 |
+
{
|
1632 |
+
"epoch": 0.6117247238742566,
|
1633 |
+
"grad_norm": 19.231855038307156,
|
1634 |
+
"learning_rate": 1.9650725604660473e-07,
|
1635 |
+
"logits/chosen": -0.10707108676433563,
|
1636 |
+
"logits/rejected": -0.11871937662363052,
|
1637 |
+
"logps/chosen": -561.498291015625,
|
1638 |
+
"logps/rejected": -613.8865966796875,
|
1639 |
+
"loss": 0.5976,
|
1640 |
+
"rewards/accuracies": 0.6000000238418579,
|
1641 |
+
"rewards/chosen": -1.4580614566802979,
|
1642 |
+
"rewards/margins": 0.1603337824344635,
|
1643 |
+
"rewards/rejected": -1.618395209312439,
|
1644 |
+
"step": 1080
|
1645 |
+
},
|
1646 |
+
{
|
1647 |
+
"epoch": 0.6173888416879071,
|
1648 |
+
"grad_norm": 14.596940988757039,
|
1649 |
+
"learning_rate": 1.9168675196302411e-07,
|
1650 |
+
"logits/chosen": -0.080962173640728,
|
1651 |
+
"logits/rejected": -0.1715216189622879,
|
1652 |
+
"logps/chosen": -638.506591796875,
|
1653 |
+
"logps/rejected": -675.8438720703125,
|
1654 |
+
"loss": 0.594,
|
1655 |
+
"rewards/accuracies": 0.7250000238418579,
|
1656 |
+
"rewards/chosen": -1.5369634628295898,
|
1657 |
+
"rewards/margins": 0.35168346762657166,
|
1658 |
+
"rewards/rejected": -1.8886468410491943,
|
1659 |
+
"step": 1090
|
1660 |
+
},
|
1661 |
+
{
|
1662 |
+
"epoch": 0.6230529595015576,
|
1663 |
+
"grad_norm": 15.698411262500759,
|
1664 |
+
"learning_rate": 1.8688906978043097e-07,
|
1665 |
+
"logits/chosen": -0.08788567781448364,
|
1666 |
+
"logits/rejected": -0.09263203293085098,
|
1667 |
+
"logps/chosen": -656.8270874023438,
|
1668 |
+
"logps/rejected": -675.0738525390625,
|
1669 |
+
"loss": 0.5837,
|
1670 |
+
"rewards/accuracies": 0.6499999761581421,
|
1671 |
+
"rewards/chosen": -1.9104950428009033,
|
1672 |
+
"rewards/margins": 0.25849801301956177,
|
1673 |
+
"rewards/rejected": -2.168992757797241,
|
1674 |
+
"step": 1100
|
1675 |
+
},
|
1676 |
+
{
|
1677 |
+
"epoch": 0.6287170773152082,
|
1678 |
+
"grad_norm": 11.248760224035648,
|
1679 |
+
"learning_rate": 1.8211608715495725e-07,
|
1680 |
+
"logits/chosen": -0.20944643020629883,
|
1681 |
+
"logits/rejected": -0.21442022919654846,
|
1682 |
+
"logps/chosen": -639.87109375,
|
1683 |
+
"logps/rejected": -680.1671142578125,
|
1684 |
+
"loss": 0.561,
|
1685 |
+
"rewards/accuracies": 0.75,
|
1686 |
+
"rewards/chosen": -1.946523666381836,
|
1687 |
+
"rewards/margins": 0.565606951713562,
|
1688 |
+
"rewards/rejected": -2.5121307373046875,
|
1689 |
+
"step": 1110
|
1690 |
+
},
|
1691 |
+
{
|
1692 |
+
"epoch": 0.6343811951288587,
|
1693 |
+
"grad_norm": 23.371346801612088,
|
1694 |
+
"learning_rate": 1.7736967207613456e-07,
|
1695 |
+
"logits/chosen": -0.1694624125957489,
|
1696 |
+
"logits/rejected": -0.20123568177223206,
|
1697 |
+
"logps/chosen": -699.4913330078125,
|
1698 |
+
"logps/rejected": -709.8638916015625,
|
1699 |
+
"loss": 0.5982,
|
1700 |
+
"rewards/accuracies": 0.675000011920929,
|
1701 |
+
"rewards/chosen": -2.20566987991333,
|
1702 |
+
"rewards/margins": 0.46030035614967346,
|
1703 |
+
"rewards/rejected": -2.6659703254699707,
|
1704 |
+
"step": 1120
|
1705 |
+
},
|
1706 |
+
{
|
1707 |
+
"epoch": 0.6400453129425092,
|
1708 |
+
"grad_norm": 14.972751738612004,
|
1709 |
+
"learning_rate": 1.7265168213582442e-07,
|
1710 |
+
"logits/chosen": -0.17189843952655792,
|
1711 |
+
"logits/rejected": -0.051060281693935394,
|
1712 |
+
"logps/chosen": -745.7001953125,
|
1713 |
+
"logps/rejected": -743.2514038085938,
|
1714 |
+
"loss": 0.5806,
|
1715 |
+
"rewards/accuracies": 0.6499999761581421,
|
1716 |
+
"rewards/chosen": -2.1485393047332764,
|
1717 |
+
"rewards/margins": 0.38365238904953003,
|
1718 |
+
"rewards/rejected": -2.532191753387451,
|
1719 |
+
"step": 1130
|
1720 |
+
},
|
1721 |
+
{
|
1722 |
+
"epoch": 0.6457094307561597,
|
1723 |
+
"grad_norm": 15.350450735594052,
|
1724 |
+
"learning_rate": 1.679639638012175e-07,
|
1725 |
+
"logits/chosen": -0.2607277035713196,
|
1726 |
+
"logits/rejected": -0.25059252977371216,
|
1727 |
+
"logps/chosen": -670.5557861328125,
|
1728 |
+
"logps/rejected": -712.263916015625,
|
1729 |
+
"loss": 0.5772,
|
1730 |
+
"rewards/accuracies": 0.7250000238418579,
|
1731 |
+
"rewards/chosen": -2.157758951187134,
|
1732 |
+
"rewards/margins": 0.6318100094795227,
|
1733 |
+
"rewards/rejected": -2.7895689010620117,
|
1734 |
+
"step": 1140
|
1735 |
+
},
|
1736 |
+
{
|
1737 |
+
"epoch": 0.6513735485698102,
|
1738 |
+
"grad_norm": 18.101523725425658,
|
1739 |
+
"learning_rate": 1.6330835169218643e-07,
|
1740 |
+
"logits/chosen": -0.17243096232414246,
|
1741 |
+
"logits/rejected": -0.12170116603374481,
|
1742 |
+
"logps/chosen": -694.1895141601562,
|
1743 |
+
"logps/rejected": -697.4880981445312,
|
1744 |
+
"loss": 0.5791,
|
1745 |
+
"rewards/accuracies": 0.7250000238418579,
|
1746 |
+
"rewards/chosen": -2.00701642036438,
|
1747 |
+
"rewards/margins": 0.3331385552883148,
|
1748 |
+
"rewards/rejected": -2.3401551246643066,
|
1749 |
+
"step": 1150
|
1750 |
+
},
|
1751 |
+
{
|
1752 |
+
"epoch": 0.6570376663834607,
|
1753 |
+
"grad_norm": 18.372994873462016,
|
1754 |
+
"learning_rate": 1.5868666786327576e-07,
|
1755 |
+
"logits/chosen": -0.21193864941596985,
|
1756 |
+
"logits/rejected": -0.2452685534954071,
|
1757 |
+
"logps/chosen": -725.9470825195312,
|
1758 |
+
"logps/rejected": -772.846923828125,
|
1759 |
+
"loss": 0.6111,
|
1760 |
+
"rewards/accuracies": 0.675000011920929,
|
1761 |
+
"rewards/chosen": -2.013594627380371,
|
1762 |
+
"rewards/margins": 0.4714154303073883,
|
1763 |
+
"rewards/rejected": -2.4850101470947266,
|
1764 |
+
"step": 1160
|
1765 |
+
},
|
1766 |
+
{
|
1767 |
+
"epoch": 0.6627017841971113,
|
1768 |
+
"grad_norm": 21.510900198503908,
|
1769 |
+
"learning_rate": 1.5410072109060908e-07,
|
1770 |
+
"logits/chosen": -0.2692334055900574,
|
1771 |
+
"logits/rejected": -0.20904116332530975,
|
1772 |
+
"logps/chosen": -731.9447631835938,
|
1773 |
+
"logps/rejected": -718.5181884765625,
|
1774 |
+
"loss": 0.6129,
|
1775 |
+
"rewards/accuracies": 0.675000011920929,
|
1776 |
+
"rewards/chosen": -2.5587196350097656,
|
1777 |
+
"rewards/margins": 0.21226339042186737,
|
1778 |
+
"rewards/rejected": -2.7709832191467285,
|
1779 |
+
"step": 1170
|
1780 |
+
},
|
1781 |
+
{
|
1782 |
+
"epoch": 0.6683659020107618,
|
1783 |
+
"grad_norm": 18.927344667610054,
|
1784 |
+
"learning_rate": 1.4955230616399316e-07,
|
1785 |
+
"logits/chosen": -0.3498212993144989,
|
1786 |
+
"logits/rejected": -0.33364471793174744,
|
1787 |
+
"logps/chosen": -781.1145629882812,
|
1788 |
+
"logps/rejected": -797.6188354492188,
|
1789 |
+
"loss": 0.6615,
|
1790 |
+
"rewards/accuracies": 0.6499999761581421,
|
1791 |
+
"rewards/chosen": -2.542229175567627,
|
1792 |
+
"rewards/margins": 0.2951570153236389,
|
1793 |
+
"rewards/rejected": -2.837385892868042,
|
1794 |
+
"step": 1180
|
1795 |
+
},
|
1796 |
+
{
|
1797 |
+
"epoch": 0.6740300198244124,
|
1798 |
+
"grad_norm": 14.255891622779398,
|
1799 |
+
"learning_rate": 1.450432031844959e-07,
|
1800 |
+
"logits/chosen": -0.23423922061920166,
|
1801 |
+
"logits/rejected": -0.25003939867019653,
|
1802 |
+
"logps/chosen": -525.1193237304688,
|
1803 |
+
"logps/rejected": -553.8635864257812,
|
1804 |
+
"loss": 0.5995,
|
1805 |
+
"rewards/accuracies": 0.800000011920929,
|
1806 |
+
"rewards/chosen": -1.4713220596313477,
|
1807 |
+
"rewards/margins": 0.4264037013053894,
|
1808 |
+
"rewards/rejected": -1.8977254629135132,
|
1809 |
+
"step": 1190
|
1810 |
+
},
|
1811 |
+
{
|
1812 |
+
"epoch": 0.6796941376380629,
|
1813 |
+
"grad_norm": 33.28066042950332,
|
1814 |
+
"learning_rate": 1.405751768677732e-07,
|
1815 |
+
"logits/chosen": -0.15600809454917908,
|
1816 |
+
"logits/rejected": -0.1329360455274582,
|
1817 |
+
"logps/chosen": -642.7808837890625,
|
1818 |
+
"logps/rejected": -663.9035034179688,
|
1819 |
+
"loss": 0.5752,
|
1820 |
+
"rewards/accuracies": 0.699999988079071,
|
1821 |
+
"rewards/chosen": -1.7750027179718018,
|
1822 |
+
"rewards/margins": 0.4877189099788666,
|
1823 |
+
"rewards/rejected": -2.262721300125122,
|
1824 |
+
"step": 1200
|
1825 |
+
},
|
1826 |
+
{
|
1827 |
+
"epoch": 0.6853582554517134,
|
1828 |
+
"grad_norm": 16.003547501120885,
|
1829 |
+
"learning_rate": 1.3614997585341592e-07,
|
1830 |
+
"logits/chosen": -0.08609099686145782,
|
1831 |
+
"logits/rejected": -0.06985644996166229,
|
1832 |
+
"logps/chosen": -608.2687377929688,
|
1833 |
+
"logps/rejected": -635.8214721679688,
|
1834 |
+
"loss": 0.5796,
|
1835 |
+
"rewards/accuracies": 0.699999988079071,
|
1836 |
+
"rewards/chosen": -1.749746561050415,
|
1837 |
+
"rewards/margins": 0.35603052377700806,
|
1838 |
+
"rewards/rejected": -2.1057772636413574,
|
1839 |
+
"step": 1210
|
1840 |
+
},
|
1841 |
+
{
|
1842 |
+
"epoch": 0.6910223732653639,
|
1843 |
+
"grad_norm": 15.622558143952144,
|
1844 |
+
"learning_rate": 1.3176933202059066e-07,
|
1845 |
+
"logits/chosen": -0.03839196264743805,
|
1846 |
+
"logits/rejected": -0.09653138369321823,
|
1847 |
+
"logps/chosen": -697.4569702148438,
|
1848 |
+
"logps/rejected": -703.6799926757812,
|
1849 |
+
"loss": 0.5842,
|
1850 |
+
"rewards/accuracies": 0.75,
|
1851 |
+
"rewards/chosen": -1.8517509698867798,
|
1852 |
+
"rewards/margins": 0.5271908640861511,
|
1853 |
+
"rewards/rejected": -2.378941774368286,
|
1854 |
+
"step": 1220
|
1855 |
+
},
|
1856 |
+
{
|
1857 |
+
"epoch": 0.6966864910790145,
|
1858 |
+
"grad_norm": 25.321745533517717,
|
1859 |
+
"learning_rate": 1.2743495981023782e-07,
|
1860 |
+
"logits/chosen": -0.1990116387605667,
|
1861 |
+
"logits/rejected": -0.20654082298278809,
|
1862 |
+
"logps/chosen": -585.44580078125,
|
1863 |
+
"logps/rejected": -625.1630859375,
|
1864 |
+
"loss": 0.6205,
|
1865 |
+
"rewards/accuracies": 0.675000011920929,
|
1866 |
+
"rewards/chosen": -1.6379293203353882,
|
1867 |
+
"rewards/margins": 0.3536279797554016,
|
1868 |
+
"rewards/rejected": -1.9915573596954346,
|
1869 |
+
"step": 1230
|
1870 |
+
},
|
1871 |
+
{
|
1872 |
+
"epoch": 0.702350608892665,
|
1873 |
+
"grad_norm": 17.503280710653502,
|
1874 |
+
"learning_rate": 1.2314855555409628e-07,
|
1875 |
+
"logits/chosen": 0.002707863226532936,
|
1876 |
+
"logits/rejected": -0.1227339655160904,
|
1877 |
+
"logps/chosen": -765.3572998046875,
|
1878 |
+
"logps/rejected": -813.5208129882812,
|
1879 |
+
"loss": 0.5777,
|
1880 |
+
"rewards/accuracies": 0.625,
|
1881 |
+
"rewards/chosen": -2.487704038619995,
|
1882 |
+
"rewards/margins": 0.45925870537757874,
|
1883 |
+
"rewards/rejected": -2.946962594985962,
|
1884 |
+
"step": 1240
|
1885 |
+
},
|
1886 |
+
{
|
1887 |
+
"epoch": 0.7080147267063155,
|
1888 |
+
"grad_norm": 18.496058504632952,
|
1889 |
+
"learning_rate": 1.1891179681081375e-07,
|
1890 |
+
"logits/chosen": -0.29699820280075073,
|
1891 |
+
"logits/rejected": -0.21896013617515564,
|
1892 |
+
"logps/chosen": -615.5306396484375,
|
1893 |
+
"logps/rejected": -633.4918212890625,
|
1894 |
+
"loss": 0.5981,
|
1895 |
+
"rewards/accuracies": 0.7250000238418579,
|
1896 |
+
"rewards/chosen": -1.971556305885315,
|
1897 |
+
"rewards/margins": 0.4856339991092682,
|
1898 |
+
"rewards/rejected": -2.4571902751922607,
|
1899 |
+
"step": 1250
|
1900 |
+
},
|
1901 |
+
{
|
1902 |
+
"epoch": 0.713678844519966,
|
1903 |
+
"grad_norm": 15.706262251859547,
|
1904 |
+
"learning_rate": 1.1472634170940554e-07,
|
1905 |
+
"logits/chosen": -0.13945288956165314,
|
1906 |
+
"logits/rejected": -0.12554016709327698,
|
1907 |
+
"logps/chosen": -695.7122802734375,
|
1908 |
+
"logps/rejected": -725.1580810546875,
|
1909 |
+
"loss": 0.5713,
|
1910 |
+
"rewards/accuracies": 0.824999988079071,
|
1911 |
+
"rewards/chosen": -1.980285882949829,
|
1912 |
+
"rewards/margins": 0.4433578848838806,
|
1913 |
+
"rewards/rejected": -2.4236435890197754,
|
1914 |
+
"step": 1260
|
1915 |
+
},
|
1916 |
+
{
|
1917 |
+
"epoch": 0.7193429623336165,
|
1918 |
+
"grad_norm": 23.487633259948737,
|
1919 |
+
"learning_rate": 1.1059382830031699e-07,
|
1920 |
+
"logits/chosen": -0.06917256116867065,
|
1921 |
+
"logits/rejected": -0.1285679042339325,
|
1922 |
+
"logps/chosen": -598.0589599609375,
|
1923 |
+
"logps/rejected": -606.0611572265625,
|
1924 |
+
"loss": 0.6216,
|
1925 |
+
"rewards/accuracies": 0.5,
|
1926 |
+
"rewards/chosen": -1.9320385456085205,
|
1927 |
+
"rewards/margins": 0.06984461843967438,
|
1928 |
+
"rewards/rejected": -2.001883029937744,
|
1929 |
+
"step": 1270
|
1930 |
+
},
|
1931 |
+
{
|
1932 |
+
"epoch": 0.7250070801472671,
|
1933 |
+
"grad_norm": 30.32460919379098,
|
1934 |
+
"learning_rate": 1.0651587391434364e-07,
|
1935 |
+
"logits/chosen": -0.01632758043706417,
|
1936 |
+
"logits/rejected": 0.0047612241469323635,
|
1937 |
+
"logps/chosen": -612.826171875,
|
1938 |
+
"logps/rejected": -657.8508911132812,
|
1939 |
+
"loss": 0.6225,
|
1940 |
+
"rewards/accuracies": 0.675000011920929,
|
1941 |
+
"rewards/chosen": -2.194361448287964,
|
1942 |
+
"rewards/margins": 0.47857794165611267,
|
1943 |
+
"rewards/rejected": -2.6729393005371094,
|
1944 |
+
"step": 1280
|
1945 |
+
},
|
1946 |
+
{
|
1947 |
+
"epoch": 0.7306711979609176,
|
1948 |
+
"grad_norm": 17.40955248228261,
|
1949 |
+
"learning_rate": 1.0249407452966156e-07,
|
1950 |
+
"logits/chosen": -0.11788008362054825,
|
1951 |
+
"logits/rejected": -0.14579714834690094,
|
1952 |
+
"logps/chosen": -577.5438232421875,
|
1953 |
+
"logps/rejected": -614.2926025390625,
|
1954 |
+
"loss": 0.5966,
|
1955 |
+
"rewards/accuracies": 0.6000000238418579,
|
1956 |
+
"rewards/chosen": -1.6694962978363037,
|
1957 |
+
"rewards/margins": 0.33409008383750916,
|
1958 |
+
"rewards/rejected": -2.0035862922668457,
|
1959 |
+
"step": 1290
|
1960 |
+
},
|
1961 |
+
{
|
1962 |
+
"epoch": 0.7363353157745681,
|
1963 |
+
"grad_norm": 18.249339537864724,
|
1964 |
+
"learning_rate": 9.853000414721278e-08,
|
1965 |
+
"logits/chosen": -0.14815405011177063,
|
1966 |
+
"logits/rejected": -0.16669398546218872,
|
1967 |
+
"logps/chosen": -727.6856689453125,
|
1968 |
+
"logps/rejected": -736.8724975585938,
|
1969 |
+
"loss": 0.6142,
|
1970 |
+
"rewards/accuracies": 0.625,
|
1971 |
+
"rewards/chosen": -2.2588837146759033,
|
1972 |
+
"rewards/margins": 0.05782442167401314,
|
1973 |
+
"rewards/rejected": -2.3167080879211426,
|
1974 |
+
"step": 1300
|
1975 |
+
},
|
1976 |
+
{
|
1977 |
+
"epoch": 0.7419994335882186,
|
1978 |
+
"grad_norm": 19.72793699039372,
|
1979 |
+
"learning_rate": 9.462521417469318e-08,
|
1980 |
+
"logits/chosen": -0.18796461820602417,
|
1981 |
+
"logits/rejected": -0.14377792179584503,
|
1982 |
+
"logps/chosen": -631.5921020507812,
|
1983 |
+
"logps/rejected": -615.8783569335938,
|
1984 |
+
"loss": 0.6164,
|
1985 |
+
"rewards/accuracies": 0.550000011920929,
|
1986 |
+
"rewards/chosen": -1.9607959985733032,
|
1987 |
+
"rewards/margins": 0.09862452745437622,
|
1988 |
+
"rewards/rejected": -2.059420347213745,
|
1989 |
+
"step": 1310
|
1990 |
+
},
|
1991 |
+
{
|
1992 |
+
"epoch": 0.7476635514018691,
|
1993 |
+
"grad_norm": 15.97453620320132,
|
1994 |
+
"learning_rate": 9.078123281938208e-08,
|
1995 |
+
"logits/chosen": -0.02748515084385872,
|
1996 |
+
"logits/rejected": -0.026533063501119614,
|
1997 |
+
"logps/chosen": -657.2438354492188,
|
1998 |
+
"logps/rejected": -698.208984375,
|
1999 |
+
"loss": 0.6275,
|
2000 |
+
"rewards/accuracies": 0.675000011920929,
|
2001 |
+
"rewards/chosen": -1.9977624416351318,
|
2002 |
+
"rewards/margins": 0.47537675499916077,
|
2003 |
+
"rewards/rejected": -2.4731392860412598,
|
2004 |
+
"step": 1320
|
2005 |
+
},
|
2006 |
+
{
|
2007 |
+
"epoch": 0.7533276692155196,
|
2008 |
+
"grad_norm": 18.344545537200407,
|
2009 |
+
"learning_rate": 8.699956449005178e-08,
|
2010 |
+
"logits/chosen": -0.09640650451183319,
|
2011 |
+
"logits/rejected": -0.1486753523349762,
|
2012 |
+
"logps/chosen": -665.8047485351562,
|
2013 |
+
"logps/rejected": -741.2623291015625,
|
2014 |
+
"loss": 0.5892,
|
2015 |
+
"rewards/accuracies": 0.8500000238418579,
|
2016 |
+
"rewards/chosen": -1.8255144357681274,
|
2017 |
+
"rewards/margins": 0.7320832014083862,
|
2018 |
+
"rewards/rejected": -2.5575976371765137,
|
2019 |
+
"step": 1330
|
2020 |
+
},
|
2021 |
+
{
|
2022 |
+
"epoch": 0.7589917870291703,
|
2023 |
+
"grad_norm": 18.92039821837182,
|
2024 |
+
"learning_rate": 8.328168920819112e-08,
|
2025 |
+
"logits/chosen": -0.14237159490585327,
|
2026 |
+
"logits/rejected": -0.09421875327825546,
|
2027 |
+
"logps/chosen": -598.2032470703125,
|
2028 |
+
"logps/rejected": -603.7655639648438,
|
2029 |
+
"loss": 0.5898,
|
2030 |
+
"rewards/accuracies": 0.574999988079071,
|
2031 |
+
"rewards/chosen": -1.6649417877197266,
|
2032 |
+
"rewards/margins": 0.15654519200325012,
|
2033 |
+
"rewards/rejected": -1.8214870691299438,
|
2034 |
+
"step": 1340
|
2035 |
+
},
|
2036 |
+
{
|
2037 |
+
"epoch": 0.7646559048428208,
|
2038 |
+
"grad_norm": 18.64061644797886,
|
2039 |
+
"learning_rate": 7.962906202877345e-08,
|
2040 |
+
"logits/chosen": -0.2773872911930084,
|
2041 |
+
"logits/rejected": -0.33335018157958984,
|
2042 |
+
"logps/chosen": -691.8497314453125,
|
2043 |
+
"logps/rejected": -737.4361572265625,
|
2044 |
+
"loss": 0.6093,
|
2045 |
+
"rewards/accuracies": 0.675000011920929,
|
2046 |
+
"rewards/chosen": -1.9401991367340088,
|
2047 |
+
"rewards/margins": 0.4151093363761902,
|
2048 |
+
"rewards/rejected": -2.3553085327148438,
|
2049 |
+
"step": 1350
|
2050 |
+
},
|
2051 |
+
{
|
2052 |
+
"epoch": 0.7703200226564713,
|
2053 |
+
"grad_norm": 21.726379131857296,
|
2054 |
+
"learning_rate": 7.604311247079553e-08,
|
2055 |
+
"logits/chosen": 0.04578697308897972,
|
2056 |
+
"logits/rejected": 0.061678219586610794,
|
2057 |
+
"logps/chosen": -688.2077026367188,
|
2058 |
+
"logps/rejected": -728.4756469726562,
|
2059 |
+
"loss": 0.6249,
|
2060 |
+
"rewards/accuracies": 0.675000011920929,
|
2061 |
+
"rewards/chosen": -2.2095139026641846,
|
2062 |
+
"rewards/margins": 0.3764384388923645,
|
2063 |
+
"rewards/rejected": -2.5859522819519043,
|
2064 |
+
"step": 1360
|
2065 |
+
},
|
2066 |
+
{
|
2067 |
+
"epoch": 0.7759841404701218,
|
2068 |
+
"grad_norm": 17.40731293276627,
|
2069 |
+
"learning_rate": 7.252524395781051e-08,
|
2070 |
+
"logits/chosen": -0.07461674511432648,
|
2071 |
+
"logits/rejected": -0.13030676543712616,
|
2072 |
+
"logps/chosen": -713.3646240234375,
|
2073 |
+
"logps/rejected": -774.5071411132812,
|
2074 |
+
"loss": 0.6029,
|
2075 |
+
"rewards/accuracies": 0.7749999761581421,
|
2076 |
+
"rewards/chosen": -2.1679880619049072,
|
2077 |
+
"rewards/margins": 0.5716091394424438,
|
2078 |
+
"rewards/rejected": -2.7395970821380615,
|
2079 |
+
"step": 1370
|
2080 |
+
},
|
2081 |
+
{
|
2082 |
+
"epoch": 0.7816482582837723,
|
2083 |
+
"grad_norm": 21.79464945664344,
|
2084 |
+
"learning_rate": 6.907683326867397e-08,
|
2085 |
+
"logits/chosen": -0.12649454176425934,
|
2086 |
+
"logits/rejected": -0.1244879737496376,
|
2087 |
+
"logps/chosen": -536.6094970703125,
|
2088 |
+
"logps/rejected": -555.4559936523438,
|
2089 |
+
"loss": 0.5841,
|
2090 |
+
"rewards/accuracies": 0.6499999761581421,
|
2091 |
+
"rewards/chosen": -1.5893771648406982,
|
2092 |
+
"rewards/margins": 0.261711984872818,
|
2093 |
+
"rewards/rejected": -1.8510891199111938,
|
2094 |
+
"step": 1380
|
2095 |
+
},
|
2096 |
+
{
|
2097 |
+
"epoch": 0.7873123760974228,
|
2098 |
+
"grad_norm": 20.63309990944421,
|
2099 |
+
"learning_rate": 6.569922999871735e-08,
|
2100 |
+
"logits/chosen": -0.05570756644010544,
|
2101 |
+
"logits/rejected": -0.08853740990161896,
|
2102 |
+
"logps/chosen": -629.28515625,
|
2103 |
+
"logps/rejected": -664.021728515625,
|
2104 |
+
"loss": 0.6065,
|
2105 |
+
"rewards/accuracies": 0.800000011920929,
|
2106 |
+
"rewards/chosen": -2.2577807903289795,
|
2107 |
+
"rewards/margins": 0.6096078753471375,
|
2108 |
+
"rewards/rejected": -2.8673884868621826,
|
2109 |
+
"step": 1390
|
2110 |
+
},
|
2111 |
+
{
|
2112 |
+
"epoch": 0.7929764939110734,
|
2113 |
+
"grad_norm": 17.52906274578837,
|
2114 |
+
"learning_rate": 6.239375603156042e-08,
|
2115 |
+
"logits/chosen": -0.22038058936595917,
|
2116 |
+
"logits/rejected": -0.16050884127616882,
|
2117 |
+
"logps/chosen": -776.4019165039062,
|
2118 |
+
"logps/rejected": -753.0433959960938,
|
2119 |
+
"loss": 0.5981,
|
2120 |
+
"rewards/accuracies": 0.75,
|
2121 |
+
"rewards/chosen": -2.1483802795410156,
|
2122 |
+
"rewards/margins": 0.5532088875770569,
|
2123 |
+
"rewards/rejected": -2.7015891075134277,
|
2124 |
+
"step": 1400
|
2125 |
+
},
|
2126 |
+
{
|
2127 |
+
"epoch": 0.7986406117247239,
|
2128 |
+
"grad_norm": 16.192935277358306,
|
2129 |
+
"learning_rate": 5.916170502176937e-08,
|
2130 |
+
"logits/chosen": -0.171335369348526,
|
2131 |
+
"logits/rejected": -0.09694649279117584,
|
2132 |
+
"logps/chosen": -679.2623291015625,
|
2133 |
+
"logps/rejected": -678.6334228515625,
|
2134 |
+
"loss": 0.6244,
|
2135 |
+
"rewards/accuracies": 0.574999988079071,
|
2136 |
+
"rewards/chosen": -2.268268585205078,
|
2137 |
+
"rewards/margins": 0.08156970143318176,
|
2138 |
+
"rewards/rejected": -2.3498384952545166,
|
2139 |
+
"step": 1410
|
2140 |
+
},
|
2141 |
+
{
|
2142 |
+
"epoch": 0.8043047295383744,
|
2143 |
+
"grad_norm": 14.541878638529214,
|
2144 |
+
"learning_rate": 5.6004341888562816e-08,
|
2145 |
+
"logits/chosen": -0.13256961107254028,
|
2146 |
+
"logits/rejected": -0.14669382572174072,
|
2147 |
+
"logps/chosen": -630.8463745117188,
|
2148 |
+
"logps/rejected": -676.73779296875,
|
2149 |
+
"loss": 0.6054,
|
2150 |
+
"rewards/accuracies": 0.699999988079071,
|
2151 |
+
"rewards/chosen": -1.901886224746704,
|
2152 |
+
"rewards/margins": 0.3245258331298828,
|
2153 |
+
"rewards/rejected": -2.226411819458008,
|
2154 |
+
"step": 1420
|
2155 |
+
},
|
2156 |
+
{
|
2157 |
+
"epoch": 0.8099688473520249,
|
2158 |
+
"grad_norm": 22.202056701225032,
|
2159 |
+
"learning_rate": 5.2922902320763296e-08,
|
2160 |
+
"logits/chosen": -0.11466534435749054,
|
2161 |
+
"logits/rejected": -0.17642728984355927,
|
2162 |
+
"logps/chosen": -626.3448486328125,
|
2163 |
+
"logps/rejected": -645.9719848632812,
|
2164 |
+
"loss": 0.5945,
|
2165 |
+
"rewards/accuracies": 0.6000000238418579,
|
2166 |
+
"rewards/chosen": -1.7750495672225952,
|
2167 |
+
"rewards/margins": 0.3986364006996155,
|
2168 |
+
"rewards/rejected": -2.1736862659454346,
|
2169 |
+
"step": 1430
|
2170 |
+
},
|
2171 |
+
{
|
2172 |
+
"epoch": 0.8156329651656754,
|
2173 |
+
"grad_norm": 18.29898787871736,
|
2174 |
+
"learning_rate": 4.9918592293189206e-08,
|
2175 |
+
"logits/chosen": -0.23186799883842468,
|
2176 |
+
"logits/rejected": -0.20798341929912567,
|
2177 |
+
"logps/chosen": -752.2689819335938,
|
2178 |
+
"logps/rejected": -765.903564453125,
|
2179 |
+
"loss": 0.5843,
|
2180 |
+
"rewards/accuracies": 0.6499999761581421,
|
2181 |
+
"rewards/chosen": -2.287684440612793,
|
2182 |
+
"rewards/margins": 0.38349679112434387,
|
2183 |
+
"rewards/rejected": -2.6711812019348145,
|
2184 |
+
"step": 1440
|
2185 |
+
},
|
2186 |
+
{
|
2187 |
+
"epoch": 0.821297082979326,
|
2188 |
+
"grad_norm": 23.001989193030713,
|
2189 |
+
"learning_rate": 4.6992587594675806e-08,
|
2190 |
+
"logits/chosen": -0.15347278118133545,
|
2191 |
+
"logits/rejected": -0.16635027527809143,
|
2192 |
+
"logps/chosen": -672.3074951171875,
|
2193 |
+
"logps/rejected": -702.2915649414062,
|
2194 |
+
"loss": 0.5987,
|
2195 |
+
"rewards/accuracies": 0.7250000238418579,
|
2196 |
+
"rewards/chosen": -2.0705018043518066,
|
2197 |
+
"rewards/margins": 0.5524295568466187,
|
2198 |
+
"rewards/rejected": -2.622931480407715,
|
2199 |
+
"step": 1450
|
2200 |
+
},
|
2201 |
+
{
|
2202 |
+
"epoch": 0.8269612007929765,
|
2203 |
+
"grad_norm": 14.552405766755014,
|
2204 |
+
"learning_rate": 4.414603336790959e-08,
|
2205 |
+
"logits/chosen": -0.29983657598495483,
|
2206 |
+
"logits/rejected": -0.28856927156448364,
|
2207 |
+
"logps/chosen": -660.117919921875,
|
2208 |
+
"logps/rejected": -722.7196655273438,
|
2209 |
+
"loss": 0.5645,
|
2210 |
+
"rewards/accuracies": 0.800000011920929,
|
2211 |
+
"rewards/chosen": -1.8668142557144165,
|
2212 |
+
"rewards/margins": 0.7347536683082581,
|
2213 |
+
"rewards/rejected": -2.6015677452087402,
|
2214 |
+
"step": 1460
|
2215 |
+
},
|
2216 |
+
{
|
2217 |
+
"epoch": 0.832625318606627,
|
2218 |
+
"grad_norm": 19.99667624232941,
|
2219 |
+
"learning_rate": 4.1380043661257024e-08,
|
2220 |
+
"logits/chosen": 0.08385223895311356,
|
2221 |
+
"logits/rejected": 0.006908579729497433,
|
2222 |
+
"logps/chosen": -622.146240234375,
|
2223 |
+
"logps/rejected": -680.0650634765625,
|
2224 |
+
"loss": 0.5934,
|
2225 |
+
"rewards/accuracies": 0.7250000238418579,
|
2226 |
+
"rewards/chosen": -1.9830541610717773,
|
2227 |
+
"rewards/margins": 0.4676002562046051,
|
2228 |
+
"rewards/rejected": -2.4506545066833496,
|
2229 |
+
"step": 1470
|
2230 |
+
},
|
2231 |
+
{
|
2232 |
+
"epoch": 0.8382894364202775,
|
2233 |
+
"grad_norm": 16.875286112617857,
|
2234 |
+
"learning_rate": 3.86957009927624e-08,
|
2235 |
+
"logits/chosen": -0.03332146629691124,
|
2236 |
+
"logits/rejected": -0.046583205461502075,
|
2237 |
+
"logps/chosen": -722.2545166015625,
|
2238 |
+
"logps/rejected": -746.12060546875,
|
2239 |
+
"loss": 0.576,
|
2240 |
+
"rewards/accuracies": 0.699999988079071,
|
2241 |
+
"rewards/chosen": -2.3616933822631836,
|
2242 |
+
"rewards/margins": 0.3179056942462921,
|
2243 |
+
"rewards/rejected": -2.6795990467071533,
|
2244 |
+
"step": 1480
|
2245 |
+
},
|
2246 |
+
{
|
2247 |
+
"epoch": 0.843953554233928,
|
2248 |
+
"grad_norm": 32.05017847240802,
|
2249 |
+
"learning_rate": 3.609405592648543e-08,
|
2250 |
+
"logits/chosen": -0.001988898264244199,
|
2251 |
+
"logits/rejected": -0.06228378415107727,
|
2252 |
+
"logps/chosen": -650.3690185546875,
|
2253 |
+
"logps/rejected": -709.6563720703125,
|
2254 |
+
"loss": 0.5915,
|
2255 |
+
"rewards/accuracies": 0.675000011920929,
|
2256 |
+
"rewards/chosen": -2.5558290481567383,
|
2257 |
+
"rewards/margins": 0.3824766278266907,
|
2258 |
+
"rewards/rejected": -2.938305616378784,
|
2259 |
+
"step": 1490
|
2260 |
+
},
|
2261 |
+
{
|
2262 |
+
"epoch": 0.8496176720475785,
|
2263 |
+
"grad_norm": 15.255726599915509,
|
2264 |
+
"learning_rate": 3.357612666134496e-08,
|
2265 |
+
"logits/chosen": -0.03047587350010872,
|
2266 |
+
"logits/rejected": -0.05225413292646408,
|
2267 |
+
"logps/chosen": -561.4085693359375,
|
2268 |
+
"logps/rejected": -593.854248046875,
|
2269 |
+
"loss": 0.5601,
|
2270 |
+
"rewards/accuracies": 0.7250000238418579,
|
2271 |
+
"rewards/chosen": -1.7476489543914795,
|
2272 |
+
"rewards/margins": 0.5453365445137024,
|
2273 |
+
"rewards/rejected": -2.292985439300537,
|
2274 |
+
"step": 1500
|
2275 |
+
},
|
2276 |
+
{
|
2277 |
+
"epoch": 0.8552817898612292,
|
2278 |
+
"grad_norm": 18.890827754495124,
|
2279 |
+
"learning_rate": 3.1142898632629285e-08,
|
2280 |
+
"logits/chosen": -0.18943175673484802,
|
2281 |
+
"logits/rejected": -0.31268787384033203,
|
2282 |
+
"logps/chosen": -611.166259765625,
|
2283 |
+
"logps/rejected": -627.4059448242188,
|
2284 |
+
"loss": 0.5847,
|
2285 |
+
"rewards/accuracies": 0.75,
|
2286 |
+
"rewards/chosen": -2.0698297023773193,
|
2287 |
+
"rewards/margins": 0.39990168809890747,
|
2288 |
+
"rewards/rejected": -2.469731330871582,
|
2289 |
+
"step": 1510
|
2290 |
+
},
|
2291 |
+
{
|
2292 |
+
"epoch": 0.8609459076748797,
|
2293 |
+
"grad_norm": 28.452391944889012,
|
2294 |
+
"learning_rate": 2.8795324126328596e-08,
|
2295 |
+
"logits/chosen": -0.16797076165676117,
|
2296 |
+
"logits/rejected": -0.18733422458171844,
|
2297 |
+
"logps/chosen": -828.4105224609375,
|
2298 |
+
"logps/rejected": -889.1057739257812,
|
2299 |
+
"loss": 0.6415,
|
2300 |
+
"rewards/accuracies": 0.7250000238418579,
|
2301 |
+
"rewards/chosen": -2.8144371509552,
|
2302 |
+
"rewards/margins": 0.8011550903320312,
|
2303 |
+
"rewards/rejected": -3.6155917644500732,
|
2304 |
+
"step": 1520
|
2305 |
+
},
|
2306 |
+
{
|
2307 |
+
"epoch": 0.8666100254885302,
|
2308 |
+
"grad_norm": 17.872239456429195,
|
2309 |
+
"learning_rate": 2.653432190644156e-08,
|
2310 |
+
"logits/chosen": -0.14598588645458221,
|
2311 |
+
"logits/rejected": -0.1877167820930481,
|
2312 |
+
"logps/chosen": -615.3109130859375,
|
2313 |
+
"logps/rejected": -689.0770874023438,
|
2314 |
+
"loss": 0.5953,
|
2315 |
+
"rewards/accuracies": 0.675000011920929,
|
2316 |
+
"rewards/chosen": -2.064265727996826,
|
2317 |
+
"rewards/margins": 0.5991795659065247,
|
2318 |
+
"rewards/rejected": -2.663445472717285,
|
2319 |
+
"step": 1530
|
2320 |
+
},
|
2321 |
+
{
|
2322 |
+
"epoch": 0.8722741433021807,
|
2323 |
+
"grad_norm": 16.92421864322488,
|
2324 |
+
"learning_rate": 2.4360776855401084e-08,
|
2325 |
+
"logits/chosen": -0.24489791691303253,
|
2326 |
+
"logits/rejected": -0.28221431374549866,
|
2327 |
+
"logps/chosen": -726.8218994140625,
|
2328 |
+
"logps/rejected": -759.2882080078125,
|
2329 |
+
"loss": 0.5991,
|
2330 |
+
"rewards/accuracies": 0.6000000238418579,
|
2331 |
+
"rewards/chosen": -2.599058151245117,
|
2332 |
+
"rewards/margins": 0.23930224776268005,
|
2333 |
+
"rewards/rejected": -2.838360548019409,
|
2334 |
+
"step": 1540
|
2335 |
+
},
|
2336 |
+
{
|
2337 |
+
"epoch": 0.8779382611158312,
|
2338 |
+
"grad_norm": 39.14715681223177,
|
2339 |
+
"learning_rate": 2.2275539627760214e-08,
|
2340 |
+
"logits/chosen": -0.3715534806251526,
|
2341 |
+
"logits/rejected": -0.3498302400112152,
|
2342 |
+
"logps/chosen": -844.0606689453125,
|
2343 |
+
"logps/rejected": -857.6305541992188,
|
2344 |
+
"loss": 0.6096,
|
2345 |
+
"rewards/accuracies": 0.699999988079071,
|
2346 |
+
"rewards/chosen": -2.6733736991882324,
|
2347 |
+
"rewards/margins": 0.4572853147983551,
|
2348 |
+
"rewards/rejected": -3.1306586265563965,
|
2349 |
+
"step": 1550
|
2350 |
+
},
|
2351 |
+
{
|
2352 |
+
"epoch": 0.8836023789294817,
|
2353 |
+
"grad_norm": 23.939306601700995,
|
2354 |
+
"learning_rate": 2.0279426317273835e-08,
|
2355 |
+
"logits/chosen": -0.3317481577396393,
|
2356 |
+
"logits/rejected": -0.2864743173122406,
|
2357 |
+
"logps/chosen": -599.5152587890625,
|
2358 |
+
"logps/rejected": -619.390869140625,
|
2359 |
+
"loss": 0.5896,
|
2360 |
+
"rewards/accuracies": 0.7250000238418579,
|
2361 |
+
"rewards/chosen": -1.6302772760391235,
|
2362 |
+
"rewards/margins": 0.24534249305725098,
|
2363 |
+
"rewards/rejected": -1.875619649887085,
|
2364 |
+
"step": 1560
|
2365 |
+
},
|
2366 |
+
{
|
2367 |
+
"epoch": 0.8892664967431323,
|
2368 |
+
"grad_norm": 17.542398459595375,
|
2369 |
+
"learning_rate": 1.8373218137506004e-08,
|
2370 |
+
"logits/chosen": 0.07265366613864899,
|
2371 |
+
"logits/rejected": 0.0759335532784462,
|
2372 |
+
"logps/chosen": -578.22802734375,
|
2373 |
+
"logps/rejected": -618.4689331054688,
|
2374 |
+
"loss": 0.6003,
|
2375 |
+
"rewards/accuracies": 0.625,
|
2376 |
+
"rewards/chosen": -1.9392379522323608,
|
2377 |
+
"rewards/margins": 0.2717141807079315,
|
2378 |
+
"rewards/rejected": -2.210952043533325,
|
2379 |
+
"step": 1570
|
2380 |
+
},
|
2381 |
+
{
|
2382 |
+
"epoch": 0.8949306145567828,
|
2383 |
+
"grad_norm": 23.722900999719876,
|
2384 |
+
"learning_rate": 1.6557661116088585e-08,
|
2385 |
+
"logits/chosen": -0.03199579566717148,
|
2386 |
+
"logits/rejected": -0.03713482245802879,
|
2387 |
+
"logps/chosen": -615.804443359375,
|
2388 |
+
"logps/rejected": -641.7705688476562,
|
2389 |
+
"loss": 0.6303,
|
2390 |
+
"rewards/accuracies": 0.6000000238418579,
|
2391 |
+
"rewards/chosen": -2.0657472610473633,
|
2392 |
+
"rewards/margins": 0.09946224838495255,
|
2393 |
+
"rewards/rejected": -2.1652092933654785,
|
2394 |
+
"step": 1580
|
2395 |
+
},
|
2396 |
+
{
|
2397 |
+
"epoch": 0.9005947323704333,
|
2398 |
+
"grad_norm": 15.176785050154727,
|
2399 |
+
"learning_rate": 1.4833465802750383e-08,
|
2400 |
+
"logits/chosen": -0.12174008041620255,
|
2401 |
+
"logits/rejected": -0.16289708018302917,
|
2402 |
+
"logps/chosen": -617.8139038085938,
|
2403 |
+
"logps/rejected": -683.48193359375,
|
2404 |
+
"loss": 0.5725,
|
2405 |
+
"rewards/accuracies": 0.7250000238418579,
|
2406 |
+
"rewards/chosen": -1.916107416152954,
|
2407 |
+
"rewards/margins": 0.5993362665176392,
|
2408 |
+
"rewards/rejected": -2.515443801879883,
|
2409 |
+
"step": 1590
|
2410 |
+
},
|
2411 |
+
{
|
2412 |
+
"epoch": 0.9062588501840838,
|
2413 |
+
"grad_norm": 19.26980288418955,
|
2414 |
+
"learning_rate": 1.3201306991231259e-08,
|
2415 |
+
"logits/chosen": -0.20962023735046387,
|
2416 |
+
"logits/rejected": -0.25265225768089294,
|
2417 |
+
"logps/chosen": -677.2613525390625,
|
2418 |
+
"logps/rejected": -693.0345458984375,
|
2419 |
+
"loss": 0.6109,
|
2420 |
+
"rewards/accuracies": 0.75,
|
2421 |
+
"rewards/chosen": -1.8539336919784546,
|
2422 |
+
"rewards/margins": 0.4784063398838043,
|
2423 |
+
"rewards/rejected": -2.3323402404785156,
|
2424 |
+
"step": 1600
|
2425 |
+
},
|
2426 |
+
{
|
2427 |
+
"epoch": 0.9119229679977343,
|
2428 |
+
"grad_norm": 20.376813706730847,
|
2429 |
+
"learning_rate": 1.166182345518979e-08,
|
2430 |
+
"logits/chosen": -0.044456273317337036,
|
2431 |
+
"logits/rejected": -0.03623102977871895,
|
2432 |
+
"logps/chosen": -552.9375610351562,
|
2433 |
+
"logps/rejected": -566.2003784179688,
|
2434 |
+
"loss": 0.6278,
|
2435 |
+
"rewards/accuracies": 0.574999988079071,
|
2436 |
+
"rewards/chosen": -1.7421352863311768,
|
2437 |
+
"rewards/margins": 0.16987690329551697,
|
2438 |
+
"rewards/rejected": -1.9120118618011475,
|
2439 |
+
"step": 1610
|
2440 |
+
},
|
2441 |
+
{
|
2442 |
+
"epoch": 0.9175870858113849,
|
2443 |
+
"grad_norm": 20.363736058976666,
|
2444 |
+
"learning_rate": 1.021561769820814e-08,
|
2445 |
+
"logits/chosen": -0.09859003871679306,
|
2446 |
+
"logits/rejected": -0.07184389978647232,
|
2447 |
+
"logps/chosen": -692.3455810546875,
|
2448 |
+
"logps/rejected": -704.27294921875,
|
2449 |
+
"loss": 0.5858,
|
2450 |
+
"rewards/accuracies": 0.7250000238418579,
|
2451 |
+
"rewards/chosen": -2.0336098670959473,
|
2452 |
+
"rewards/margins": 0.43381887674331665,
|
2453 |
+
"rewards/rejected": -2.467428684234619,
|
2454 |
+
"step": 1620
|
2455 |
+
},
|
2456 |
+
{
|
2457 |
+
"epoch": 0.9232512036250354,
|
2458 |
+
"grad_norm": 23.0620695553384,
|
2459 |
+
"learning_rate": 8.86325571799193e-09,
|
2460 |
+
"logits/chosen": 0.0027242780197411776,
|
2461 |
+
"logits/rejected": -0.07530532777309418,
|
2462 |
+
"logps/chosen": -589.6121215820312,
|
2463 |
+
"logps/rejected": -641.5111694335938,
|
2464 |
+
"loss": 0.6068,
|
2465 |
+
"rewards/accuracies": 0.5249999761581421,
|
2466 |
+
"rewards/chosen": -2.1123204231262207,
|
2467 |
+
"rewards/margins": 0.17711400985717773,
|
2468 |
+
"rewards/rejected": -2.2894344329833984,
|
2469 |
+
"step": 1630
|
2470 |
+
},
|
2471 |
+
{
|
2472 |
+
"epoch": 0.9289153214386859,
|
2473 |
+
"grad_norm": 17.922993935961088,
|
2474 |
+
"learning_rate": 7.60526678485704e-09,
|
2475 |
+
"logits/chosen": -0.12710081040859222,
|
2476 |
+
"logits/rejected": -0.11238690465688705,
|
2477 |
+
"logps/chosen": -620.1683349609375,
|
2478 |
+
"logps/rejected": -702.4774780273438,
|
2479 |
+
"loss": 0.6412,
|
2480 |
+
"rewards/accuracies": 0.800000011920929,
|
2481 |
+
"rewards/chosen": -1.9170653820037842,
|
2482 |
+
"rewards/margins": 0.896059513092041,
|
2483 |
+
"rewards/rejected": -2.813124895095825,
|
2484 |
+
"step": 1640
|
2485 |
+
},
|
2486 |
+
{
|
2487 |
+
"epoch": 0.9345794392523364,
|
2488 |
+
"grad_norm": 14.970821420891314,
|
2489 |
+
"learning_rate": 6.4421432345906915e-09,
|
2490 |
+
"logits/chosen": -0.3055119216442108,
|
2491 |
+
"logits/rejected": -0.28563088178634644,
|
2492 |
+
"logps/chosen": -870.13818359375,
|
2493 |
+
"logps/rejected": -924.6994018554688,
|
2494 |
+
"loss": 0.5671,
|
2495 |
+
"rewards/accuracies": 0.875,
|
2496 |
+
"rewards/chosen": -2.5290729999542236,
|
2497 |
+
"rewards/margins": 0.7610968351364136,
|
2498 |
+
"rewards/rejected": -3.2901699542999268,
|
2499 |
+
"step": 1650
|
2500 |
+
},
|
2501 |
+
{
|
2502 |
+
"epoch": 0.940243557065987,
|
2503 |
+
"grad_norm": 15.233194399042373,
|
2504 |
+
"learning_rate": 5.374340275767136e-09,
|
2505 |
+
"logits/chosen": -0.14172212779521942,
|
2506 |
+
"logits/rejected": -0.286018043756485,
|
2507 |
+
"logps/chosen": -591.59814453125,
|
2508 |
+
"logps/rejected": -662.8147583007812,
|
2509 |
+
"loss": 0.5868,
|
2510 |
+
"rewards/accuracies": 0.75,
|
2511 |
+
"rewards/chosen": -1.7677208185195923,
|
2512 |
+
"rewards/margins": 0.6580394506454468,
|
2513 |
+
"rewards/rejected": -2.425760269165039,
|
2514 |
+
"step": 1660
|
2515 |
+
},
|
2516 |
+
{
|
2517 |
+
"epoch": 0.9459076748796375,
|
2518 |
+
"grad_norm": 22.291131982136136,
|
2519 |
+
"learning_rate": 4.402275811593997e-09,
|
2520 |
+
"logits/chosen": -0.10256578773260117,
|
2521 |
+
"logits/rejected": 0.006013460457324982,
|
2522 |
+
"logps/chosen": -710.4451904296875,
|
2523 |
+
"logps/rejected": -692.7806396484375,
|
2524 |
+
"loss": 0.6044,
|
2525 |
+
"rewards/accuracies": 0.75,
|
2526 |
+
"rewards/chosen": -2.21944522857666,
|
2527 |
+
"rewards/margins": 0.39656540751457214,
|
2528 |
+
"rewards/rejected": -2.6160104274749756,
|
2529 |
+
"step": 1670
|
2530 |
+
},
|
2531 |
+
{
|
2532 |
+
"epoch": 0.9515717926932881,
|
2533 |
+
"grad_norm": 23.960509854892667,
|
2534 |
+
"learning_rate": 3.5263302763585133e-09,
|
2535 |
+
"logits/chosen": -0.2649417519569397,
|
2536 |
+
"logits/rejected": -0.25996100902557373,
|
2537 |
+
"logps/chosen": -680.4586181640625,
|
2538 |
+
"logps/rejected": -733.28564453125,
|
2539 |
+
"loss": 0.5712,
|
2540 |
+
"rewards/accuracies": 0.7250000238418579,
|
2541 |
+
"rewards/chosen": -2.0978713035583496,
|
2542 |
+
"rewards/margins": 0.47943735122680664,
|
2543 |
+
"rewards/rejected": -2.5773086547851562,
|
2544 |
+
"step": 1680
|
2545 |
+
},
|
2546 |
+
{
|
2547 |
+
"epoch": 0.9572359105069386,
|
2548 |
+
"grad_norm": 22.75318099495045,
|
2549 |
+
"learning_rate": 2.7468464865381124e-09,
|
2550 |
+
"logits/chosen": -0.13250204920768738,
|
2551 |
+
"logits/rejected": -0.12757354974746704,
|
2552 |
+
"logps/chosen": -799.2764892578125,
|
2553 |
+
"logps/rejected": -853.9093627929688,
|
2554 |
+
"loss": 0.5613,
|
2555 |
+
"rewards/accuracies": 0.7250000238418579,
|
2556 |
+
"rewards/chosen": -2.5642991065979004,
|
2557 |
+
"rewards/margins": 0.6402150392532349,
|
2558 |
+
"rewards/rejected": -3.204514265060425,
|
2559 |
+
"step": 1690
|
2560 |
+
},
|
2561 |
+
{
|
2562 |
+
"epoch": 0.9629000283205891,
|
2563 |
+
"grad_norm": 18.819415650946482,
|
2564 |
+
"learning_rate": 2.064129506633011e-09,
|
2565 |
+
"logits/chosen": -0.06593064218759537,
|
2566 |
+
"logits/rejected": -0.17728903889656067,
|
2567 |
+
"logps/chosen": -702.3214111328125,
|
2568 |
+
"logps/rejected": -763.5635986328125,
|
2569 |
+
"loss": 0.5817,
|
2570 |
+
"rewards/accuracies": 0.7749999761581421,
|
2571 |
+
"rewards/chosen": -2.0367045402526855,
|
2572 |
+
"rewards/margins": 0.4802042841911316,
|
2573 |
+
"rewards/rejected": -2.516909122467041,
|
2574 |
+
"step": 1700
|
2575 |
+
},
|
2576 |
+
{
|
2577 |
+
"epoch": 0.9685641461342396,
|
2578 |
+
"grad_norm": 22.185275104009445,
|
2579 |
+
"learning_rate": 1.4784465297741632e-09,
|
2580 |
+
"logits/chosen": -0.1805897057056427,
|
2581 |
+
"logits/rejected": -0.153991237282753,
|
2582 |
+
"logps/chosen": -840.6116333007812,
|
2583 |
+
"logps/rejected": -830.1062622070312,
|
2584 |
+
"loss": 0.6013,
|
2585 |
+
"rewards/accuracies": 0.7250000238418579,
|
2586 |
+
"rewards/chosen": -2.6172409057617188,
|
2587 |
+
"rewards/margins": 0.34190982580184937,
|
2588 |
+
"rewards/rejected": -2.959150552749634,
|
2589 |
+
"step": 1710
|
2590 |
+
},
|
2591 |
+
{
|
2592 |
+
"epoch": 0.9742282639478901,
|
2593 |
+
"grad_norm": 17.50705579397618,
|
2594 |
+
"learning_rate": 9.900267731524914e-10,
|
2595 |
+
"logits/chosen": -0.030229410156607628,
|
2596 |
+
"logits/rejected": -0.12910275161266327,
|
2597 |
+
"logps/chosen": -693.0966796875,
|
2598 |
+
"logps/rejected": -743.1204833984375,
|
2599 |
+
"loss": 0.5858,
|
2600 |
+
"rewards/accuracies": 0.7250000238418579,
|
2601 |
+
"rewards/chosen": -2.129164695739746,
|
2602 |
+
"rewards/margins": 0.5310593247413635,
|
2603 |
+
"rewards/rejected": -2.660223960876465,
|
2604 |
+
"step": 1720
|
2605 |
+
},
|
2606 |
+
{
|
2607 |
+
"epoch": 0.9798923817615406,
|
2608 |
+
"grad_norm": 21.78741888106313,
|
2609 |
+
"learning_rate": 5.990613883107565e-10,
|
2610 |
+
"logits/chosen": -0.3164052367210388,
|
2611 |
+
"logits/rejected": -0.26196470856666565,
|
2612 |
+
"logps/chosen": -797.2408447265625,
|
2613 |
+
"logps/rejected": -829.5914306640625,
|
2614 |
+
"loss": 0.6084,
|
2615 |
+
"rewards/accuracies": 0.75,
|
2616 |
+
"rewards/chosen": -2.587916612625122,
|
2617 |
+
"rewards/margins": 0.5189841389656067,
|
2618 |
+
"rewards/rejected": -3.106900691986084,
|
2619 |
+
"step": 1730
|
2620 |
+
},
|
2621 |
+
{
|
2622 |
+
"epoch": 0.9855564995751912,
|
2623 |
+
"grad_norm": 26.170987265565298,
|
2624 |
+
"learning_rate": 3.0570338633312266e-10,
|
2625 |
+
"logits/chosen": -0.04128523916006088,
|
2626 |
+
"logits/rejected": -0.0052064331248402596,
|
2627 |
+
"logps/chosen": -754.0946655273438,
|
2628 |
+
"logps/rejected": -769.3212890625,
|
2629 |
+
"loss": 0.5983,
|
2630 |
+
"rewards/accuracies": 0.5,
|
2631 |
+
"rewards/chosen": -2.7384681701660156,
|
2632 |
+
"rewards/margins": 0.2529481053352356,
|
2633 |
+
"rewards/rejected": -2.9914162158966064,
|
2634 |
+
"step": 1740
|
2635 |
+
},
|
2636 |
+
{
|
2637 |
+
"epoch": 0.9912206173888417,
|
2638 |
+
"grad_norm": 17.716998560167845,
|
2639 |
+
"learning_rate": 1.1006757796153121e-10,
|
2640 |
+
"logits/chosen": -0.19896100461483002,
|
2641 |
+
"logits/rejected": -0.23088839650154114,
|
2642 |
+
"logps/chosen": -755.6218872070312,
|
2643 |
+
"logps/rejected": -767.3886108398438,
|
2644 |
+
"loss": 0.6111,
|
2645 |
+
"rewards/accuracies": 0.75,
|
2646 |
+
"rewards/chosen": -2.123622417449951,
|
2647 |
+
"rewards/margins": 0.8624438047409058,
|
2648 |
+
"rewards/rejected": -2.9860663414001465,
|
2649 |
+
"step": 1750
|
2650 |
+
},
|
2651 |
+
{
|
2652 |
+
"epoch": 0.9968847352024922,
|
2653 |
+
"grad_norm": 21.859141173839067,
|
2654 |
+
"learning_rate": 1.2230528662698913e-11,
|
2655 |
+
"logits/chosen": -0.1374054104089737,
|
2656 |
+
"logits/rejected": -0.1221609115600586,
|
2657 |
+
"logps/chosen": -711.783935546875,
|
2658 |
+
"logps/rejected": -736.2469482421875,
|
2659 |
+
"loss": 0.6195,
|
2660 |
+
"rewards/accuracies": 0.6499999761581421,
|
2661 |
+
"rewards/chosen": -2.2918009757995605,
|
2662 |
+
"rewards/margins": 0.32683807611465454,
|
2663 |
+
"rewards/rejected": -2.6186389923095703,
|
2664 |
+
"step": 1760
|
2665 |
+
},
|
2666 |
+
{
|
2667 |
+
"epoch": 0.9997167941093175,
|
2668 |
+
"step": 1765,
|
2669 |
+
"total_flos": 0.0,
|
2670 |
+
"train_loss": 0.6297533516167919,
|
2671 |
+
"train_runtime": 10521.5341,
|
2672 |
+
"train_samples_per_second": 5.369,
|
2673 |
+
"train_steps_per_second": 0.168
|
2674 |
+
}
|
2675 |
+
],
|
2676 |
+
"logging_steps": 10,
|
2677 |
+
"max_steps": 1765,
|
2678 |
+
"num_input_tokens_seen": 0,
|
2679 |
+
"num_train_epochs": 1,
|
2680 |
+
"save_steps": 100,
|
2681 |
+
"stateful_callbacks": {
|
2682 |
+
"TrainerControl": {
|
2683 |
+
"args": {
|
2684 |
+
"should_epoch_stop": false,
|
2685 |
+
"should_evaluate": false,
|
2686 |
+
"should_log": false,
|
2687 |
+
"should_save": true,
|
2688 |
+
"should_training_stop": true
|
2689 |
+
},
|
2690 |
+
"attributes": {}
|
2691 |
+
}
|
2692 |
+
},
|
2693 |
+
"total_flos": 0.0,
|
2694 |
+
"train_batch_size": 2,
|
2695 |
+
"trial_name": null,
|
2696 |
+
"trial_params": null
|
2697 |
+
}
|