{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9936305732484076,
  "eval_steps": 500,
  "global_step": 78,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.012738853503184714,
      "grad_norm": 21.624867146821636,
      "learning_rate": 6.25e-08,
      "logits/chosen": -2.737076759338379,
      "logits/rejected": -2.736344814300537,
      "logps/chosen": -290.1990661621094,
      "logps/pi_response": -186.79766845703125,
      "logps/ref_response": -186.79766845703125,
      "logps/rejected": -404.5589599609375,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.12738853503184713,
      "grad_norm": 19.161383473353002,
      "learning_rate": 4.989935734988097e-07,
      "logits/chosen": -2.6904942989349365,
      "logits/rejected": -2.6532483100891113,
      "logps/chosen": -229.66769409179688,
      "logps/pi_response": -143.10076904296875,
      "logps/ref_response": -144.07577514648438,
      "logps/rejected": -283.5401916503906,
      "loss": 0.6834,
      "rewards/accuracies": 0.5347222089767456,
      "rewards/chosen": -0.012149970047175884,
      "rewards/margins": 0.016954369843006134,
      "rewards/rejected": -0.029104342684149742,
      "step": 10
    },
    {
      "epoch": 0.25477707006369427,
      "grad_norm": 25.259189818921946,
      "learning_rate": 4.646121984004665e-07,
      "logits/chosen": -2.657824754714966,
      "logits/rejected": -2.625739336013794,
      "logps/chosen": -244.0951690673828,
      "logps/pi_response": -130.72300720214844,
      "logps/ref_response": -137.23435974121094,
      "logps/rejected": -318.93115234375,
      "loss": 0.5936,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -0.24313190579414368,
      "rewards/margins": 0.3598789572715759,
      "rewards/rejected": -0.6030109524726868,
      "step": 20
    },
    {
      "epoch": 0.3821656050955414,
      "grad_norm": 21.327135622241446,
      "learning_rate": 3.877242453630256e-07,
      "logits/chosen": -2.723853349685669,
      "logits/rejected": -2.673682451248169,
      "logps/chosen": -309.9010925292969,
      "logps/pi_response": -153.6499481201172,
      "logps/ref_response": -154.19497680664062,
      "logps/rejected": -397.6539001464844,
      "loss": 0.5664,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": -0.5057164430618286,
      "rewards/margins": 0.7059683799743652,
      "rewards/rejected": -1.2116848230361938,
      "step": 30
    },
    {
      "epoch": 0.5095541401273885,
      "grad_norm": 23.49582384200601,
      "learning_rate": 2.8355831645441387e-07,
      "logits/chosen": -2.687347650527954,
      "logits/rejected": -2.6348681449890137,
      "logps/chosen": -253.1380157470703,
      "logps/pi_response": -134.76382446289062,
      "logps/ref_response": -133.91629028320312,
      "logps/rejected": -381.5281066894531,
      "loss": 0.54,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.3809475302696228,
      "rewards/margins": 0.6450485587120056,
      "rewards/rejected": -1.0259960889816284,
      "step": 40
    },
    {
      "epoch": 0.6369426751592356,
      "grad_norm": 25.663450934512035,
      "learning_rate": 1.7274575140626315e-07,
      "logits/chosen": -2.676809787750244,
      "logits/rejected": -2.6430976390838623,
      "logps/chosen": -275.3895568847656,
      "logps/pi_response": -152.5225830078125,
      "logps/ref_response": -138.03114318847656,
      "logps/rejected": -408.5316162109375,
      "loss": 0.5409,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -0.5592767000198364,
      "rewards/margins": 0.6597134470939636,
      "rewards/rejected": -1.2189903259277344,
      "step": 50
    },
    {
      "epoch": 0.7643312101910829,
      "grad_norm": 22.272371630156243,
      "learning_rate": 7.723433775328384e-08,
      "logits/chosen": -2.6719603538513184,
      "logits/rejected": -2.6622776985168457,
      "logps/chosen": -273.9826965332031,
      "logps/pi_response": -162.14956665039062,
      "logps/ref_response": -146.4705810546875,
      "logps/rejected": -414.38818359375,
      "loss": 0.5308,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -0.5259829759597778,
      "rewards/margins": 0.6048363447189331,
      "rewards/rejected": -1.1308192014694214,
      "step": 60
    },
    {
      "epoch": 0.89171974522293,
      "grad_norm": 23.694168211855562,
      "learning_rate": 1.5941282340065697e-08,
      "logits/chosen": -2.5099849700927734,
      "logits/rejected": -2.475076198577881,
      "logps/chosen": -290.50506591796875,
      "logps/pi_response": -179.0775146484375,
      "logps/ref_response": -147.26612854003906,
      "logps/rejected": -441.50421142578125,
      "loss": 0.5225,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -0.6331243515014648,
      "rewards/margins": 0.7736427187919617,
      "rewards/rejected": -1.4067671298980713,
      "step": 70
    },
    {
      "epoch": 0.9936305732484076,
      "step": 78,
      "total_flos": 0.0,
      "train_loss": 0.562170364917853,
      "train_runtime": 1746.7486,
      "train_samples_per_second": 5.725,
      "train_steps_per_second": 0.045
    }
  ],
  "logging_steps": 10,
  "max_steps": 78,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}