ShenaoZhang's picture
Model save
a563fe6 verified
raw
history blame
5.65 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9936305732484076,
"eval_steps": 500,
"global_step": 78,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012738853503184714,
"grad_norm": 21.624867146821636,
"learning_rate": 6.25e-08,
"logits/chosen": -2.737076759338379,
"logits/rejected": -2.736344814300537,
"logps/chosen": -290.1990661621094,
"logps/pi_response": -186.79766845703125,
"logps/ref_response": -186.79766845703125,
"logps/rejected": -404.5589599609375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.12738853503184713,
"grad_norm": 19.161383473353002,
"learning_rate": 4.989935734988097e-07,
"logits/chosen": -2.6904942989349365,
"logits/rejected": -2.6532483100891113,
"logps/chosen": -229.66769409179688,
"logps/pi_response": -143.10076904296875,
"logps/ref_response": -144.07577514648438,
"logps/rejected": -283.5401916503906,
"loss": 0.6834,
"rewards/accuracies": 0.5347222089767456,
"rewards/chosen": -0.012149970047175884,
"rewards/margins": 0.016954369843006134,
"rewards/rejected": -0.029104342684149742,
"step": 10
},
{
"epoch": 0.25477707006369427,
"grad_norm": 25.259189818921946,
"learning_rate": 4.646121984004665e-07,
"logits/chosen": -2.657824754714966,
"logits/rejected": -2.625739336013794,
"logps/chosen": -244.0951690673828,
"logps/pi_response": -130.72300720214844,
"logps/ref_response": -137.23435974121094,
"logps/rejected": -318.93115234375,
"loss": 0.5936,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.24313190579414368,
"rewards/margins": 0.3598789572715759,
"rewards/rejected": -0.6030109524726868,
"step": 20
},
{
"epoch": 0.3821656050955414,
"grad_norm": 21.327135622241446,
"learning_rate": 3.877242453630256e-07,
"logits/chosen": -2.723853349685669,
"logits/rejected": -2.673682451248169,
"logps/chosen": -309.9010925292969,
"logps/pi_response": -153.6499481201172,
"logps/ref_response": -154.19497680664062,
"logps/rejected": -397.6539001464844,
"loss": 0.5664,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.5057164430618286,
"rewards/margins": 0.7059683799743652,
"rewards/rejected": -1.2116848230361938,
"step": 30
},
{
"epoch": 0.5095541401273885,
"grad_norm": 23.49582384200601,
"learning_rate": 2.8355831645441387e-07,
"logits/chosen": -2.687347650527954,
"logits/rejected": -2.6348681449890137,
"logps/chosen": -253.1380157470703,
"logps/pi_response": -134.76382446289062,
"logps/ref_response": -133.91629028320312,
"logps/rejected": -381.5281066894531,
"loss": 0.54,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.3809475302696228,
"rewards/margins": 0.6450485587120056,
"rewards/rejected": -1.0259960889816284,
"step": 40
},
{
"epoch": 0.6369426751592356,
"grad_norm": 25.663450934512035,
"learning_rate": 1.7274575140626315e-07,
"logits/chosen": -2.676809787750244,
"logits/rejected": -2.6430976390838623,
"logps/chosen": -275.3895568847656,
"logps/pi_response": -152.5225830078125,
"logps/ref_response": -138.03114318847656,
"logps/rejected": -408.5316162109375,
"loss": 0.5409,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.5592767000198364,
"rewards/margins": 0.6597134470939636,
"rewards/rejected": -1.2189903259277344,
"step": 50
},
{
"epoch": 0.7643312101910829,
"grad_norm": 22.272371630156243,
"learning_rate": 7.723433775328384e-08,
"logits/chosen": -2.6719603538513184,
"logits/rejected": -2.6622776985168457,
"logps/chosen": -273.9826965332031,
"logps/pi_response": -162.14956665039062,
"logps/ref_response": -146.4705810546875,
"logps/rejected": -414.38818359375,
"loss": 0.5308,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.5259829759597778,
"rewards/margins": 0.6048363447189331,
"rewards/rejected": -1.1308192014694214,
"step": 60
},
{
"epoch": 0.89171974522293,
"grad_norm": 23.694168211855562,
"learning_rate": 1.5941282340065697e-08,
"logits/chosen": -2.5099849700927734,
"logits/rejected": -2.475076198577881,
"logps/chosen": -290.50506591796875,
"logps/pi_response": -179.0775146484375,
"logps/ref_response": -147.26612854003906,
"logps/rejected": -441.50421142578125,
"loss": 0.5225,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.6331243515014648,
"rewards/margins": 0.7736427187919617,
"rewards/rejected": -1.4067671298980713,
"step": 70
},
{
"epoch": 0.9936305732484076,
"step": 78,
"total_flos": 0.0,
"train_loss": 0.562170364917853,
"train_runtime": 1746.7486,
"train_samples_per_second": 5.725,
"train_steps_per_second": 0.045
}
],
"logging_steps": 10,
"max_steps": 78,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}