zephyr-7b-gemma-dpo / trainer_state.json
li-muyang's picture
Model save
613ee94 verified
raw
history blame contribute delete
7.96 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.971563981042654,
"eval_steps": 500,
"global_step": 104,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.018957345971563982,
"grad_norm": 71.69449642421675,
"learning_rate": 4.545454545454545e-08,
"logits/chosen": 228.492431640625,
"logits/rejected": 249.21771240234375,
"logps/chosen": -447.14471435546875,
"logps/rejected": -436.09393310546875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.1895734597156398,
"grad_norm": 78.4118299616628,
"learning_rate": 4.545454545454545e-07,
"logits/chosen": 257.2274475097656,
"logits/rejected": 249.07215881347656,
"logps/chosen": -381.537353515625,
"logps/rejected": -444.5811767578125,
"loss": 0.71,
"rewards/accuracies": 0.4861111044883728,
"rewards/chosen": 0.024299899116158485,
"rewards/margins": 0.044586196541786194,
"rewards/rejected": -0.020286299288272858,
"step": 10
},
{
"epoch": 0.3791469194312796,
"grad_norm": 99.84300510877705,
"learning_rate": 4.885348141000122e-07,
"logits/chosen": 248.119384765625,
"logits/rejected": 246.07846069335938,
"logps/chosen": -386.1551818847656,
"logps/rejected": -414.88385009765625,
"loss": 0.6619,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.11353431642055511,
"rewards/margins": 0.10505084693431854,
"rewards/rejected": -0.21858516335487366,
"step": 20
},
{
"epoch": 0.5687203791469194,
"grad_norm": 56.97411905200688,
"learning_rate": 4.5025027361734613e-07,
"logits/chosen": 247.26358032226562,
"logits/rejected": 244.18240356445312,
"logps/chosen": -341.32952880859375,
"logps/rejected": -402.8668518066406,
"loss": 0.5739,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.05143852159380913,
"rewards/margins": 0.7519260048866272,
"rewards/rejected": -0.8033644556999207,
"step": 30
},
{
"epoch": 0.7582938388625592,
"grad_norm": 57.580210507052286,
"learning_rate": 3.893311157806091e-07,
"logits/chosen": 247.73263549804688,
"logits/rejected": 259.1477966308594,
"logps/chosen": -369.2883605957031,
"logps/rejected": -386.1142883300781,
"loss": 0.5751,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": 0.09320323169231415,
"rewards/margins": 0.881219744682312,
"rewards/rejected": -0.7880164384841919,
"step": 40
},
{
"epoch": 0.9478672985781991,
"grad_norm": 77.32585608048733,
"learning_rate": 3.126631330646801e-07,
"logits/chosen": 246.6822052001953,
"logits/rejected": 251.89175415039062,
"logps/chosen": -397.3800354003906,
"logps/rejected": -417.54400634765625,
"loss": 0.5055,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.00946036446839571,
"rewards/margins": 0.9901891946792603,
"rewards/rejected": -0.980728805065155,
"step": 50
},
{
"epoch": 0.985781990521327,
"eval_logits/chosen": 314.65277099609375,
"eval_logits/rejected": 314.80328369140625,
"eval_logps/chosen": -371.4647521972656,
"eval_logps/rejected": -432.01666259765625,
"eval_loss": 0.5193939208984375,
"eval_rewards/accuracies": 0.7446808218955994,
"eval_rewards/chosen": -0.17477014660835266,
"eval_rewards/margins": 0.8593912124633789,
"eval_rewards/rejected": -1.0341612100601196,
"eval_runtime": 91.3038,
"eval_samples_per_second": 8.214,
"eval_steps_per_second": 0.515,
"step": 52
},
{
"epoch": 1.1374407582938388,
"grad_norm": 32.08842708767771,
"learning_rate": 2.2891223348923882e-07,
"logits/chosen": 243.8182373046875,
"logits/rejected": 242.9388885498047,
"logps/chosen": -341.7854919433594,
"logps/rejected": -424.074951171875,
"loss": 0.3496,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": 0.382510781288147,
"rewards/margins": 1.8218187093734741,
"rewards/rejected": -1.4393078088760376,
"step": 60
},
{
"epoch": 1.3270142180094786,
"grad_norm": 32.178505554608634,
"learning_rate": 1.4754491880085317e-07,
"logits/chosen": 250.7259063720703,
"logits/rejected": 248.94216918945312,
"logps/chosen": -350.2694091796875,
"logps/rejected": -398.77850341796875,
"loss": 0.2373,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 0.50797039270401,
"rewards/margins": 2.4789700508117676,
"rewards/rejected": -1.9709994792938232,
"step": 70
},
{
"epoch": 1.5165876777251186,
"grad_norm": 24.814991686693443,
"learning_rate": 7.775827023107834e-08,
"logits/chosen": 254.3613739013672,
"logits/rejected": 242.96975708007812,
"logps/chosen": -387.6805725097656,
"logps/rejected": -463.39471435546875,
"loss": 0.2168,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 0.2538264989852905,
"rewards/margins": 2.580048084259033,
"rewards/rejected": -2.326221227645874,
"step": 80
},
{
"epoch": 1.7061611374407581,
"grad_norm": 34.610883975273474,
"learning_rate": 2.7440387297912122e-08,
"logits/chosen": 246.24142456054688,
"logits/rejected": 237.8177947998047,
"logps/chosen": -372.5790100097656,
"logps/rejected": -471.82489013671875,
"loss": 0.2116,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 0.15084370970726013,
"rewards/margins": 2.670081615447998,
"rewards/rejected": -2.519237995147705,
"step": 90
},
{
"epoch": 1.8957345971563981,
"grad_norm": 26.75346235433071,
"learning_rate": 2.27878296044029e-09,
"logits/chosen": 250.3442840576172,
"logits/rejected": 250.7106475830078,
"logps/chosen": -373.6303405761719,
"logps/rejected": -428.4443359375,
"loss": 0.2167,
"rewards/accuracies": 0.90625,
"rewards/chosen": -0.0009252533200196922,
"rewards/margins": 2.5818238258361816,
"rewards/rejected": -2.5827488899230957,
"step": 100
},
{
"epoch": 1.971563981042654,
"eval_logits/chosen": 314.4089660644531,
"eval_logits/rejected": 314.86077880859375,
"eval_logps/chosen": -382.4696350097656,
"eval_logps/rejected": -446.88519287109375,
"eval_loss": 0.5040754079818726,
"eval_rewards/accuracies": 0.7659574747085571,
"eval_rewards/chosen": -0.7250128984451294,
"eval_rewards/margins": 1.0525743961334229,
"eval_rewards/rejected": -1.7775872945785522,
"eval_runtime": 86.82,
"eval_samples_per_second": 8.639,
"eval_steps_per_second": 0.541,
"step": 104
},
{
"epoch": 1.971563981042654,
"step": 104,
"total_flos": 0.0,
"train_loss": 0.4161795240182143,
"train_runtime": 4732.7297,
"train_samples_per_second": 2.852,
"train_steps_per_second": 0.022
}
],
"logging_steps": 10,
"max_steps": 104,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}