{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.971563981042654,
  "eval_steps": 500,
  "global_step": 104,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.018957345971563982,
      "grad_norm": 71.69449642421675,
      "learning_rate": 4.545454545454545e-08,
      "logits/chosen": 228.492431640625,
      "logits/rejected": 249.21771240234375,
      "logps/chosen": -447.14471435546875,
      "logps/rejected": -436.09393310546875,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.1895734597156398,
      "grad_norm": 78.4118299616628,
      "learning_rate": 4.545454545454545e-07,
      "logits/chosen": 257.2274475097656,
      "logits/rejected": 249.07215881347656,
      "logps/chosen": -381.537353515625,
      "logps/rejected": -444.5811767578125,
      "loss": 0.71,
      "rewards/accuracies": 0.4861111044883728,
      "rewards/chosen": 0.024299899116158485,
      "rewards/margins": 0.044586196541786194,
      "rewards/rejected": -0.020286299288272858,
      "step": 10
    },
    {
      "epoch": 0.3791469194312796,
      "grad_norm": 99.84300510877705,
      "learning_rate": 4.885348141000122e-07,
      "logits/chosen": 248.119384765625,
      "logits/rejected": 246.07846069335938,
      "logps/chosen": -386.1551818847656,
      "logps/rejected": -414.88385009765625,
      "loss": 0.6619,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -0.11353431642055511,
      "rewards/margins": 0.10505084693431854,
      "rewards/rejected": -0.21858516335487366,
      "step": 20
    },
    {
      "epoch": 0.5687203791469194,
      "grad_norm": 56.97411905200688,
      "learning_rate": 4.5025027361734613e-07,
      "logits/chosen": 247.26358032226562,
      "logits/rejected": 244.18240356445312,
      "logps/chosen": -341.32952880859375,
      "logps/rejected": -402.8668518066406,
      "loss": 0.5739,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.05143852159380913,
      "rewards/margins": 0.7519260048866272,
      "rewards/rejected": -0.8033644556999207,
      "step": 30
    },
    {
      "epoch": 0.7582938388625592,
      "grad_norm": 57.580210507052286,
      "learning_rate": 3.893311157806091e-07,
      "logits/chosen": 247.73263549804688,
      "logits/rejected": 259.1477966308594,
      "logps/chosen": -369.2883605957031,
      "logps/rejected": -386.1142883300781,
      "loss": 0.5751,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": 0.09320323169231415,
      "rewards/margins": 0.881219744682312,
      "rewards/rejected": -0.7880164384841919,
      "step": 40
    },
    {
      "epoch": 0.9478672985781991,
      "grad_norm": 77.32585608048733,
      "learning_rate": 3.126631330646801e-07,
      "logits/chosen": 246.6822052001953,
      "logits/rejected": 251.89175415039062,
      "logps/chosen": -397.3800354003906,
      "logps/rejected": -417.54400634765625,
      "loss": 0.5055,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.00946036446839571,
      "rewards/margins": 0.9901891946792603,
      "rewards/rejected": -0.980728805065155,
      "step": 50
    },
    {
      "epoch": 0.985781990521327,
      "eval_logits/chosen": 314.65277099609375,
      "eval_logits/rejected": 314.80328369140625,
      "eval_logps/chosen": -371.4647521972656,
      "eval_logps/rejected": -432.01666259765625,
      "eval_loss": 0.5193939208984375,
      "eval_rewards/accuracies": 0.7446808218955994,
      "eval_rewards/chosen": -0.17477014660835266,
      "eval_rewards/margins": 0.8593912124633789,
      "eval_rewards/rejected": -1.0341612100601196,
      "eval_runtime": 91.3038,
      "eval_samples_per_second": 8.214,
      "eval_steps_per_second": 0.515,
      "step": 52
    },
    {
      "epoch": 1.1374407582938388,
      "grad_norm": 32.08842708767771,
      "learning_rate": 2.2891223348923882e-07,
      "logits/chosen": 243.8182373046875,
      "logits/rejected": 242.9388885498047,
      "logps/chosen": -341.7854919433594,
      "logps/rejected": -424.074951171875,
      "loss": 0.3496,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": 0.382510781288147,
      "rewards/margins": 1.8218187093734741,
      "rewards/rejected": -1.4393078088760376,
      "step": 60
    },
    {
      "epoch": 1.3270142180094786,
      "grad_norm": 32.178505554608634,
      "learning_rate": 1.4754491880085317e-07,
      "logits/chosen": 250.7259063720703,
      "logits/rejected": 248.94216918945312,
      "logps/chosen": -350.2694091796875,
      "logps/rejected": -398.77850341796875,
      "loss": 0.2373,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": 0.50797039270401,
      "rewards/margins": 2.4789700508117676,
      "rewards/rejected": -1.9709994792938232,
      "step": 70
    },
    {
      "epoch": 1.5165876777251186,
      "grad_norm": 24.814991686693443,
      "learning_rate": 7.775827023107834e-08,
      "logits/chosen": 254.3613739013672,
      "logits/rejected": 242.96975708007812,
      "logps/chosen": -387.6805725097656,
      "logps/rejected": -463.39471435546875,
      "loss": 0.2168,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": 0.2538264989852905,
      "rewards/margins": 2.580048084259033,
      "rewards/rejected": -2.326221227645874,
      "step": 80
    },
    {
      "epoch": 1.7061611374407581,
      "grad_norm": 34.610883975273474,
      "learning_rate": 2.7440387297912122e-08,
      "logits/chosen": 246.24142456054688,
      "logits/rejected": 237.8177947998047,
      "logps/chosen": -372.5790100097656,
      "logps/rejected": -471.82489013671875,
      "loss": 0.2116,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": 0.15084370970726013,
      "rewards/margins": 2.670081615447998,
      "rewards/rejected": -2.519237995147705,
      "step": 90
    },
    {
      "epoch": 1.8957345971563981,
      "grad_norm": 26.75346235433071,
      "learning_rate": 2.27878296044029e-09,
      "logits/chosen": 250.3442840576172,
      "logits/rejected": 250.7106475830078,
      "logps/chosen": -373.6303405761719,
      "logps/rejected": -428.4443359375,
      "loss": 0.2167,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": -0.0009252533200196922,
      "rewards/margins": 2.5818238258361816,
      "rewards/rejected": -2.5827488899230957,
      "step": 100
    },
    {
      "epoch": 1.971563981042654,
      "eval_logits/chosen": 314.4089660644531,
      "eval_logits/rejected": 314.86077880859375,
      "eval_logps/chosen": -382.4696350097656,
      "eval_logps/rejected": -446.88519287109375,
      "eval_loss": 0.5040754079818726,
      "eval_rewards/accuracies": 0.7659574747085571,
      "eval_rewards/chosen": -0.7250128984451294,
      "eval_rewards/margins": 1.0525743961334229,
      "eval_rewards/rejected": -1.7775872945785522,
      "eval_runtime": 86.82,
      "eval_samples_per_second": 8.639,
      "eval_steps_per_second": 0.541,
      "step": 104
    },
    {
      "epoch": 1.971563981042654,
      "step": 104,
      "total_flos": 0.0,
      "train_loss": 0.4161795240182143,
      "train_runtime": 4732.7297,
      "train_samples_per_second": 2.852,
      "train_steps_per_second": 0.022
    }
  ],
  "logging_steps": 10,
  "max_steps": 104,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}