| { | |
| "best_metric": 0.20319828391075134, | |
| "best_model_checkpoint": "./results/checkpoint-3500", | |
| "epoch": 2.708978328173375, | |
| "eval_steps": 500, | |
| "global_step": 3500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00019999868960045492, | |
| "loss": 0.7358, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00019845929936213215, | |
| "loss": 0.7562, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_loss": 0.481257826089859, | |
| "eval_runtime": 49.5763, | |
| "eval_samples_per_second": 12.183, | |
| "eval_steps_per_second": 3.046, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00019405971991583108, | |
| "loss": 0.7465, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.000186927756656608, | |
| "loss": 0.71, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00017727058924629164, | |
| "loss": 0.71, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_loss": 0.40068519115448, | |
| "eval_runtime": 49.5272, | |
| "eval_samples_per_second": 12.195, | |
| "eval_steps_per_second": 3.049, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00016536875315675275, | |
| "loss": 0.6628, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00015156799026670633, | |
| "loss": 0.6006, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "eval_loss": 0.3466373383998871, | |
| "eval_runtime": 49.4923, | |
| "eval_samples_per_second": 12.204, | |
| "eval_steps_per_second": 3.051, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.00013626920524778533, | |
| "loss": 0.5302, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.00011991681950141926, | |
| "loss": 0.5161, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 0.00010298586095833151, | |
| "loss": 0.4935, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "eval_loss": 0.28296753764152527, | |
| "eval_runtime": 49.4347, | |
| "eval_samples_per_second": 12.218, | |
| "eval_steps_per_second": 3.055, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 8.596816477497136e-05, | |
| "loss": 0.5046, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 6.93580857891615e-05, | |
| "loss": 0.5042, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "eval_loss": 0.2403416633605957, | |
| "eval_runtime": 49.3424, | |
| "eval_samples_per_second": 12.241, | |
| "eval_steps_per_second": 3.06, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 5.3638137780368736e-05, | |
| "loss": 0.4561, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 3.9264976706293624e-05, | |
| "loss": 0.3455, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 2.6656135095147604e-05, | |
| "loss": 0.356, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "eval_loss": 0.2102939337491989, | |
| "eval_runtime": 49.4972, | |
| "eval_samples_per_second": 12.203, | |
| "eval_steps_per_second": 3.051, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 1.6177892952323237e-05, | |
| "loss": 0.3659, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 8.134637525034839e-06, | |
| "loss": 0.3393, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "eval_loss": 0.20319828391075134, | |
| "eval_runtime": 49.6088, | |
| "eval_samples_per_second": 12.175, | |
| "eval_steps_per_second": 3.044, | |
| "step": 3500 | |
| } | |
| ], | |
| "logging_steps": 200, | |
| "max_steps": 3876, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 7247048796733440.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |