{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9887640449438202, "eval_steps": 100, "global_step": 66, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0749063670411985, "grad_norm": 1.5548935152564476, "learning_rate": 1.4285714285714287e-05, "loss": 1.4458, "mean_token_accuracy": 0.6562816568352667, "step": 5 }, { "epoch": 0.149812734082397, "grad_norm": 1.8830484605836868, "learning_rate": 1.9872683547213446e-05, "loss": 1.3445, "mean_token_accuracy": 0.6695685059143874, "step": 10 }, { "epoch": 0.2247191011235955, "grad_norm": 0.8654100123580895, "learning_rate": 1.9106347728549134e-05, "loss": 1.226, "mean_token_accuracy": 0.6923269238251538, "step": 15 }, { "epoch": 0.299625468164794, "grad_norm": 0.5943096378692382, "learning_rate": 1.7698339834299064e-05, "loss": 1.1571, "mean_token_accuracy": 0.7064256188642666, "step": 20 }, { "epoch": 0.37453183520599254, "grad_norm": 0.4676209085255989, "learning_rate": 1.5747874102144073e-05, "loss": 1.1288, "mean_token_accuracy": 0.711906644971702, "step": 25 }, { "epoch": 0.449438202247191, "grad_norm": 0.4382156327540379, "learning_rate": 1.3392388661180303e-05, "loss": 1.0824, "mean_token_accuracy": 0.720971662772769, "step": 30 }, { "epoch": 0.5243445692883895, "grad_norm": 0.3820951165479697, "learning_rate": 1.0797861055530832e-05, "loss": 1.0568, "mean_token_accuracy": 0.7265031930562844, "step": 35 }, { "epoch": 0.599250936329588, "grad_norm": 0.3610180887247636, "learning_rate": 8.147112759128859e-06, "loss": 1.0555, "mean_token_accuracy": 0.726724873033217, "step": 40 }, { "epoch": 0.6741573033707865, "grad_norm": 0.3461477286333746, "learning_rate": 5.626926795411447e-06, "loss": 1.031, "mean_token_accuracy": 0.7318156111871186, "step": 45 }, { "epoch": 0.7490636704119851, "grad_norm": 0.33255510793828263, "learning_rate": 3.414886209349615e-06, "loss": 1.0255, "mean_token_accuracy": 0.7329844427728794, "step": 50 }, { "epoch": 0.8239700374531835, "grad_norm": 0.31534489370179297, "learning_rate": 1.6668608091748495e-06, "loss": 1.0237, "mean_token_accuracy": 0.7332181534901535, "step": 55 }, { "epoch": 0.898876404494382, "grad_norm": 0.3119780254657091, "learning_rate": 5.060239153161872e-07, "loss": 1.024, "mean_token_accuracy": 0.7329648884764794, "step": 60 }, { "epoch": 0.9737827715355806, "grad_norm": 0.3095384399368516, "learning_rate": 1.4173043232380557e-08, "loss": 1.0252, "mean_token_accuracy": 0.7327120227338457, "step": 65 }, { "epoch": 0.9887640449438202, "mean_token_accuracy": 0.7354095790887372, "step": 66, "total_flos": 30026049257472.0, "train_loss": 1.1239717241489526, "train_runtime": 525.5519, "train_samples_per_second": 16.251, "train_steps_per_second": 0.126 } ], "logging_steps": 5, "max_steps": 66, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 30026049257472.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }