{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5131494547787043, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0025657472738935213, "eval_loss": 1.6426724195480347, "eval_runtime": 8.0379, "eval_samples_per_second": 20.528, "eval_steps_per_second": 10.326, "step": 1 }, { "epoch": 0.025657472738935216, "grad_norm": 6.0922040939331055, "learning_rate": 0.0002, "loss": 5.4096, "step": 10 }, { "epoch": 0.05131494547787043, "grad_norm": 4.473964214324951, "learning_rate": 0.0002, "loss": 4.5736, "step": 20 }, { "epoch": 0.07697241821680564, "grad_norm": 5.79656457901001, "learning_rate": 0.0002, "loss": 4.236, "step": 30 }, { "epoch": 0.10262989095574086, "grad_norm": 5.566458702087402, "learning_rate": 0.0002, "loss": 4.4715, "step": 40 }, { "epoch": 0.12828736369467608, "grad_norm": 3.1581058502197266, "learning_rate": 0.0002, "loss": 4.2905, "step": 50 }, { "epoch": 0.12828736369467608, "eval_loss": 0.9886534810066223, "eval_runtime": 7.9175, "eval_samples_per_second": 20.84, "eval_steps_per_second": 10.483, "step": 50 }, { "epoch": 0.1539448364336113, "grad_norm": 5.599130630493164, "learning_rate": 0.0002, "loss": 4.1246, "step": 60 }, { "epoch": 0.1796023091725465, "grad_norm": 6.4759297370910645, "learning_rate": 0.0002, "loss": 4.1768, "step": 70 }, { "epoch": 0.20525978191148173, "grad_norm": 3.1259078979492188, "learning_rate": 0.0002, "loss": 3.9254, "step": 80 }, { "epoch": 0.23091725465041693, "grad_norm": 3.3770580291748047, "learning_rate": 0.0002, "loss": 4.1994, "step": 90 }, { "epoch": 0.25657472738935216, "grad_norm": 3.869525194168091, "learning_rate": 0.0002, "loss": 4.144, "step": 100 }, { "epoch": 0.25657472738935216, "eval_loss": 0.9652090072631836, "eval_runtime": 7.9298, "eval_samples_per_second": 20.807, "eval_steps_per_second": 10.467, "step": 100 }, { "epoch": 0.28223220012828737, "grad_norm": 5.021260738372803, "learning_rate": 0.0002, "loss": 4.0535, "step": 110 }, { "epoch": 0.3078896728672226, "grad_norm": 3.946540355682373, "learning_rate": 0.0002, "loss": 4.2911, "step": 120 }, { "epoch": 0.3335471456061578, "grad_norm": 5.100417137145996, "learning_rate": 0.0002, "loss": 4.1604, "step": 130 }, { "epoch": 0.359204618345093, "grad_norm": 3.0885791778564453, "learning_rate": 0.0002, "loss": 3.846, "step": 140 }, { "epoch": 0.38486209108402825, "grad_norm": 3.596226453781128, "learning_rate": 0.0002, "loss": 4.1906, "step": 150 }, { "epoch": 0.38486209108402825, "eval_loss": 0.9527939558029175, "eval_runtime": 7.9098, "eval_samples_per_second": 20.86, "eval_steps_per_second": 10.493, "step": 150 }, { "epoch": 0.41051956382296345, "grad_norm": 3.3079276084899902, "learning_rate": 0.0002, "loss": 3.9429, "step": 160 }, { "epoch": 0.43617703656189866, "grad_norm": 2.9778153896331787, "learning_rate": 0.0002, "loss": 4.0369, "step": 170 }, { "epoch": 0.46183450930083386, "grad_norm": 3.6058528423309326, "learning_rate": 0.0002, "loss": 4.5255, "step": 180 }, { "epoch": 0.48749198203976907, "grad_norm": 5.9715447425842285, "learning_rate": 0.0002, "loss": 3.9174, "step": 190 }, { "epoch": 0.5131494547787043, "grad_norm": 4.206804275512695, "learning_rate": 0.0002, "loss": 3.9483, "step": 200 }, { "epoch": 0.5131494547787043, "eval_loss": 0.9494011998176575, "eval_runtime": 7.9027, "eval_samples_per_second": 20.879, "eval_steps_per_second": 10.503, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.110150213926912e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }