{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.963855421686747, "eval_steps": 10, "global_step": 186, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1606425702811245, "grad_norm": 8.837871551513672, "learning_rate": 0.000189247311827957, "loss": 8.4808, "step": 10 }, { "epoch": 0.1606425702811245, "eval_loss": 0.47449949383735657, "eval_runtime": 44.2521, "eval_samples_per_second": 11.276, "eval_steps_per_second": 2.825, "step": 10 }, { "epoch": 0.321285140562249, "grad_norm": 3.9745054244995117, "learning_rate": 0.00017849462365591398, "loss": 3.0872, "step": 20 }, { "epoch": 0.321285140562249, "eval_loss": 0.3382565677165985, "eval_runtime": 48.3781, "eval_samples_per_second": 10.315, "eval_steps_per_second": 2.584, "step": 20 }, { "epoch": 0.4819277108433735, "grad_norm": 3.2844247817993164, "learning_rate": 0.00016774193548387098, "loss": 2.7654, "step": 30 }, { "epoch": 0.4819277108433735, "eval_loss": 0.3172107934951782, "eval_runtime": 50.1957, "eval_samples_per_second": 9.941, "eval_steps_per_second": 2.49, "step": 30 }, { "epoch": 0.642570281124498, "grad_norm": 2.986614465713501, "learning_rate": 0.00015698924731182796, "loss": 2.4167, "step": 40 }, { "epoch": 0.642570281124498, "eval_loss": 0.3065057694911957, "eval_runtime": 49.9091, "eval_samples_per_second": 9.998, "eval_steps_per_second": 2.505, "step": 40 }, { "epoch": 0.8032128514056225, "grad_norm": 2.47356915473938, "learning_rate": 0.00014623655913978496, "loss": 2.5307, "step": 50 }, { "epoch": 0.8032128514056225, "eval_loss": 0.296438992023468, "eval_runtime": 45.7506, "eval_samples_per_second": 10.907, "eval_steps_per_second": 2.732, "step": 50 }, { "epoch": 0.963855421686747, "grad_norm": 2.9974865913391113, "learning_rate": 0.00013548387096774193, "loss": 2.48, "step": 60 }, { "epoch": 0.963855421686747, "eval_loss": 0.2858414649963379, "eval_runtime": 47.1672, "eval_samples_per_second": 10.579, "eval_steps_per_second": 2.65, "step": 60 }, { "epoch": 1.1124497991967872, "grad_norm": 2.8883159160614014, "learning_rate": 0.00012473118279569893, "loss": 1.9267, "step": 70 }, { "epoch": 1.1124497991967872, "eval_loss": 0.28903448581695557, "eval_runtime": 45.0342, "eval_samples_per_second": 11.08, "eval_steps_per_second": 2.776, "step": 70 }, { "epoch": 1.2730923694779117, "grad_norm": 2.6527364253997803, "learning_rate": 0.00011397849462365593, "loss": 1.9188, "step": 80 }, { "epoch": 1.2730923694779117, "eval_loss": 0.284964382648468, "eval_runtime": 44.0349, "eval_samples_per_second": 11.332, "eval_steps_per_second": 2.839, "step": 80 }, { "epoch": 1.4337349397590362, "grad_norm": 3.009997606277466, "learning_rate": 0.0001032258064516129, "loss": 1.9806, "step": 90 }, { "epoch": 1.4337349397590362, "eval_loss": 0.2806684672832489, "eval_runtime": 46.0655, "eval_samples_per_second": 10.832, "eval_steps_per_second": 2.714, "step": 90 }, { "epoch": 1.5943775100401605, "grad_norm": 2.434161424636841, "learning_rate": 9.247311827956989e-05, "loss": 1.8809, "step": 100 }, { "epoch": 1.5943775100401605, "eval_loss": 0.27644357085227966, "eval_runtime": 44.1705, "eval_samples_per_second": 11.297, "eval_steps_per_second": 2.83, "step": 100 }, { "epoch": 1.7550200803212852, "grad_norm": 3.0396993160247803, "learning_rate": 8.172043010752689e-05, "loss": 1.8046, "step": 110 }, { "epoch": 1.7550200803212852, "eval_loss": 0.27500617504119873, "eval_runtime": 44.3244, "eval_samples_per_second": 11.258, "eval_steps_per_second": 2.82, "step": 110 }, { "epoch": 1.9156626506024095, "grad_norm": 2.961737632751465, "learning_rate": 7.096774193548388e-05, "loss": 1.929, "step": 120 }, { "epoch": 1.9156626506024095, "eval_loss": 0.2731240689754486, "eval_runtime": 43.8996, "eval_samples_per_second": 11.367, "eval_steps_per_second": 2.847, "step": 120 }, { "epoch": 2.0642570281124497, "grad_norm": 2.265627145767212, "learning_rate": 6.021505376344086e-05, "loss": 1.5674, "step": 130 }, { "epoch": 2.0642570281124497, "eval_loss": 0.2714921832084656, "eval_runtime": 45.3105, "eval_samples_per_second": 11.013, "eval_steps_per_second": 2.759, "step": 130 }, { "epoch": 2.2248995983935744, "grad_norm": 2.743725061416626, "learning_rate": 4.9462365591397855e-05, "loss": 1.5314, "step": 140 }, { "epoch": 2.2248995983935744, "eval_loss": 0.28298166394233704, "eval_runtime": 47.8967, "eval_samples_per_second": 10.418, "eval_steps_per_second": 2.61, "step": 140 }, { "epoch": 2.3855421686746987, "grad_norm": 3.021275043487549, "learning_rate": 3.870967741935484e-05, "loss": 1.5405, "step": 150 }, { "epoch": 2.3855421686746987, "eval_loss": 0.2852790653705597, "eval_runtime": 47.6731, "eval_samples_per_second": 10.467, "eval_steps_per_second": 2.622, "step": 150 }, { "epoch": 2.5461847389558234, "grad_norm": 2.859989881515503, "learning_rate": 2.7956989247311828e-05, "loss": 1.4902, "step": 160 }, { "epoch": 2.5461847389558234, "eval_loss": 0.28039097785949707, "eval_runtime": 49.8843, "eval_samples_per_second": 10.003, "eval_steps_per_second": 2.506, "step": 160 }, { "epoch": 2.7068273092369477, "grad_norm": 2.8204994201660156, "learning_rate": 1.7204301075268818e-05, "loss": 1.4331, "step": 170 }, { "epoch": 2.7068273092369477, "eval_loss": 0.27951765060424805, "eval_runtime": 49.9628, "eval_samples_per_second": 9.987, "eval_steps_per_second": 2.502, "step": 170 }, { "epoch": 2.8674698795180724, "grad_norm": 2.8627824783325195, "learning_rate": 6.451612903225806e-06, "loss": 1.4442, "step": 180 }, { "epoch": 2.8674698795180724, "eval_loss": 0.280029833316803, "eval_runtime": 49.9016, "eval_samples_per_second": 10.0, "eval_steps_per_second": 2.505, "step": 180 } ], "logging_steps": 10, "max_steps": 186, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.632546051948544e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }