{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.23494860499265785, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011747430249632892, "grad_norm": 1.5699902772903442, "learning_rate": 0.00027, "loss": 3.0983, "step": 10 }, { "epoch": 0.023494860499265784, "grad_norm": 1.6029695272445679, "learning_rate": 0.00029991523567092526, "loss": 2.062, "step": 20 }, { "epoch": 0.03524229074889868, "grad_norm": 1.593436360359192, "learning_rate": 0.00029962234616583063, "loss": 1.2074, "step": 30 }, { "epoch": 0.04698972099853157, "grad_norm": 0.5851414799690247, "learning_rate": 0.00029912069357315393, "loss": 0.888, "step": 40 }, { "epoch": 0.05873715124816446, "grad_norm": 0.25992292165756226, "learning_rate": 0.0002984109778320875, "loss": 0.7685, "step": 50 }, { "epoch": 0.07048458149779736, "grad_norm": 0.21082307398319244, "learning_rate": 0.00029749418918542057, "loss": 0.7096, "step": 60 }, { "epoch": 0.08223201174743025, "grad_norm": 0.16843102872371674, "learning_rate": 0.0002963716067978866, "loss": 0.6901, "step": 70 }, { "epoch": 0.09397944199706314, "grad_norm": 0.12076722830533981, "learning_rate": 0.000295044796971387, "loss": 0.6702, "step": 80 }, { "epoch": 0.10572687224669604, "grad_norm": 0.21371866762638092, "learning_rate": 0.000293515610959582, "loss": 0.6353, "step": 90 }, { "epoch": 0.11747430249632893, "grad_norm": 0.13458965718746185, "learning_rate": 0.0002917861823848985, "loss": 0.6479, "step": 100 }, { "epoch": 0.12922173274596183, "grad_norm": 0.265765517950058, "learning_rate": 0.0002898589242615568, "loss": 0.6244, "step": 110 }, { "epoch": 0.14096916299559473, "grad_norm": 0.1473032385110855, "learning_rate": 0.0002877365256287728, "loss": 0.6217, "step": 120 }, { "epoch": 0.1527165932452276, "grad_norm": 0.1591167151927948, "learning_rate": 0.00028542194779883047, "loss": 0.6022, "step": 130 }, { "epoch": 0.1644640234948605, "grad_norm": 0.13270772993564606, "learning_rate": 0.00028291842022526133, "loss": 0.6098, "step": 140 }, { "epoch": 0.1762114537444934, "grad_norm": 0.1444919854402542, "learning_rate": 0.0002802294359968954, "loss": 0.5971, "step": 150 }, { "epoch": 0.18795888399412627, "grad_norm": 0.1571902334690094, "learning_rate": 0.0002773587469640702, "loss": 0.5937, "step": 160 }, { "epoch": 0.19970631424375918, "grad_norm": 0.11585285514593124, "learning_rate": 0.0002743103585037989, "loss": 0.6054, "step": 170 }, { "epoch": 0.21145374449339208, "grad_norm": 0.10303252190351486, "learning_rate": 0.0002710885239312008, "loss": 0.5708, "step": 180 }, { "epoch": 0.22320117474302498, "grad_norm": 0.09355439245700836, "learning_rate": 0.00026769773856499167, "loss": 0.5806, "step": 190 }, { "epoch": 0.23494860499265785, "grad_norm": 0.09288550913333893, "learning_rate": 0.0002641427334553158, "loss": 0.5747, "step": 200 } ], "logging_steps": 10, "max_steps": 851, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.2381453081706496e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }