{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.2925402242808386, "eval_steps": 500, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005850804485616773, "grad_norm": 4.189145565032959, "learning_rate": 1.9607843137254904e-07, "loss": 0.6022, "step": 1 }, { "epoch": 0.011701608971233545, "grad_norm": 4.088385105133057, "learning_rate": 3.921568627450981e-07, "loss": 0.6105, "step": 2 }, { "epoch": 0.017552413456850317, "grad_norm": 4.105137348175049, "learning_rate": 5.882352941176471e-07, "loss": 0.6234, "step": 3 }, { "epoch": 0.02340321794246709, "grad_norm": 4.010756015777588, "learning_rate": 7.843137254901962e-07, "loss": 0.5629, "step": 4 }, { "epoch": 0.02925402242808386, "grad_norm": 4.201730728149414, "learning_rate": 9.80392156862745e-07, "loss": 0.6236, "step": 5 }, { "epoch": 0.035104826913700635, "grad_norm": 4.13097620010376, "learning_rate": 1.1764705882352942e-06, "loss": 0.6058, "step": 6 }, { "epoch": 0.040955631399317405, "grad_norm": 3.753781318664551, "learning_rate": 1.3725490196078434e-06, "loss": 0.5798, "step": 7 }, { "epoch": 0.04680643588493418, "grad_norm": 3.1203114986419678, "learning_rate": 1.5686274509803923e-06, "loss": 0.5575, "step": 8 }, { "epoch": 0.05265724037055095, "grad_norm": 3.1326870918273926, "learning_rate": 1.7647058823529414e-06, "loss": 0.5794, "step": 9 }, { "epoch": 0.05850804485616772, "grad_norm": 3.01350736618042, "learning_rate": 1.96078431372549e-06, "loss": 0.5721, "step": 10 }, { "epoch": 0.0643588493417845, "grad_norm": 2.0586817264556885, "learning_rate": 2.1568627450980393e-06, "loss": 0.5389, "step": 11 }, { "epoch": 0.07020965382740127, "grad_norm": 2.056138753890991, "learning_rate": 2.3529411764705885e-06, "loss": 0.5578, "step": 12 }, { "epoch": 0.07606045831301804, "grad_norm": 1.8458319902420044, "learning_rate": 2.549019607843137e-06, "loss": 0.5432, "step": 13 }, { "epoch": 0.08191126279863481, "grad_norm": 1.3385547399520874, "learning_rate": 2.7450980392156867e-06, "loss": 0.5375, "step": 14 }, { "epoch": 0.08776206728425158, "grad_norm": 2.10184383392334, "learning_rate": 2.9411764705882355e-06, "loss": 0.4834, "step": 15 }, { "epoch": 0.09361287176986836, "grad_norm": 2.354717254638672, "learning_rate": 3.1372549019607846e-06, "loss": 0.5087, "step": 16 }, { "epoch": 0.09946367625548513, "grad_norm": 2.4186935424804688, "learning_rate": 3.3333333333333333e-06, "loss": 0.5408, "step": 17 }, { "epoch": 0.1053144807411019, "grad_norm": 2.02093243598938, "learning_rate": 3.529411764705883e-06, "loss": 0.4967, "step": 18 }, { "epoch": 0.11116528522671867, "grad_norm": 1.9769740104675293, "learning_rate": 3.7254901960784316e-06, "loss": 0.5429, "step": 19 }, { "epoch": 0.11701608971233544, "grad_norm": 1.4087600708007812, "learning_rate": 3.92156862745098e-06, "loss": 0.4855, "step": 20 }, { "epoch": 0.12286689419795221, "grad_norm": 1.4071195125579834, "learning_rate": 4.11764705882353e-06, "loss": 0.4956, "step": 21 }, { "epoch": 0.128717698683569, "grad_norm": 1.4400174617767334, "learning_rate": 4.313725490196079e-06, "loss": 0.4966, "step": 22 }, { "epoch": 0.13456850316918575, "grad_norm": 1.2176562547683716, "learning_rate": 4.509803921568628e-06, "loss": 0.4892, "step": 23 }, { "epoch": 0.14041930765480254, "grad_norm": 1.0557763576507568, "learning_rate": 4.705882352941177e-06, "loss": 0.4664, "step": 24 }, { "epoch": 0.1462701121404193, "grad_norm": 1.0654219388961792, "learning_rate": 4.901960784313726e-06, "loss": 0.4427, "step": 25 }, { "epoch": 0.15212091662603608, "grad_norm": 0.8639155626296997, "learning_rate": 5.098039215686274e-06, "loss": 0.4676, "step": 26 }, { "epoch": 0.15797172111165286, "grad_norm": 0.8091264963150024, "learning_rate": 5.294117647058824e-06, "loss": 0.4339, "step": 27 }, { "epoch": 0.16382252559726962, "grad_norm": 0.7697594165802002, "learning_rate": 5.4901960784313735e-06, "loss": 0.4164, "step": 28 }, { "epoch": 0.1696733300828864, "grad_norm": 0.8522382378578186, "learning_rate": 5.686274509803922e-06, "loss": 0.4512, "step": 29 }, { "epoch": 0.17552413456850316, "grad_norm": 0.7640376687049866, "learning_rate": 5.882352941176471e-06, "loss": 0.432, "step": 30 }, { "epoch": 0.18137493905411994, "grad_norm": 0.6247867941856384, "learning_rate": 6.07843137254902e-06, "loss": 0.408, "step": 31 }, { "epoch": 0.18722574353973673, "grad_norm": 0.6288900971412659, "learning_rate": 6.274509803921569e-06, "loss": 0.4611, "step": 32 }, { "epoch": 0.19307654802535348, "grad_norm": 0.6182562708854675, "learning_rate": 6.470588235294119e-06, "loss": 0.4257, "step": 33 }, { "epoch": 0.19892735251097027, "grad_norm": 0.6193389892578125, "learning_rate": 6.666666666666667e-06, "loss": 0.4063, "step": 34 }, { "epoch": 0.20477815699658702, "grad_norm": 0.6892727017402649, "learning_rate": 6.862745098039216e-06, "loss": 0.3967, "step": 35 }, { "epoch": 0.2106289614822038, "grad_norm": 0.6725057363510132, "learning_rate": 7.058823529411766e-06, "loss": 0.4428, "step": 36 }, { "epoch": 0.21647976596782056, "grad_norm": 0.5203535556793213, "learning_rate": 7.2549019607843145e-06, "loss": 0.4151, "step": 37 }, { "epoch": 0.22233057045343735, "grad_norm": 0.45232418179512024, "learning_rate": 7.450980392156863e-06, "loss": 0.3666, "step": 38 }, { "epoch": 0.22818137493905413, "grad_norm": 0.5872768759727478, "learning_rate": 7.647058823529411e-06, "loss": 0.4144, "step": 39 }, { "epoch": 0.2340321794246709, "grad_norm": 0.526172399520874, "learning_rate": 7.84313725490196e-06, "loss": 0.4346, "step": 40 }, { "epoch": 0.23988298391028767, "grad_norm": 0.5474228858947754, "learning_rate": 8.03921568627451e-06, "loss": 0.3965, "step": 41 }, { "epoch": 0.24573378839590443, "grad_norm": 0.46727877855300903, "learning_rate": 8.23529411764706e-06, "loss": 0.4417, "step": 42 }, { "epoch": 0.2515845928815212, "grad_norm": 0.40532198548316956, "learning_rate": 8.43137254901961e-06, "loss": 0.3851, "step": 43 }, { "epoch": 0.257435397367138, "grad_norm": 0.4897397458553314, "learning_rate": 8.627450980392157e-06, "loss": 0.4013, "step": 44 }, { "epoch": 0.26328620185275475, "grad_norm": 0.4565890431404114, "learning_rate": 8.823529411764707e-06, "loss": 0.3745, "step": 45 }, { "epoch": 0.2691370063383715, "grad_norm": 0.38417261838912964, "learning_rate": 9.019607843137256e-06, "loss": 0.3783, "step": 46 }, { "epoch": 0.2749878108239883, "grad_norm": 0.40912356972694397, "learning_rate": 9.215686274509804e-06, "loss": 0.3879, "step": 47 }, { "epoch": 0.2808386153096051, "grad_norm": 0.42792415618896484, "learning_rate": 9.411764705882354e-06, "loss": 0.3837, "step": 48 }, { "epoch": 0.28668941979522183, "grad_norm": 0.4394405484199524, "learning_rate": 9.607843137254903e-06, "loss": 0.4004, "step": 49 }, { "epoch": 0.2925402242808386, "grad_norm": 0.4622238576412201, "learning_rate": 9.803921568627451e-06, "loss": 0.409, "step": 50 } ], "logging_steps": 1, "max_steps": 510, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.284609818000097e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }