{ "best_metric": 5.5685906410217285, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.13689253935660506, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0027378507871321013, "grad_norm": 2869.560302734375, "learning_rate": 1e-05, "loss": 18.9407, "step": 1 }, { "epoch": 0.0027378507871321013, "eval_loss": 5.184251308441162, "eval_runtime": 2.1423, "eval_samples_per_second": 287.075, "eval_steps_per_second": 71.886, "step": 1 }, { "epoch": 0.0054757015742642025, "grad_norm": 3299.45068359375, "learning_rate": 2e-05, "loss": 17.9501, "step": 2 }, { "epoch": 0.008213552361396304, "grad_norm": 2877.494140625, "learning_rate": 3e-05, "loss": 18.3079, "step": 3 }, { "epoch": 0.010951403148528405, "grad_norm": 3389.11767578125, "learning_rate": 4e-05, "loss": 19.2225, "step": 4 }, { "epoch": 0.013689253935660506, "grad_norm": 4282.4208984375, "learning_rate": 5e-05, "loss": 19.2442, "step": 5 }, { "epoch": 0.01642710472279261, "grad_norm": 3912.53466796875, "learning_rate": 6e-05, "loss": 18.8629, "step": 6 }, { "epoch": 0.019164955509924708, "grad_norm": 4039.3466796875, "learning_rate": 7e-05, "loss": 18.9577, "step": 7 }, { "epoch": 0.02190280629705681, "grad_norm": 5384.4765625, "learning_rate": 8e-05, "loss": 18.5148, "step": 8 }, { "epoch": 0.024640657084188913, "grad_norm": 3950.795654296875, "learning_rate": 9e-05, "loss": 18.7446, "step": 9 }, { "epoch": 0.02737850787132101, "grad_norm": 4083.84619140625, "learning_rate": 0.0001, "loss": 17.6605, "step": 10 }, { "epoch": 0.030116358658453114, "grad_norm": 3851.057861328125, "learning_rate": 9.999316524962345e-05, "loss": 19.1159, "step": 11 }, { "epoch": 0.03285420944558522, "grad_norm": 4973.96875, "learning_rate": 9.997266286704631e-05, "loss": 18.5552, "step": 12 }, { "epoch": 0.03559206023271732, "grad_norm": 3997.625732421875, "learning_rate": 9.993849845741524e-05, "loss": 18.5181, "step": 13 }, { "epoch": 0.038329911019849415, "grad_norm": 4821.0908203125, "learning_rate": 9.989068136093873e-05, "loss": 18.9077, "step": 14 }, { "epoch": 0.04106776180698152, "grad_norm": 7009.69091796875, "learning_rate": 9.98292246503335e-05, "loss": 18.1293, "step": 15 }, { "epoch": 0.04380561259411362, "grad_norm": 5173.1787109375, "learning_rate": 9.975414512725057e-05, "loss": 18.3298, "step": 16 }, { "epoch": 0.04654346338124572, "grad_norm": 5275.22705078125, "learning_rate": 9.966546331768191e-05, "loss": 17.488, "step": 17 }, { "epoch": 0.049281314168377825, "grad_norm": 6161.54345703125, "learning_rate": 9.956320346634876e-05, "loss": 18.125, "step": 18 }, { "epoch": 0.05201916495550993, "grad_norm": 5380.9541015625, "learning_rate": 9.944739353007344e-05, "loss": 19.6235, "step": 19 }, { "epoch": 0.05475701574264202, "grad_norm": 7076.9326171875, "learning_rate": 9.931806517013612e-05, "loss": 19.6551, "step": 20 }, { "epoch": 0.057494866529774126, "grad_norm": 9411.6328125, "learning_rate": 9.917525374361912e-05, "loss": 19.1774, "step": 21 }, { "epoch": 0.06023271731690623, "grad_norm": 5219.16845703125, "learning_rate": 9.901899829374047e-05, "loss": 17.0145, "step": 22 }, { "epoch": 0.06297056810403832, "grad_norm": 5389.611328125, "learning_rate": 9.884934153917997e-05, "loss": 18.715, "step": 23 }, { "epoch": 0.06570841889117043, "grad_norm": 4909.1533203125, "learning_rate": 9.86663298624003e-05, "loss": 18.0538, "step": 24 }, { "epoch": 0.06844626967830253, "grad_norm": 4762.9892578125, "learning_rate": 9.847001329696653e-05, "loss": 18.122, "step": 25 }, { "epoch": 0.07118412046543464, "grad_norm": 5046.6279296875, "learning_rate": 9.826044551386744e-05, "loss": 16.9833, "step": 26 }, { "epoch": 0.07392197125256673, "grad_norm": 5887.14208984375, "learning_rate": 9.803768380684242e-05, "loss": 18.4829, "step": 27 }, { "epoch": 0.07665982203969883, "grad_norm": 7543.8603515625, "learning_rate": 9.780178907671789e-05, "loss": 19.2867, "step": 28 }, { "epoch": 0.07939767282683094, "grad_norm": 4809.97705078125, "learning_rate": 9.755282581475769e-05, "loss": 18.0179, "step": 29 }, { "epoch": 0.08213552361396304, "grad_norm": 4606.07666015625, "learning_rate": 9.729086208503174e-05, "loss": 18.5569, "step": 30 }, { "epoch": 0.08487337440109514, "grad_norm": 4569.7158203125, "learning_rate": 9.701596950580806e-05, "loss": 18.9656, "step": 31 }, { "epoch": 0.08761122518822724, "grad_norm": 4704.12060546875, "learning_rate": 9.672822322997305e-05, "loss": 18.4432, "step": 32 }, { "epoch": 0.09034907597535935, "grad_norm": 5654.9912109375, "learning_rate": 9.642770192448536e-05, "loss": 18.5611, "step": 33 }, { "epoch": 0.09308692676249145, "grad_norm": 6667.42626953125, "learning_rate": 9.611448774886924e-05, "loss": 19.0092, "step": 34 }, { "epoch": 0.09582477754962354, "grad_norm": 9212.4580078125, "learning_rate": 9.578866633275288e-05, "loss": 19.2947, "step": 35 }, { "epoch": 0.09856262833675565, "grad_norm": 5465.796875, "learning_rate": 9.545032675245813e-05, "loss": 18.93, "step": 36 }, { "epoch": 0.10130047912388775, "grad_norm": 7496.95654296875, "learning_rate": 9.509956150664796e-05, "loss": 21.2021, "step": 37 }, { "epoch": 0.10403832991101986, "grad_norm": 8689.849609375, "learning_rate": 9.473646649103818e-05, "loss": 20.4252, "step": 38 }, { "epoch": 0.10677618069815195, "grad_norm": 7595.74365234375, "learning_rate": 9.43611409721806e-05, "loss": 21.0276, "step": 39 }, { "epoch": 0.10951403148528405, "grad_norm": 7750.20166015625, "learning_rate": 9.397368756032445e-05, "loss": 21.0263, "step": 40 }, { "epoch": 0.11225188227241616, "grad_norm": 9827.46875, "learning_rate": 9.357421218136386e-05, "loss": 21.9128, "step": 41 }, { "epoch": 0.11498973305954825, "grad_norm": 6157.37744140625, "learning_rate": 9.316282404787871e-05, "loss": 20.8637, "step": 42 }, { "epoch": 0.11772758384668036, "grad_norm": 7301.2265625, "learning_rate": 9.273963562927695e-05, "loss": 21.7574, "step": 43 }, { "epoch": 0.12046543463381246, "grad_norm": 7452.45654296875, "learning_rate": 9.230476262104677e-05, "loss": 21.4877, "step": 44 }, { "epoch": 0.12320328542094455, "grad_norm": 6658.0244140625, "learning_rate": 9.185832391312644e-05, "loss": 21.5988, "step": 45 }, { "epoch": 0.12594113620807665, "grad_norm": 9002.0439453125, "learning_rate": 9.140044155740101e-05, "loss": 21.8134, "step": 46 }, { "epoch": 0.12867898699520877, "grad_norm": 8484.044921875, "learning_rate": 9.093124073433463e-05, "loss": 22.6028, "step": 47 }, { "epoch": 0.13141683778234087, "grad_norm": 5393.41650390625, "learning_rate": 9.045084971874738e-05, "loss": 21.4441, "step": 48 }, { "epoch": 0.13415468856947296, "grad_norm": 7081.9052734375, "learning_rate": 8.995939984474624e-05, "loss": 21.2952, "step": 49 }, { "epoch": 0.13689253935660506, "grad_norm": 8458.6767578125, "learning_rate": 8.945702546981969e-05, "loss": 21.0256, "step": 50 }, { "epoch": 0.13689253935660506, "eval_loss": 5.5685906410217285, "eval_runtime": 2.1364, "eval_samples_per_second": 287.871, "eval_steps_per_second": 72.085, "step": 50 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 84793224069120.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }