{
  "best_metric": 2.5834617614746094,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.008263097008758883,
  "eval_steps": 50,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00016526194017517766,
      "grad_norm": 1.594321608543396,
      "learning_rate": 1e-05,
      "loss": 2.4819,
      "step": 1
    },
    {
      "epoch": 0.00016526194017517766,
      "eval_loss": 3.2956507205963135,
      "eval_runtime": 298.3784,
      "eval_samples_per_second": 34.155,
      "eval_steps_per_second": 8.539,
      "step": 1
    },
    {
      "epoch": 0.0003305238803503553,
      "grad_norm": 1.8362373113632202,
      "learning_rate": 2e-05,
      "loss": 2.474,
      "step": 2
    },
    {
      "epoch": 0.0004957858205255329,
      "grad_norm": 2.0558102130889893,
      "learning_rate": 3e-05,
      "loss": 2.5933,
      "step": 3
    },
    {
      "epoch": 0.0006610477607007106,
      "grad_norm": 1.5579073429107666,
      "learning_rate": 4e-05,
      "loss": 2.42,
      "step": 4
    },
    {
      "epoch": 0.0008263097008758883,
      "grad_norm": 2.087095260620117,
      "learning_rate": 5e-05,
      "loss": 2.5554,
      "step": 5
    },
    {
      "epoch": 0.0009915716410510659,
      "grad_norm": 2.1744778156280518,
      "learning_rate": 6e-05,
      "loss": 2.7619,
      "step": 6
    },
    {
      "epoch": 0.0011568335812262437,
      "grad_norm": 2.49554705619812,
      "learning_rate": 7e-05,
      "loss": 2.7488,
      "step": 7
    },
    {
      "epoch": 0.0013220955214014213,
      "grad_norm": 2.244565010070801,
      "learning_rate": 8e-05,
      "loss": 2.6717,
      "step": 8
    },
    {
      "epoch": 0.001487357461576599,
      "grad_norm": 1.63128662109375,
      "learning_rate": 9e-05,
      "loss": 2.5385,
      "step": 9
    },
    {
      "epoch": 0.0016526194017517765,
      "grad_norm": 1.5725308656692505,
      "learning_rate": 0.0001,
      "loss": 2.5384,
      "step": 10
    },
    {
      "epoch": 0.0018178813419269541,
      "grad_norm": 2.130584478378296,
      "learning_rate": 9.999316524962345e-05,
      "loss": 2.8119,
      "step": 11
    },
    {
      "epoch": 0.0019831432821021317,
      "grad_norm": 1.7600144147872925,
      "learning_rate": 9.997266286704631e-05,
      "loss": 2.3037,
      "step": 12
    },
    {
      "epoch": 0.0021484052222773093,
      "grad_norm": 1.920034646987915,
      "learning_rate": 9.993849845741524e-05,
      "loss": 2.515,
      "step": 13
    },
    {
      "epoch": 0.0023136671624524874,
      "grad_norm": 1.5650461912155151,
      "learning_rate": 9.989068136093873e-05,
      "loss": 2.5834,
      "step": 14
    },
    {
      "epoch": 0.002478929102627665,
      "grad_norm": 1.933816909790039,
      "learning_rate": 9.98292246503335e-05,
      "loss": 2.5565,
      "step": 15
    },
    {
      "epoch": 0.0026441910428028426,
      "grad_norm": 1.9310052394866943,
      "learning_rate": 9.975414512725057e-05,
      "loss": 2.3907,
      "step": 16
    },
    {
      "epoch": 0.00280945298297802,
      "grad_norm": 2.0040125846862793,
      "learning_rate": 9.966546331768191e-05,
      "loss": 2.4109,
      "step": 17
    },
    {
      "epoch": 0.002974714923153198,
      "grad_norm": 1.7697948217391968,
      "learning_rate": 9.956320346634876e-05,
      "loss": 2.4493,
      "step": 18
    },
    {
      "epoch": 0.0031399768633283754,
      "grad_norm": 1.7867257595062256,
      "learning_rate": 9.944739353007344e-05,
      "loss": 2.6587,
      "step": 19
    },
    {
      "epoch": 0.003305238803503553,
      "grad_norm": 1.7235246896743774,
      "learning_rate": 9.931806517013612e-05,
      "loss": 2.4252,
      "step": 20
    },
    {
      "epoch": 0.0034705007436787306,
      "grad_norm": 1.6747289896011353,
      "learning_rate": 9.917525374361912e-05,
      "loss": 2.5149,
      "step": 21
    },
    {
      "epoch": 0.0036357626838539082,
      "grad_norm": 1.5265719890594482,
      "learning_rate": 9.901899829374047e-05,
      "loss": 2.3857,
      "step": 22
    },
    {
      "epoch": 0.0038010246240290863,
      "grad_norm": 1.76333749294281,
      "learning_rate": 9.884934153917997e-05,
      "loss": 2.6925,
      "step": 23
    },
    {
      "epoch": 0.0039662865642042635,
      "grad_norm": 1.7076408863067627,
      "learning_rate": 9.86663298624003e-05,
      "loss": 2.5469,
      "step": 24
    },
    {
      "epoch": 0.0041315485043794415,
      "grad_norm": 1.6395620107650757,
      "learning_rate": 9.847001329696653e-05,
      "loss": 2.4239,
      "step": 25
    },
    {
      "epoch": 0.004296810444554619,
      "grad_norm": 1.6570926904678345,
      "learning_rate": 9.826044551386744e-05,
      "loss": 2.3486,
      "step": 26
    },
    {
      "epoch": 0.004462072384729797,
      "grad_norm": 1.6805280447006226,
      "learning_rate": 9.803768380684242e-05,
      "loss": 2.395,
      "step": 27
    },
    {
      "epoch": 0.004627334324904975,
      "grad_norm": 1.8204387426376343,
      "learning_rate": 9.780178907671789e-05,
      "loss": 2.4308,
      "step": 28
    },
    {
      "epoch": 0.004792596265080152,
      "grad_norm": 1.7184354066848755,
      "learning_rate": 9.755282581475769e-05,
      "loss": 2.3483,
      "step": 29
    },
    {
      "epoch": 0.00495785820525533,
      "grad_norm": 2.094357490539551,
      "learning_rate": 9.729086208503174e-05,
      "loss": 2.4923,
      "step": 30
    },
    {
      "epoch": 0.005123120145430507,
      "grad_norm": 1.8471448421478271,
      "learning_rate": 9.701596950580806e-05,
      "loss": 2.4706,
      "step": 31
    },
    {
      "epoch": 0.005288382085605685,
      "grad_norm": 2.124126434326172,
      "learning_rate": 9.672822322997305e-05,
      "loss": 2.5576,
      "step": 32
    },
    {
      "epoch": 0.005453644025780862,
      "grad_norm": 1.85663902759552,
      "learning_rate": 9.642770192448536e-05,
      "loss": 2.4436,
      "step": 33
    },
    {
      "epoch": 0.00561890596595604,
      "grad_norm": 1.9243381023406982,
      "learning_rate": 9.611448774886924e-05,
      "loss": 2.7482,
      "step": 34
    },
    {
      "epoch": 0.005784167906131218,
      "grad_norm": 2.0597476959228516,
      "learning_rate": 9.578866633275288e-05,
      "loss": 2.5625,
      "step": 35
    },
    {
      "epoch": 0.005949429846306396,
      "grad_norm": 1.9038246870040894,
      "learning_rate": 9.545032675245813e-05,
      "loss": 2.6131,
      "step": 36
    },
    {
      "epoch": 0.006114691786481574,
      "grad_norm": 1.9353429079055786,
      "learning_rate": 9.509956150664796e-05,
      "loss": 2.4935,
      "step": 37
    },
    {
      "epoch": 0.006279953726656751,
      "grad_norm": 1.7112752199172974,
      "learning_rate": 9.473646649103818e-05,
      "loss": 2.6013,
      "step": 38
    },
    {
      "epoch": 0.006445215666831929,
      "grad_norm": 2.026658535003662,
      "learning_rate": 9.43611409721806e-05,
      "loss": 2.5474,
      "step": 39
    },
    {
      "epoch": 0.006610477607007106,
      "grad_norm": 1.8708208799362183,
      "learning_rate": 9.397368756032445e-05,
      "loss": 2.5254,
      "step": 40
    },
    {
      "epoch": 0.006775739547182284,
      "grad_norm": 1.9076058864593506,
      "learning_rate": 9.357421218136386e-05,
      "loss": 2.5782,
      "step": 41
    },
    {
      "epoch": 0.006941001487357461,
      "grad_norm": 1.8290849924087524,
      "learning_rate": 9.316282404787871e-05,
      "loss": 2.591,
      "step": 42
    },
    {
      "epoch": 0.007106263427532639,
      "grad_norm": 2.0316317081451416,
      "learning_rate": 9.273963562927695e-05,
      "loss": 2.6142,
      "step": 43
    },
    {
      "epoch": 0.0072715253677078165,
      "grad_norm": 1.8418331146240234,
      "learning_rate": 9.230476262104677e-05,
      "loss": 2.3819,
      "step": 44
    },
    {
      "epoch": 0.0074367873078829945,
      "grad_norm": 2.284447431564331,
      "learning_rate": 9.185832391312644e-05,
      "loss": 2.6443,
      "step": 45
    },
    {
      "epoch": 0.007602049248058173,
      "grad_norm": 2.4346821308135986,
      "learning_rate": 9.140044155740101e-05,
      "loss": 2.5674,
      "step": 46
    },
    {
      "epoch": 0.00776731118823335,
      "grad_norm": 2.468438148498535,
      "learning_rate": 9.093124073433463e-05,
      "loss": 2.5521,
      "step": 47
    },
    {
      "epoch": 0.007932573128408527,
      "grad_norm": 2.068291664123535,
      "learning_rate": 9.045084971874738e-05,
      "loss": 2.391,
      "step": 48
    },
    {
      "epoch": 0.008097835068583706,
      "grad_norm": 2.661067008972168,
      "learning_rate": 8.995939984474624e-05,
      "loss": 2.8375,
      "step": 49
    },
    {
      "epoch": 0.008263097008758883,
      "grad_norm": 2.414609670639038,
      "learning_rate": 8.945702546981969e-05,
      "loss": 2.7058,
      "step": 50
    },
    {
      "epoch": 0.008263097008758883,
      "eval_loss": 2.5834617614746094,
      "eval_runtime": 298.8144,
      "eval_samples_per_second": 34.105,
      "eval_steps_per_second": 8.527,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.3607236534272e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}