|
{ |
|
"best_metric": 0.899099588394165, |
|
"best_model_checkpoint": "FuturixAI-hate-speech-2/checkpoint-300", |
|
"epoch": 13.714285714285714, |
|
"eval_steps": 50, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 3.3626, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 3.2143, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 8.999999999999999e-05, |
|
"loss": 2.778, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 2.1442, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.00015, |
|
"loss": 1.4083, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 1.2041112184524536, |
|
"eval_runtime": 31.844, |
|
"eval_samples_per_second": 6.281, |
|
"eval_steps_per_second": 0.785, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 1.1391, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.00020999999999999998, |
|
"loss": 1.061, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 1.0083, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 0.00027, |
|
"loss": 0.9991, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 0.0003, |
|
"loss": 0.9615, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"eval_loss": 0.9786862134933472, |
|
"eval_runtime": 31.7381, |
|
"eval_samples_per_second": 6.302, |
|
"eval_steps_per_second": 0.788, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 0.000285, |
|
"loss": 0.953, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 0.00027, |
|
"loss": 0.9031, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 0.00025499999999999996, |
|
"loss": 0.8762, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 0.8833, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 0.000225, |
|
"loss": 0.875, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"eval_loss": 0.904224693775177, |
|
"eval_runtime": 32.9474, |
|
"eval_samples_per_second": 6.07, |
|
"eval_steps_per_second": 0.759, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 0.00020999999999999998, |
|
"loss": 0.8946, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 0.000195, |
|
"loss": 0.8751, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 0.8854, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 0.000165, |
|
"loss": 0.8708, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 0.00015, |
|
"loss": 0.8683, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"eval_loss": 0.8998382091522217, |
|
"eval_runtime": 31.7282, |
|
"eval_samples_per_second": 6.304, |
|
"eval_steps_per_second": 0.788, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.000135, |
|
"loss": 0.8628, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 0.857, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"learning_rate": 0.00010499999999999999, |
|
"loss": 0.8688, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 8.999999999999999e-05, |
|
"loss": 0.8527, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.8495, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"eval_loss": 0.8998110890388489, |
|
"eval_runtime": 32.1816, |
|
"eval_samples_per_second": 6.215, |
|
"eval_steps_per_second": 0.777, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.89, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 0.8639, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 12.34, |
|
"learning_rate": 4.4999999999999996e-05, |
|
"loss": 0.8696, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 0.8524, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"learning_rate": 1.4999999999999999e-05, |
|
"loss": 0.83, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"learning_rate": 0.0, |
|
"loss": 0.8545, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"eval_loss": 0.899099588394165, |
|
"eval_runtime": 32.5062, |
|
"eval_samples_per_second": 6.153, |
|
"eval_steps_per_second": 0.769, |
|
"step": 300 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 300, |
|
"num_train_epochs": 15, |
|
"save_steps": 50, |
|
"total_flos": 2.6513261434503168e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|