|
{ |
|
"best_metric": 0.5159673470694772, |
|
"best_model_checkpoint": "./xlnet-base-cased/fine_tuned_models/checkpoint-1072", |
|
"epoch": 10.0, |
|
"global_step": 2680, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.6209, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.6063559651374817, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 5.3348, |
|
"eval_samples_per_second": 195.508, |
|
"eval_steps_per_second": 24.556, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.7777777777777777e-05, |
|
"loss": 0.5596, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.5895028114318848, |
|
"eval_matthews_correlation": 0.33728969959232674, |
|
"eval_runtime": 5.3138, |
|
"eval_samples_per_second": 196.282, |
|
"eval_steps_per_second": 24.653, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.555555555555556e-05, |
|
"loss": 0.4363, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.6392635107040405, |
|
"eval_matthews_correlation": 0.3799227380360581, |
|
"eval_runtime": 5.2969, |
|
"eval_samples_per_second": 196.909, |
|
"eval_steps_per_second": 24.732, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.3313, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.5387118458747864, |
|
"eval_matthews_correlation": 0.5159673470694772, |
|
"eval_runtime": 5.2925, |
|
"eval_samples_per_second": 197.072, |
|
"eval_steps_per_second": 24.752, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.1111111111111113e-05, |
|
"loss": 0.2597, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.7145249843597412, |
|
"eval_matthews_correlation": 0.48924710895559326, |
|
"eval_runtime": 5.3082, |
|
"eval_samples_per_second": 196.49, |
|
"eval_steps_per_second": 24.679, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.1937, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.8948436379432678, |
|
"eval_matthews_correlation": 0.4772761198584598, |
|
"eval_runtime": 5.3286, |
|
"eval_samples_per_second": 195.735, |
|
"eval_steps_per_second": 24.584, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.1567, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.9947971105575562, |
|
"eval_matthews_correlation": 0.47467239745267154, |
|
"eval_runtime": 5.3053, |
|
"eval_samples_per_second": 196.596, |
|
"eval_steps_per_second": 24.692, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.1292, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.9624822735786438, |
|
"eval_matthews_correlation": 0.5089710686381267, |
|
"eval_runtime": 5.3084, |
|
"eval_samples_per_second": 196.481, |
|
"eval_steps_per_second": 24.678, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.1037, |
|
"step": 2412 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.052783489227295, |
|
"eval_matthews_correlation": 0.507992911858168, |
|
"eval_runtime": 5.3049, |
|
"eval_samples_per_second": 196.61, |
|
"eval_steps_per_second": 24.694, |
|
"step": 2412 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0943, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.1543114185333252, |
|
"eval_matthews_correlation": 0.5100509365200628, |
|
"eval_runtime": 5.3208, |
|
"eval_samples_per_second": 196.022, |
|
"eval_steps_per_second": 24.62, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 2680, |
|
"total_flos": 3851987691901440.0, |
|
"train_loss": 0.28853215957755474, |
|
"train_runtime": 1321.6062, |
|
"train_samples_per_second": 64.702, |
|
"train_steps_per_second": 2.028 |
|
} |
|
], |
|
"max_steps": 2680, |
|
"num_train_epochs": 10, |
|
"total_flos": 3851987691901440.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|