|
{ |
|
"best_metric": 0.918527364730835, |
|
"best_model_checkpoint": "bert_uncased_L-2_H-128_A-2_stsb/checkpoint-414", |
|
"epoch": 23.0, |
|
"eval_steps": 500, |
|
"global_step": 529, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 35.8241081237793, |
|
"learning_rate": 4.9e-05, |
|
"loss": 8.7461, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_combined_score": 0.2543994206838386, |
|
"eval_loss": 5.905221462249756, |
|
"eval_pearson": 0.2839173055187544, |
|
"eval_runtime": 0.4658, |
|
"eval_samples_per_second": 3220.244, |
|
"eval_spearmanr": 0.22488153584892281, |
|
"eval_steps_per_second": 12.881, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 33.65388870239258, |
|
"learning_rate": 4.8e-05, |
|
"loss": 6.3503, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_combined_score": 0.4252769604902483, |
|
"eval_loss": 4.150002479553223, |
|
"eval_pearson": 0.4510107795982848, |
|
"eval_runtime": 0.4611, |
|
"eval_samples_per_second": 3253.207, |
|
"eval_spearmanr": 0.39954314138221186, |
|
"eval_steps_per_second": 13.013, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 21.51675033569336, |
|
"learning_rate": 4.7e-05, |
|
"loss": 4.6275, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_combined_score": 0.5461725752860105, |
|
"eval_loss": 3.162104845046997, |
|
"eval_pearson": 0.5516136456186445, |
|
"eval_runtime": 0.4638, |
|
"eval_samples_per_second": 3234.086, |
|
"eval_spearmanr": 0.5407315049533766, |
|
"eval_steps_per_second": 12.936, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 21.66692543029785, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 3.6391, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_combined_score": 0.6311217425834987, |
|
"eval_loss": 2.6587579250335693, |
|
"eval_pearson": 0.6167930969982379, |
|
"eval_runtime": 0.4654, |
|
"eval_samples_per_second": 3223.095, |
|
"eval_spearmanr": 0.6454503881687595, |
|
"eval_steps_per_second": 12.892, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 13.800877571105957, |
|
"learning_rate": 4.5e-05, |
|
"loss": 3.0189, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_combined_score": 0.6956593323873657, |
|
"eval_loss": 2.3421335220336914, |
|
"eval_pearson": 0.6721601561878875, |
|
"eval_runtime": 0.4632, |
|
"eval_samples_per_second": 3238.19, |
|
"eval_spearmanr": 0.7191585085868439, |
|
"eval_steps_per_second": 12.953, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 14.926322937011719, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 2.59, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_combined_score": 0.706586085974285, |
|
"eval_loss": 2.046689033508301, |
|
"eval_pearson": 0.6785407842560279, |
|
"eval_runtime": 0.463, |
|
"eval_samples_per_second": 3239.605, |
|
"eval_spearmanr": 0.734631387692542, |
|
"eval_steps_per_second": 12.958, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 13.939260482788086, |
|
"learning_rate": 4.3e-05, |
|
"loss": 2.172, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_combined_score": 0.6480161859933391, |
|
"eval_loss": 1.6885243654251099, |
|
"eval_pearson": 0.668603391634993, |
|
"eval_runtime": 0.4775, |
|
"eval_samples_per_second": 3141.434, |
|
"eval_spearmanr": 0.627428980351685, |
|
"eval_steps_per_second": 12.566, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 14.067152976989746, |
|
"learning_rate": 4.2e-05, |
|
"loss": 1.7948, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_combined_score": 0.6732786795239201, |
|
"eval_loss": 1.431281566619873, |
|
"eval_pearson": 0.6900186409269958, |
|
"eval_runtime": 0.465, |
|
"eval_samples_per_second": 3225.633, |
|
"eval_spearmanr": 0.6565387181208443, |
|
"eval_steps_per_second": 12.903, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 6.382194519042969, |
|
"learning_rate": 4.1e-05, |
|
"loss": 1.5153, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_combined_score": 0.7034603993460962, |
|
"eval_loss": 1.2854173183441162, |
|
"eval_pearson": 0.7049413388446882, |
|
"eval_runtime": 0.4605, |
|
"eval_samples_per_second": 3257.256, |
|
"eval_spearmanr": 0.7019794598475042, |
|
"eval_steps_per_second": 13.029, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 10.295894622802734, |
|
"learning_rate": 4e-05, |
|
"loss": 1.3213, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_combined_score": 0.7219787043070662, |
|
"eval_loss": 1.195310115814209, |
|
"eval_pearson": 0.7135603026926362, |
|
"eval_runtime": 0.4605, |
|
"eval_samples_per_second": 3257.027, |
|
"eval_spearmanr": 0.7303971059214962, |
|
"eval_steps_per_second": 13.028, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 6.82961893081665, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 1.1482, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_combined_score": 0.7035309966478992, |
|
"eval_loss": 1.1936986446380615, |
|
"eval_pearson": 0.7065767660319117, |
|
"eval_runtime": 0.475, |
|
"eval_samples_per_second": 3157.585, |
|
"eval_spearmanr": 0.7004852272638867, |
|
"eval_steps_per_second": 12.63, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 9.198720932006836, |
|
"learning_rate": 3.8e-05, |
|
"loss": 1.0318, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_combined_score": 0.7553056915239181, |
|
"eval_loss": 1.0680350065231323, |
|
"eval_pearson": 0.7378893718267723, |
|
"eval_runtime": 0.4687, |
|
"eval_samples_per_second": 3200.135, |
|
"eval_spearmanr": 0.772722011221064, |
|
"eval_steps_per_second": 12.801, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 5.821434020996094, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.9444, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_combined_score": 0.7661149952144268, |
|
"eval_loss": 1.0874642133712769, |
|
"eval_pearson": 0.7444940160791875, |
|
"eval_runtime": 0.456, |
|
"eval_samples_per_second": 3289.734, |
|
"eval_spearmanr": 0.7877359743496662, |
|
"eval_steps_per_second": 13.159, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 9.378460884094238, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.8957, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_combined_score": 0.769212238777504, |
|
"eval_loss": 1.056565523147583, |
|
"eval_pearson": 0.7515123921681258, |
|
"eval_runtime": 0.4697, |
|
"eval_samples_per_second": 3193.676, |
|
"eval_spearmanr": 0.7869120853868821, |
|
"eval_steps_per_second": 12.775, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 6.185004234313965, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.8101, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_combined_score": 0.7779950729846328, |
|
"eval_loss": 1.0416558980941772, |
|
"eval_pearson": 0.7612691239446224, |
|
"eval_runtime": 0.4606, |
|
"eval_samples_per_second": 3256.887, |
|
"eval_spearmanr": 0.7947210220246432, |
|
"eval_steps_per_second": 13.028, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 6.701705455780029, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.7743, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_combined_score": 0.7826611808218381, |
|
"eval_loss": 0.995955228805542, |
|
"eval_pearson": 0.7708312642938056, |
|
"eval_runtime": 0.4619, |
|
"eval_samples_per_second": 3247.634, |
|
"eval_spearmanr": 0.7944910973498707, |
|
"eval_steps_per_second": 12.991, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 5.949688911437988, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.7407, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_combined_score": 0.7954325942361553, |
|
"eval_loss": 0.934400737285614, |
|
"eval_pearson": 0.784695317917729, |
|
"eval_runtime": 0.4623, |
|
"eval_samples_per_second": 3244.713, |
|
"eval_spearmanr": 0.8061698705545818, |
|
"eval_steps_per_second": 12.979, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 7.805674076080322, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.6842, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_combined_score": 0.7995901521740367, |
|
"eval_loss": 0.918527364730835, |
|
"eval_pearson": 0.7913849353032759, |
|
"eval_runtime": 0.4747, |
|
"eval_samples_per_second": 3159.71, |
|
"eval_spearmanr": 0.8077953690447974, |
|
"eval_steps_per_second": 12.639, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 4.837308883666992, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.6628, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_combined_score": 0.7907113801358363, |
|
"eval_loss": 0.9989282488822937, |
|
"eval_pearson": 0.7835673289998374, |
|
"eval_runtime": 0.4603, |
|
"eval_samples_per_second": 3258.409, |
|
"eval_spearmanr": 0.7978554312718351, |
|
"eval_steps_per_second": 13.034, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 5.843783378601074, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6402, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_combined_score": 0.8017138257057612, |
|
"eval_loss": 0.9198980927467346, |
|
"eval_pearson": 0.7951929564911904, |
|
"eval_runtime": 0.4786, |
|
"eval_samples_per_second": 3134.09, |
|
"eval_spearmanr": 0.8082346949203321, |
|
"eval_steps_per_second": 12.536, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"grad_norm": 4.551052093505859, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.6215, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_combined_score": 0.8026880504005123, |
|
"eval_loss": 0.9275719523429871, |
|
"eval_pearson": 0.7954047530436522, |
|
"eval_runtime": 0.4632, |
|
"eval_samples_per_second": 3238.005, |
|
"eval_spearmanr": 0.8099713477573725, |
|
"eval_steps_per_second": 12.952, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"grad_norm": 4.18523645401001, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 0.6069, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_combined_score": 0.801715014465225, |
|
"eval_loss": 0.9502547383308411, |
|
"eval_pearson": 0.7955890873354261, |
|
"eval_runtime": 0.4659, |
|
"eval_samples_per_second": 3219.757, |
|
"eval_spearmanr": 0.8078409415950238, |
|
"eval_steps_per_second": 12.879, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"grad_norm": 7.212981700897217, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.6101, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_combined_score": 0.8053246776548248, |
|
"eval_loss": 0.978871762752533, |
|
"eval_pearson": 0.7972127596124666, |
|
"eval_runtime": 0.4621, |
|
"eval_samples_per_second": 3245.905, |
|
"eval_spearmanr": 0.8134365956971832, |
|
"eval_steps_per_second": 12.984, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"step": 529, |
|
"total_flos": 83970144932352.0, |
|
"train_loss": 1.997670799212104, |
|
"train_runtime": 60.6946, |
|
"train_samples_per_second": 4736.003, |
|
"train_steps_per_second": 18.947 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 5 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 83970144932352.0, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|