{ "best_metric": 0.5645565390586853, "best_model_checkpoint": "bert_uncased_L-4_H-512_A-8_stsb/checkpoint-414", "epoch": 23.0, "eval_steps": 500, "global_step": 529, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 6.830661773681641, "learning_rate": 4.9e-05, "loss": 2.5878, "step": 23 }, { "epoch": 1.0, "eval_combined_score": 0.7701561984669311, "eval_loss": 0.9754181504249573, "eval_pearson": 0.7823353387435045, "eval_runtime": 0.6067, "eval_samples_per_second": 2472.44, "eval_spearmanr": 0.7579770581903577, "eval_steps_per_second": 9.89, "step": 23 }, { "epoch": 2.0, "grad_norm": 5.863245487213135, "learning_rate": 4.8e-05, "loss": 0.797, "step": 46 }, { "epoch": 2.0, "eval_combined_score": 0.8474045759571504, "eval_loss": 0.7765601277351379, "eval_pearson": 0.8466114574812236, "eval_runtime": 0.5993, "eval_samples_per_second": 2502.798, "eval_spearmanr": 0.8481976944330771, "eval_steps_per_second": 10.011, "step": 46 }, { "epoch": 3.0, "grad_norm": 5.94612979888916, "learning_rate": 4.7e-05, "loss": 0.5786, "step": 69 }, { "epoch": 3.0, "eval_combined_score": 0.8595259972012628, "eval_loss": 0.6313804984092712, "eval_pearson": 0.8603476034751536, "eval_runtime": 0.5837, "eval_samples_per_second": 2569.683, "eval_spearmanr": 0.8587043909273718, "eval_steps_per_second": 10.279, "step": 69 }, { "epoch": 4.0, "grad_norm": 5.815054416656494, "learning_rate": 4.600000000000001e-05, "loss": 0.4961, "step": 92 }, { "epoch": 4.0, "eval_combined_score": 0.863972207025754, "eval_loss": 0.6342185139656067, "eval_pearson": 0.864253629360168, "eval_runtime": 0.5999, "eval_samples_per_second": 2500.544, "eval_spearmanr": 0.8636907846913399, "eval_steps_per_second": 10.002, "step": 92 }, { "epoch": 5.0, "grad_norm": 5.26311731338501, "learning_rate": 4.5e-05, "loss": 0.3944, "step": 115 }, { "epoch": 5.0, "eval_combined_score": 0.8688625775742702, "eval_loss": 0.6018186807632446, "eval_pearson": 0.8694015311403887, "eval_runtime": 0.5948, "eval_samples_per_second": 2521.799, "eval_spearmanr": 0.8683236240081519, "eval_steps_per_second": 10.087, "step": 115 }, { "epoch": 6.0, "grad_norm": 5.787060737609863, "learning_rate": 4.4000000000000006e-05, "loss": 0.3362, "step": 138 }, { "epoch": 6.0, "eval_combined_score": 0.8657998136907656, "eval_loss": 0.6101198792457581, "eval_pearson": 0.8658762558868609, "eval_runtime": 0.596, "eval_samples_per_second": 2516.574, "eval_spearmanr": 0.8657233714946704, "eval_steps_per_second": 10.066, "step": 138 }, { "epoch": 7.0, "grad_norm": 11.731350898742676, "learning_rate": 4.3e-05, "loss": 0.2932, "step": 161 }, { "epoch": 7.0, "eval_combined_score": 0.867202263600297, "eval_loss": 0.6055566072463989, "eval_pearson": 0.8677929726947374, "eval_runtime": 0.6269, "eval_samples_per_second": 2392.606, "eval_spearmanr": 0.8666115545058564, "eval_steps_per_second": 9.57, "step": 161 }, { "epoch": 8.0, "grad_norm": 6.97740364074707, "learning_rate": 4.2e-05, "loss": 0.2495, "step": 184 }, { "epoch": 8.0, "eval_combined_score": 0.8675369208706127, "eval_loss": 0.6255138516426086, "eval_pearson": 0.8678571378034932, "eval_runtime": 0.6161, "eval_samples_per_second": 2434.767, "eval_spearmanr": 0.8672167039377323, "eval_steps_per_second": 9.739, "step": 184 }, { "epoch": 9.0, "grad_norm": 4.306568145751953, "learning_rate": 4.1e-05, "loss": 0.2268, "step": 207 }, { "epoch": 9.0, "eval_combined_score": 0.8691801970155717, "eval_loss": 0.5970388054847717, "eval_pearson": 0.8699055489590755, "eval_runtime": 0.6258, "eval_samples_per_second": 2396.782, "eval_spearmanr": 0.8684548450720679, "eval_steps_per_second": 9.587, "step": 207 }, { "epoch": 10.0, "grad_norm": 8.294599533081055, "learning_rate": 4e-05, "loss": 0.2037, "step": 230 }, { "epoch": 10.0, "eval_combined_score": 0.868154308441244, "eval_loss": 0.6516512632369995, "eval_pearson": 0.869129623680379, "eval_runtime": 0.6109, "eval_samples_per_second": 2455.261, "eval_spearmanr": 0.8671789932021089, "eval_steps_per_second": 9.821, "step": 230 }, { "epoch": 11.0, "grad_norm": 4.237338542938232, "learning_rate": 3.9000000000000006e-05, "loss": 0.191, "step": 253 }, { "epoch": 11.0, "eval_combined_score": 0.8693045282961145, "eval_loss": 0.601725697517395, "eval_pearson": 0.8709460268802508, "eval_runtime": 0.6389, "eval_samples_per_second": 2347.804, "eval_spearmanr": 0.8676630297119781, "eval_steps_per_second": 9.391, "step": 253 }, { "epoch": 12.0, "grad_norm": 3.5681674480438232, "learning_rate": 3.8e-05, "loss": 0.1678, "step": 276 }, { "epoch": 12.0, "eval_combined_score": 0.8694683064851385, "eval_loss": 0.6096729636192322, "eval_pearson": 0.8704163838060811, "eval_runtime": 0.61, "eval_samples_per_second": 2458.83, "eval_spearmanr": 0.8685202291641958, "eval_steps_per_second": 9.835, "step": 276 }, { "epoch": 13.0, "grad_norm": 3.010383367538452, "learning_rate": 3.7e-05, "loss": 0.1546, "step": 299 }, { "epoch": 13.0, "eval_combined_score": 0.8707355017307861, "eval_loss": 0.6051801443099976, "eval_pearson": 0.8713328676887442, "eval_runtime": 0.6283, "eval_samples_per_second": 2387.444, "eval_spearmanr": 0.8701381357728278, "eval_steps_per_second": 9.55, "step": 299 }, { "epoch": 14.0, "grad_norm": 4.628120422363281, "learning_rate": 3.6e-05, "loss": 0.1486, "step": 322 }, { "epoch": 14.0, "eval_combined_score": 0.8701183473370655, "eval_loss": 0.5913792252540588, "eval_pearson": 0.8713640146825573, "eval_runtime": 0.6333, "eval_samples_per_second": 2368.584, "eval_spearmanr": 0.8688726799915738, "eval_steps_per_second": 9.474, "step": 322 }, { "epoch": 15.0, "grad_norm": 3.274272918701172, "learning_rate": 3.5e-05, "loss": 0.1372, "step": 345 }, { "epoch": 15.0, "eval_combined_score": 0.8719789615058817, "eval_loss": 0.6174795031547546, "eval_pearson": 0.8737840000829027, "eval_runtime": 0.6364, "eval_samples_per_second": 2357.008, "eval_spearmanr": 0.8701739229288608, "eval_steps_per_second": 9.428, "step": 345 }, { "epoch": 16.0, "grad_norm": 2.2427499294281006, "learning_rate": 3.4000000000000007e-05, "loss": 0.131, "step": 368 }, { "epoch": 16.0, "eval_combined_score": 0.8714552917935461, "eval_loss": 0.5825785994529724, "eval_pearson": 0.8726691216431404, "eval_runtime": 0.6147, "eval_samples_per_second": 2440.092, "eval_spearmanr": 0.8702414619439519, "eval_steps_per_second": 9.76, "step": 368 }, { "epoch": 17.0, "grad_norm": 3.5106945037841797, "learning_rate": 3.3e-05, "loss": 0.1216, "step": 391 }, { "epoch": 17.0, "eval_combined_score": 0.8701714131214551, "eval_loss": 0.577867865562439, "eval_pearson": 0.8717439862070048, "eval_runtime": 0.5982, "eval_samples_per_second": 2507.56, "eval_spearmanr": 0.8685988400359054, "eval_steps_per_second": 10.03, "step": 391 }, { "epoch": 18.0, "grad_norm": 4.543298721313477, "learning_rate": 3.2000000000000005e-05, "loss": 0.1145, "step": 414 }, { "epoch": 18.0, "eval_combined_score": 0.8721440455693346, "eval_loss": 0.5645565390586853, "eval_pearson": 0.873854128781376, "eval_runtime": 0.6076, "eval_samples_per_second": 2468.855, "eval_spearmanr": 0.8704339623572931, "eval_steps_per_second": 9.875, "step": 414 }, { "epoch": 19.0, "grad_norm": 4.314229965209961, "learning_rate": 3.1e-05, "loss": 0.1158, "step": 437 }, { "epoch": 19.0, "eval_combined_score": 0.8724136267007434, "eval_loss": 0.5810861587524414, "eval_pearson": 0.8737710324523099, "eval_runtime": 0.6002, "eval_samples_per_second": 2499.237, "eval_spearmanr": 0.871056220949177, "eval_steps_per_second": 9.997, "step": 437 }, { "epoch": 20.0, "grad_norm": 5.715078830718994, "learning_rate": 3e-05, "loss": 0.109, "step": 460 }, { "epoch": 20.0, "eval_combined_score": 0.8741776682213974, "eval_loss": 0.5896406173706055, "eval_pearson": 0.876321941342814, "eval_runtime": 0.599, "eval_samples_per_second": 2504.307, "eval_spearmanr": 0.8720333950999808, "eval_steps_per_second": 10.017, "step": 460 }, { "epoch": 21.0, "grad_norm": 3.1881017684936523, "learning_rate": 2.9e-05, "loss": 0.105, "step": 483 }, { "epoch": 21.0, "eval_combined_score": 0.8720974137486168, "eval_loss": 0.5863229036331177, "eval_pearson": 0.8737022748828657, "eval_runtime": 0.5928, "eval_samples_per_second": 2530.409, "eval_spearmanr": 0.8704925526143681, "eval_steps_per_second": 10.122, "step": 483 }, { "epoch": 22.0, "grad_norm": 3.1537728309631348, "learning_rate": 2.8000000000000003e-05, "loss": 0.0995, "step": 506 }, { "epoch": 22.0, "eval_combined_score": 0.8721144188715132, "eval_loss": 0.5757761001586914, "eval_pearson": 0.8740875105899543, "eval_runtime": 0.6179, "eval_samples_per_second": 2427.472, "eval_spearmanr": 0.8701413271530721, "eval_steps_per_second": 9.71, "step": 506 }, { "epoch": 23.0, "grad_norm": 2.4976413249969482, "learning_rate": 2.7000000000000002e-05, "loss": 0.0971, "step": 529 }, { "epoch": 23.0, "eval_combined_score": 0.8730602670988505, "eval_loss": 0.5780627727508545, "eval_pearson": 0.8748200461236054, "eval_runtime": 0.613, "eval_samples_per_second": 2446.918, "eval_spearmanr": 0.8713004880740955, "eval_steps_per_second": 9.788, "step": 529 }, { "epoch": 23.0, "step": 529, "total_flos": 2614663011045888.0, "train_loss": 0.3415615337782862, "train_runtime": 113.3247, "train_samples_per_second": 2536.516, "train_steps_per_second": 10.148 } ], "logging_steps": 1, "max_steps": 1150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2614663011045888.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }