|
{ |
|
"best_metric": 0.8754395915831782, |
|
"best_model_checkpoint": "v3v2dqwsyn8103.san-roberta-large", |
|
"epoch": 3.0, |
|
"global_step": 6126, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"eval_avg_sts": 0.8149644786494956, |
|
"eval_sickr_spearman": 0.7907241592935935, |
|
"eval_stsb_spearman": 0.8392047980053978, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_avg_sts": 0.8388577709591086, |
|
"eval_sickr_spearman": 0.8233415039069986, |
|
"eval_stsb_spearman": 0.8543740380112187, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_avg_sts": 0.8396589199542435, |
|
"eval_sickr_spearman": 0.8129205800767828, |
|
"eval_stsb_spearman": 0.8663972598317042, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.183806725432584e-06, |
|
"loss": 0.5341, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_avg_sts": 0.8363342200594388, |
|
"eval_sickr_spearman": 0.8093077766888108, |
|
"eval_stsb_spearman": 0.8633606634300668, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_avg_sts": 0.8338329341909468, |
|
"eval_sickr_spearman": 0.8055060189477813, |
|
"eval_stsb_spearman": 0.8621598494341124, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_avg_sts": 0.839609299977649, |
|
"eval_sickr_spearman": 0.8169147023790718, |
|
"eval_stsb_spearman": 0.8623038975762263, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_avg_sts": 0.846074071178897, |
|
"eval_sickr_spearman": 0.8259940214839182, |
|
"eval_stsb_spearman": 0.8661541208738758, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.367613450865165e-06, |
|
"loss": 0.1279, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_avg_sts": 0.8453144553836196, |
|
"eval_sickr_spearman": 0.8210699209955844, |
|
"eval_stsb_spearman": 0.8695589897716549, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_avg_sts": 0.845320179475549, |
|
"eval_sickr_spearman": 0.8282974490155858, |
|
"eval_stsb_spearman": 0.8623429099355123, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_avg_sts": 0.8460474498271838, |
|
"eval_sickr_spearman": 0.826672508822833, |
|
"eval_stsb_spearman": 0.8654223908315346, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_avg_sts": 0.8426410233010573, |
|
"eval_sickr_spearman": 0.8190030466384995, |
|
"eval_stsb_spearman": 0.8662789999636149, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.551420176297748e-06, |
|
"loss": 0.1108, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_avg_sts": 0.8549386812398281, |
|
"eval_sickr_spearman": 0.8434020296403363, |
|
"eval_stsb_spearman": 0.8664753328393199, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_avg_sts": 0.8501103601570379, |
|
"eval_sickr_spearman": 0.834787891752918, |
|
"eval_stsb_spearman": 0.8654328285611577, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_avg_sts": 0.849271502533093, |
|
"eval_sickr_spearman": 0.8330568988881898, |
|
"eval_stsb_spearman": 0.8654861061779964, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_avg_sts": 0.8536431760480087, |
|
"eval_sickr_spearman": 0.8382815780093236, |
|
"eval_stsb_spearman": 0.8690047740866937, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.735226901730331e-06, |
|
"loss": 0.0995, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_avg_sts": 0.859307976655676, |
|
"eval_sickr_spearman": 0.8446237487365161, |
|
"eval_stsb_spearman": 0.873992204574836, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_avg_sts": 0.8539432294379701, |
|
"eval_sickr_spearman": 0.839737448725014, |
|
"eval_stsb_spearman": 0.868149010150926, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_avg_sts": 0.8550783980122398, |
|
"eval_sickr_spearman": 0.8398799570029809, |
|
"eval_stsb_spearman": 0.8702768390214988, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_avg_sts": 0.8513744867439872, |
|
"eval_sickr_spearman": 0.8382596758270676, |
|
"eval_stsb_spearman": 0.8644892976609067, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5.919033627162912e-06, |
|
"loss": 0.0755, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_avg_sts": 0.852586624774087, |
|
"eval_sickr_spearman": 0.8348869799151848, |
|
"eval_stsb_spearman": 0.8702862696329892, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_avg_sts": 0.8536153767856529, |
|
"eval_sickr_spearman": 0.8408841432407476, |
|
"eval_stsb_spearman": 0.8663466103305583, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_avg_sts": 0.8516843556326315, |
|
"eval_sickr_spearman": 0.8374268645592322, |
|
"eval_stsb_spearman": 0.8659418467060308, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_avg_sts": 0.8415420172889546, |
|
"eval_sickr_spearman": 0.819215344106856, |
|
"eval_stsb_spearman": 0.8638686904710532, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.102840352595495e-06, |
|
"loss": 0.0701, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_avg_sts": 0.8561622122931456, |
|
"eval_sickr_spearman": 0.8410006666928366, |
|
"eval_stsb_spearman": 0.8713237578934547, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_avg_sts": 0.8482745103445485, |
|
"eval_sickr_spearman": 0.8300929476495453, |
|
"eval_stsb_spearman": 0.8664560730395516, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_avg_sts": 0.8509009129630769, |
|
"eval_sickr_spearman": 0.8311688923528606, |
|
"eval_stsb_spearman": 0.8706329335732933, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_avg_sts": 0.8546193575409455, |
|
"eval_sickr_spearman": 0.833799123498713, |
|
"eval_stsb_spearman": 0.8754395915831782, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.286647078028077e-06, |
|
"loss": 0.0691, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_avg_sts": 0.8480763616470857, |
|
"eval_sickr_spearman": 0.8280656989511468, |
|
"eval_stsb_spearman": 0.8680870243430245, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_avg_sts": 0.8510804038712448, |
|
"eval_sickr_spearman": 0.8315828243850542, |
|
"eval_stsb_spearman": 0.8705779833574355, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_avg_sts": 0.8481254894982377, |
|
"eval_sickr_spearman": 0.8284941363759749, |
|
"eval_stsb_spearman": 0.8677568426205006, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_avg_sts": 0.8518486456802068, |
|
"eval_sickr_spearman": 0.8313096234800742, |
|
"eval_stsb_spearman": 0.8723876678803395, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.47045380346066e-06, |
|
"loss": 0.0639, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_avg_sts": 0.8494848749262629, |
|
"eval_sickr_spearman": 0.8275968673700096, |
|
"eval_stsb_spearman": 0.8713728824825163, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_avg_sts": 0.8478683507440115, |
|
"eval_sickr_spearman": 0.8257009357029429, |
|
"eval_stsb_spearman": 0.87003576578508, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_avg_sts": 0.8511953137011732, |
|
"eval_sickr_spearman": 0.8320414253415853, |
|
"eval_stsb_spearman": 0.870349202060761, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_avg_sts": 0.8501760957050806, |
|
"eval_sickr_spearman": 0.8315510758270036, |
|
"eval_stsb_spearman": 0.8688011155831574, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.654260528893242e-06, |
|
"loss": 0.0525, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_avg_sts": 0.8496234541718779, |
|
"eval_sickr_spearman": 0.8301971271085646, |
|
"eval_stsb_spearman": 0.8690497812351913, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_avg_sts": 0.8437167774343648, |
|
"eval_sickr_spearman": 0.820443979681643, |
|
"eval_stsb_spearman": 0.8669895751870865, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_avg_sts": 0.8439814114677218, |
|
"eval_sickr_spearman": 0.8205907627276378, |
|
"eval_stsb_spearman": 0.8673720602078059, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_avg_sts": 0.848080574777744, |
|
"eval_sickr_spearman": 0.8265265903366641, |
|
"eval_stsb_spearman": 0.869634559218824, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.8380672543258246e-06, |
|
"loss": 0.0462, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_avg_sts": 0.8452623360298752, |
|
"eval_sickr_spearman": 0.8238323337325946, |
|
"eval_stsb_spearman": 0.866692338327156, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_avg_sts": 0.847977497396065, |
|
"eval_sickr_spearman": 0.8265478681146011, |
|
"eval_stsb_spearman": 0.8694071266775288, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_avg_sts": 0.8479098375907992, |
|
"eval_sickr_spearman": 0.826889897587942, |
|
"eval_stsb_spearman": 0.8689297775936565, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_avg_sts": 0.8457732999493977, |
|
"eval_sickr_spearman": 0.8235308425088674, |
|
"eval_stsb_spearman": 0.8680157573899278, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.021873979758407e-06, |
|
"loss": 0.0454, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_avg_sts": 0.8463330711965237, |
|
"eval_sickr_spearman": 0.824173882894921, |
|
"eval_stsb_spearman": 0.8684922594981264, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_avg_sts": 0.8475412183069994, |
|
"eval_sickr_spearman": 0.8259262015686877, |
|
"eval_stsb_spearman": 0.869156235045311, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_avg_sts": 0.8474056652640968, |
|
"eval_sickr_spearman": 0.8258597265242973, |
|
"eval_stsb_spearman": 0.8689516040038963, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_avg_sts": 0.846709994814922, |
|
"eval_sickr_spearman": 0.8247918990771251, |
|
"eval_stsb_spearman": 0.868628090552719, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 2.0568070519098922e-07, |
|
"loss": 0.0456, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_avg_sts": 0.8476613552908148, |
|
"eval_sickr_spearman": 0.8262576161686107, |
|
"eval_stsb_spearman": 0.8690650944130188, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_avg_sts": 0.8477936113942104, |
|
"eval_sickr_spearman": 0.8264356674616418, |
|
"eval_stsb_spearman": 0.8691515553267789, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 6126, |
|
"train_runtime": 4478.0897, |
|
"train_samples_per_second": 1.368 |
|
} |
|
], |
|
"max_steps": 6126, |
|
"num_train_epochs": 3, |
|
"total_flos": 213963637016494080, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|