{
  "best_metric": 0.9701426609443169,
  "best_model_checkpoint": "mit-b4-finetuned-stroke-binary/checkpoint-1700",
  "epoch": 11.930232558139535,
  "eval_steps": 100,
  "global_step": 1932,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.31007751937984496,
      "grad_norm": 2.3132071495056152,
      "learning_rate": 5.154639175257732e-06,
      "loss": 0.6722,
      "step": 50
    },
    {
      "epoch": 0.6201550387596899,
      "grad_norm": 6.47659969329834,
      "learning_rate": 1.0309278350515464e-05,
      "loss": 0.5714,
      "step": 100
    },
    {
      "epoch": 0.6201550387596899,
      "eval_accuracy": 0.7878787878787878,
      "eval_f1": 0.7800219254126745,
      "eval_loss": 0.477566123008728,
      "eval_precision": 0.7900169125975578,
      "eval_recall": 0.7878787878787878,
      "eval_runtime": 56.589,
      "eval_samples_per_second": 39.071,
      "eval_steps_per_second": 4.895,
      "step": 100
    },
    {
      "epoch": 0.9302325581395349,
      "grad_norm": 4.08937406539917,
      "learning_rate": 1.5360824742268042e-05,
      "loss": 0.4471,
      "step": 150
    },
    {
      "epoch": 1.235658914728682,
      "grad_norm": 8.325615882873535,
      "learning_rate": 1.9999738610404825e-05,
      "loss": 0.3897,
      "step": 200
    },
    {
      "epoch": 1.235658914728682,
      "eval_accuracy": 0.8715513342379014,
      "eval_f1": 0.8704450429260716,
      "eval_loss": 0.3238992393016815,
      "eval_precision": 0.8710997928702509,
      "eval_recall": 0.8715513342379014,
      "eval_runtime": 56.7955,
      "eval_samples_per_second": 38.929,
      "eval_steps_per_second": 4.877,
      "step": 200
    },
    {
      "epoch": 1.5457364341085271,
      "grad_norm": 8.041431427001953,
      "learning_rate": 1.9952399350448247e-05,
      "loss": 0.3177,
      "step": 250
    },
    {
      "epoch": 1.8558139534883722,
      "grad_norm": 7.284913063049316,
      "learning_rate": 1.9823819633544185e-05,
      "loss": 0.2951,
      "step": 300
    },
    {
      "epoch": 1.8558139534883722,
      "eval_accuracy": 0.8765264586160109,
      "eval_f1": 0.8723990420158877,
      "eval_loss": 0.31197357177734375,
      "eval_precision": 0.885806076698763,
      "eval_recall": 0.8765264586160109,
      "eval_runtime": 56.5628,
      "eval_samples_per_second": 39.089,
      "eval_steps_per_second": 4.897,
      "step": 300
    },
    {
      "epoch": 2.1612403100775195,
      "grad_norm": 3.927116870880127,
      "learning_rate": 1.9615049043274207e-05,
      "loss": 0.2786,
      "step": 350
    },
    {
      "epoch": 2.471317829457364,
      "grad_norm": 13.424771308898926,
      "learning_rate": 1.932779175343134e-05,
      "loss": 0.23,
      "step": 400
    },
    {
      "epoch": 2.471317829457364,
      "eval_accuracy": 0.9280868385345997,
      "eval_f1": 0.9271058471970156,
      "eval_loss": 0.1993584781885147,
      "eval_precision": 0.9303851930689407,
      "eval_recall": 0.9280868385345997,
      "eval_runtime": 56.4712,
      "eval_samples_per_second": 39.153,
      "eval_steps_per_second": 4.905,
      "step": 400
    },
    {
      "epoch": 2.781395348837209,
      "grad_norm": 11.889705657958984,
      "learning_rate": 1.8964392617017013e-05,
      "loss": 0.2424,
      "step": 450
    },
    {
      "epoch": 3.0868217054263565,
      "grad_norm": 14.436951637268066,
      "learning_rate": 1.8527818025436662e-05,
      "loss": 0.2135,
      "step": 500
    },
    {
      "epoch": 3.0868217054263565,
      "eval_accuracy": 0.9280868385345997,
      "eval_f1": 0.9266634060111499,
      "eval_loss": 0.2157154679298401,
      "eval_precision": 0.9332581537403823,
      "eval_recall": 0.9280868385345997,
      "eval_runtime": 56.599,
      "eval_samples_per_second": 39.064,
      "eval_steps_per_second": 4.894,
      "step": 500
    },
    {
      "epoch": 3.3968992248062015,
      "grad_norm": 16.79993438720703,
      "learning_rate": 1.802163169413846e-05,
      "loss": 0.1746,
      "step": 550
    },
    {
      "epoch": 3.7069767441860466,
      "grad_norm": 14.388134002685547,
      "learning_rate": 1.7449965572354675e-05,
      "loss": 0.2106,
      "step": 600
    },
    {
      "epoch": 3.7069767441860466,
      "eval_accuracy": 0.9380370872908186,
      "eval_f1": 0.9382102726921231,
      "eval_loss": 0.18085584044456482,
      "eval_precision": 0.9386737275368563,
      "eval_recall": 0.9380370872908186,
      "eval_runtime": 56.6285,
      "eval_samples_per_second": 39.044,
      "eval_steps_per_second": 4.892,
      "step": 600
    },
    {
      "epoch": 4.0124031007751935,
      "grad_norm": 14.440871238708496,
      "learning_rate": 1.68174861144065e-05,
      "loss": 0.1802,
      "step": 650
    },
    {
      "epoch": 4.322480620155039,
      "grad_norm": 6.46437931060791,
      "learning_rate": 1.612935618789643e-05,
      "loss": 0.1576,
      "step": 700
    },
    {
      "epoch": 4.322480620155039,
      "eval_accuracy": 0.9402985074626866,
      "eval_f1": 0.9403501047780813,
      "eval_loss": 0.1628771722316742,
      "eval_precision": 0.9404274341623059,
      "eval_recall": 0.9402985074626866,
      "eval_runtime": 56.5976,
      "eval_samples_per_second": 39.065,
      "eval_steps_per_second": 4.894,
      "step": 700
    },
    {
      "epoch": 4.632558139534884,
      "grad_norm": 12.626237869262695,
      "learning_rate": 1.5391192929727884e-05,
      "loss": 0.1752,
      "step": 750
    },
    {
      "epoch": 4.942635658914728,
      "grad_norm": 20.061859130859375,
      "learning_rate": 1.460902189396916e-05,
      "loss": 0.1434,
      "step": 800
    },
    {
      "epoch": 4.942635658914728,
      "eval_accuracy": 0.9543193125282677,
      "eval_f1": 0.9541868361381168,
      "eval_loss": 0.1526043862104416,
      "eval_precision": 0.9543134613523963,
      "eval_recall": 0.9543193125282677,
      "eval_runtime": 56.4759,
      "eval_samples_per_second": 39.149,
      "eval_steps_per_second": 4.905,
      "step": 800
    },
    {
      "epoch": 5.248062015503876,
      "grad_norm": 11.535951614379883,
      "learning_rate": 1.3789227865848282e-05,
      "loss": 0.1601,
      "step": 850
    },
    {
      "epoch": 5.558139534883721,
      "grad_norm": 9.65259838104248,
      "learning_rate": 1.2938502743379212e-05,
      "loss": 0.1391,
      "step": 900
    },
    {
      "epoch": 5.558139534883721,
      "eval_accuracy": 0.9574853007688828,
      "eval_f1": 0.9574664394664735,
      "eval_loss": 0.1268271952867508,
      "eval_precision": 0.9574541746977356,
      "eval_recall": 0.9574853007688828,
      "eval_runtime": 56.6517,
      "eval_samples_per_second": 39.028,
      "eval_steps_per_second": 4.89,
      "step": 900
    },
    {
      "epoch": 5.868217054263566,
      "grad_norm": 7.883506774902344,
      "learning_rate": 1.2063790912056577e-05,
      "loss": 0.1605,
      "step": 950
    },
    {
      "epoch": 6.173643410852713,
      "grad_norm": 9.489595413208008,
      "learning_rate": 1.1172232558519983e-05,
      "loss": 0.1048,
      "step": 1000
    },
    {
      "epoch": 6.173643410852713,
      "eval_accuracy": 0.9556761646313885,
      "eval_f1": 0.9555092018481103,
      "eval_loss": 0.14889651536941528,
      "eval_precision": 0.9557688265871488,
      "eval_recall": 0.9556761646313885,
      "eval_runtime": 56.5428,
      "eval_samples_per_second": 39.103,
      "eval_steps_per_second": 4.899,
      "step": 1000
    },
    {
      "epoch": 6.4837209302325585,
      "grad_norm": 5.59963846206665,
      "learning_rate": 1.0271105385912779e-05,
      "loss": 0.116,
      "step": 1050
    },
    {
      "epoch": 6.793798449612403,
      "grad_norm": 4.873887538909912,
      "learning_rate": 9.367765206707174e-06,
      "loss": 0.1271,
      "step": 1100
    },
    {
      "epoch": 6.793798449612403,
      "eval_accuracy": 0.9570330167345092,
      "eval_f1": 0.9566191699282054,
      "eval_loss": 0.14482761919498444,
      "eval_precision": 0.9586051623091093,
      "eval_recall": 0.9570330167345092,
      "eval_runtime": 56.8397,
      "eval_samples_per_second": 38.899,
      "eval_steps_per_second": 4.873,
      "step": 1100
    },
    {
      "epoch": 7.09922480620155,
      "grad_norm": 4.528378963470459,
      "learning_rate": 8.469585897930557e-06,
      "loss": 0.1333,
      "step": 1150
    },
    {
      "epoch": 7.409302325581395,
      "grad_norm": 1.5636117458343506,
      "learning_rate": 7.583899208932648e-06,
      "loss": 0.091,
      "step": 1200
    },
    {
      "epoch": 7.409302325581395,
      "eval_accuracy": 0.9570330167345092,
      "eval_f1": 0.9567056534394107,
      "eval_loss": 0.14507929980754852,
      "eval_precision": 0.9579537259191305,
      "eval_recall": 0.9570330167345092,
      "eval_runtime": 56.7,
      "eval_samples_per_second": 38.995,
      "eval_steps_per_second": 4.885,
      "step": 1200
    },
    {
      "epoch": 7.7193798449612405,
      "grad_norm": 5.480973720550537,
      "learning_rate": 6.7179349130367235e-06,
      "loss": 0.1089,
      "step": 1250
    },
    {
      "epoch": 8.024806201550387,
      "grad_norm": 6.6158223152160645,
      "learning_rate": 5.878761791611129e-06,
      "loss": 0.1159,
      "step": 1300
    },
    {
      "epoch": 8.024806201550387,
      "eval_accuracy": 0.9629127091813658,
      "eval_f1": 0.9626761639787419,
      "eval_loss": 0.1205127015709877,
      "eval_precision": 0.9635871559570208,
      "eval_recall": 0.9629127091813658,
      "eval_runtime": 56.6004,
      "eval_samples_per_second": 39.063,
      "eval_steps_per_second": 4.894,
      "step": 1300
    },
    {
      "epoch": 8.334883720930232,
      "grad_norm": 5.2470316886901855,
      "learning_rate": 5.073229932302277e-06,
      "loss": 0.0886,
      "step": 1350
    },
    {
      "epoch": 8.644961240310078,
      "grad_norm": 6.15119743347168,
      "learning_rate": 4.307914812442993e-06,
      "loss": 0.1151,
      "step": 1400
    },
    {
      "epoch": 8.644961240310078,
      "eval_accuracy": 0.9665309814563546,
      "eval_f1": 0.9664379980687814,
      "eval_loss": 0.11242391169071198,
      "eval_precision": 0.9665871538113867,
      "eval_recall": 0.9665309814563546,
      "eval_runtime": 56.6713,
      "eval_samples_per_second": 39.014,
      "eval_steps_per_second": 4.888,
      "step": 1400
    },
    {
      "epoch": 8.955038759689922,
      "grad_norm": 12.04592227935791,
      "learning_rate": 3.589063624077802e-06,
      "loss": 0.0798,
      "step": 1450
    },
    {
      "epoch": 9.26046511627907,
      "grad_norm": 13.314713478088379,
      "learning_rate": 2.922544278748801e-06,
      "loss": 0.0735,
      "step": 1500
    },
    {
      "epoch": 9.26046511627907,
      "eval_accuracy": 0.9642695612844867,
      "eval_f1": 0.9641123145223969,
      "eval_loss": 0.11749936640262604,
      "eval_precision": 0.9645326112328965,
      "eval_recall": 0.9642695612844867,
      "eval_runtime": 56.8212,
      "eval_samples_per_second": 38.912,
      "eval_steps_per_second": 4.875,
      "step": 1500
    },
    {
      "epoch": 9.570542635658915,
      "grad_norm": 1.456084132194519,
      "learning_rate": 2.3137975083109153e-06,
      "loss": 0.0746,
      "step": 1550
    },
    {
      "epoch": 9.88062015503876,
      "grad_norm": 2.1265344619750977,
      "learning_rate": 1.7677924527729228e-06,
      "loss": 0.0537,
      "step": 1600
    },
    {
      "epoch": 9.88062015503876,
      "eval_accuracy": 0.9678878335594754,
      "eval_f1": 0.9678102645900477,
      "eval_loss": 0.11535227298736572,
      "eval_precision": 0.9679181198554704,
      "eval_recall": 0.9678878335594754,
      "eval_runtime": 56.5576,
      "eval_samples_per_second": 39.093,
      "eval_steps_per_second": 4.898,
      "step": 1600
    },
    {
      "epoch": 10.186046511627907,
      "grad_norm": 12.95783805847168,
      "learning_rate": 1.2889860976963542e-06,
      "loss": 0.0857,
      "step": 1650
    },
    {
      "epoch": 10.496124031007753,
      "grad_norm": 3.627340078353882,
      "learning_rate": 8.812868922607565e-07,
      "loss": 0.0666,
      "step": 1700
    },
    {
      "epoch": 10.496124031007753,
      "eval_accuracy": 0.9701492537313433,
      "eval_f1": 0.9701426609443169,
      "eval_loss": 0.11616706103086472,
      "eval_precision": 0.9701377402873191,
      "eval_recall": 0.9701492537313433,
      "eval_runtime": 56.4987,
      "eval_samples_per_second": 39.134,
      "eval_steps_per_second": 4.903,
      "step": 1700
    },
    {
      "epoch": 10.806201550387597,
      "grad_norm": 9.41781997680664,
      "learning_rate": 5.480228449774882e-07,
      "loss": 0.0722,
      "step": 1750
    },
    {
      "epoch": 11.111627906976745,
      "grad_norm": 12.687678337097168,
      "learning_rate": 2.9626582353969756e-07,
      "loss": 0.0732,
      "step": 1800
    },
    {
      "epoch": 11.111627906976745,
      "eval_accuracy": 0.9678878335594754,
      "eval_f1": 0.9678179490403084,
      "eval_loss": 0.11334193497896194,
      "eval_precision": 0.9678997125749722,
      "eval_recall": 0.9678878335594754,
      "eval_runtime": 56.7681,
      "eval_samples_per_second": 38.948,
      "eval_steps_per_second": 4.88,
      "step": 1800
    },
    {
      "epoch": 11.421705426356588,
      "grad_norm": 13.630194664001465,
      "learning_rate": 1.1780223451346994e-07,
      "loss": 0.0868,
      "step": 1850
    },
    {
      "epoch": 11.731782945736434,
      "grad_norm": 17.37832260131836,
      "learning_rate": 2.0006053801937543e-08,
      "loss": 0.0775,
      "step": 1900
    },
    {
      "epoch": 11.731782945736434,
      "eval_accuracy": 0.968340117593849,
      "eval_f1": 0.9682674370094175,
      "eval_loss": 0.112978994846344,
      "eval_precision": 0.9683629530510336,
      "eval_recall": 0.968340117593849,
      "eval_runtime": 56.6602,
      "eval_samples_per_second": 39.022,
      "eval_steps_per_second": 4.889,
      "step": 1900
    },
    {
      "epoch": 11.930232558139535,
      "step": 1932,
      "total_flos": 1.7667354394198278e+19,
      "train_loss": 0.17970547851321614,
      "train_runtime": 5653.5088,
      "train_samples_per_second": 10.948,
      "train_steps_per_second": 0.342
    }
  ],
  "logging_steps": 50,
  "max_steps": 1932,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 12,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.7667354394198278e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}