{ "best_metric": 0.9701426609443169, "best_model_checkpoint": "mit-b4-finetuned-stroke-binary/checkpoint-1700", "epoch": 11.930232558139535, "eval_steps": 100, "global_step": 1932, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.31007751937984496, "grad_norm": 2.3132071495056152, "learning_rate": 5.154639175257732e-06, "loss": 0.6722, "step": 50 }, { "epoch": 0.6201550387596899, "grad_norm": 6.47659969329834, "learning_rate": 1.0309278350515464e-05, "loss": 0.5714, "step": 100 }, { "epoch": 0.6201550387596899, "eval_accuracy": 0.7878787878787878, "eval_f1": 0.7800219254126745, "eval_loss": 0.477566123008728, "eval_precision": 0.7900169125975578, "eval_recall": 0.7878787878787878, "eval_runtime": 56.589, "eval_samples_per_second": 39.071, "eval_steps_per_second": 4.895, "step": 100 }, { "epoch": 0.9302325581395349, "grad_norm": 4.08937406539917, "learning_rate": 1.5360824742268042e-05, "loss": 0.4471, "step": 150 }, { "epoch": 1.235658914728682, "grad_norm": 8.325615882873535, "learning_rate": 1.9999738610404825e-05, "loss": 0.3897, "step": 200 }, { "epoch": 1.235658914728682, "eval_accuracy": 0.8715513342379014, "eval_f1": 0.8704450429260716, "eval_loss": 0.3238992393016815, "eval_precision": 0.8710997928702509, "eval_recall": 0.8715513342379014, "eval_runtime": 56.7955, "eval_samples_per_second": 38.929, "eval_steps_per_second": 4.877, "step": 200 }, { "epoch": 1.5457364341085271, "grad_norm": 8.041431427001953, "learning_rate": 1.9952399350448247e-05, "loss": 0.3177, "step": 250 }, { "epoch": 1.8558139534883722, "grad_norm": 7.284913063049316, "learning_rate": 1.9823819633544185e-05, "loss": 0.2951, "step": 300 }, { "epoch": 1.8558139534883722, "eval_accuracy": 0.8765264586160109, "eval_f1": 0.8723990420158877, "eval_loss": 0.31197357177734375, "eval_precision": 0.885806076698763, "eval_recall": 0.8765264586160109, "eval_runtime": 56.5628, "eval_samples_per_second": 39.089, "eval_steps_per_second": 4.897, "step": 300 }, { "epoch": 2.1612403100775195, "grad_norm": 3.927116870880127, "learning_rate": 1.9615049043274207e-05, "loss": 0.2786, "step": 350 }, { "epoch": 2.471317829457364, "grad_norm": 13.424771308898926, "learning_rate": 1.932779175343134e-05, "loss": 0.23, "step": 400 }, { "epoch": 2.471317829457364, "eval_accuracy": 0.9280868385345997, "eval_f1": 0.9271058471970156, "eval_loss": 0.1993584781885147, "eval_precision": 0.9303851930689407, "eval_recall": 0.9280868385345997, "eval_runtime": 56.4712, "eval_samples_per_second": 39.153, "eval_steps_per_second": 4.905, "step": 400 }, { "epoch": 2.781395348837209, "grad_norm": 11.889705657958984, "learning_rate": 1.8964392617017013e-05, "loss": 0.2424, "step": 450 }, { "epoch": 3.0868217054263565, "grad_norm": 14.436951637268066, "learning_rate": 1.8527818025436662e-05, "loss": 0.2135, "step": 500 }, { "epoch": 3.0868217054263565, "eval_accuracy": 0.9280868385345997, "eval_f1": 0.9266634060111499, "eval_loss": 0.2157154679298401, "eval_precision": 0.9332581537403823, "eval_recall": 0.9280868385345997, "eval_runtime": 56.599, "eval_samples_per_second": 39.064, "eval_steps_per_second": 4.894, "step": 500 }, { "epoch": 3.3968992248062015, "grad_norm": 16.79993438720703, "learning_rate": 1.802163169413846e-05, "loss": 0.1746, "step": 550 }, { "epoch": 3.7069767441860466, "grad_norm": 14.388134002685547, "learning_rate": 1.7449965572354675e-05, "loss": 0.2106, "step": 600 }, { "epoch": 3.7069767441860466, "eval_accuracy": 0.9380370872908186, "eval_f1": 0.9382102726921231, "eval_loss": 0.18085584044456482, "eval_precision": 0.9386737275368563, "eval_recall": 0.9380370872908186, "eval_runtime": 56.6285, "eval_samples_per_second": 39.044, "eval_steps_per_second": 4.892, "step": 600 }, { "epoch": 4.0124031007751935, "grad_norm": 14.440871238708496, "learning_rate": 1.68174861144065e-05, "loss": 0.1802, "step": 650 }, { "epoch": 4.322480620155039, "grad_norm": 6.46437931060791, "learning_rate": 1.612935618789643e-05, "loss": 0.1576, "step": 700 }, { "epoch": 4.322480620155039, "eval_accuracy": 0.9402985074626866, "eval_f1": 0.9403501047780813, "eval_loss": 0.1628771722316742, "eval_precision": 0.9404274341623059, "eval_recall": 0.9402985074626866, "eval_runtime": 56.5976, "eval_samples_per_second": 39.065, "eval_steps_per_second": 4.894, "step": 700 }, { "epoch": 4.632558139534884, "grad_norm": 12.626237869262695, "learning_rate": 1.5391192929727884e-05, "loss": 0.1752, "step": 750 }, { "epoch": 4.942635658914728, "grad_norm": 20.061859130859375, "learning_rate": 1.460902189396916e-05, "loss": 0.1434, "step": 800 }, { "epoch": 4.942635658914728, "eval_accuracy": 0.9543193125282677, "eval_f1": 0.9541868361381168, "eval_loss": 0.1526043862104416, "eval_precision": 0.9543134613523963, "eval_recall": 0.9543193125282677, "eval_runtime": 56.4759, "eval_samples_per_second": 39.149, "eval_steps_per_second": 4.905, "step": 800 }, { "epoch": 5.248062015503876, "grad_norm": 11.535951614379883, "learning_rate": 1.3789227865848282e-05, "loss": 0.1601, "step": 850 }, { "epoch": 5.558139534883721, "grad_norm": 9.65259838104248, "learning_rate": 1.2938502743379212e-05, "loss": 0.1391, "step": 900 }, { "epoch": 5.558139534883721, "eval_accuracy": 0.9574853007688828, "eval_f1": 0.9574664394664735, "eval_loss": 0.1268271952867508, "eval_precision": 0.9574541746977356, "eval_recall": 0.9574853007688828, "eval_runtime": 56.6517, "eval_samples_per_second": 39.028, "eval_steps_per_second": 4.89, "step": 900 }, { "epoch": 5.868217054263566, "grad_norm": 7.883506774902344, "learning_rate": 1.2063790912056577e-05, "loss": 0.1605, "step": 950 }, { "epoch": 6.173643410852713, "grad_norm": 9.489595413208008, "learning_rate": 1.1172232558519983e-05, "loss": 0.1048, "step": 1000 }, { "epoch": 6.173643410852713, "eval_accuracy": 0.9556761646313885, "eval_f1": 0.9555092018481103, "eval_loss": 0.14889651536941528, "eval_precision": 0.9557688265871488, "eval_recall": 0.9556761646313885, "eval_runtime": 56.5428, "eval_samples_per_second": 39.103, "eval_steps_per_second": 4.899, "step": 1000 }, { "epoch": 6.4837209302325585, "grad_norm": 5.59963846206665, "learning_rate": 1.0271105385912779e-05, "loss": 0.116, "step": 1050 }, { "epoch": 6.793798449612403, "grad_norm": 4.873887538909912, "learning_rate": 9.367765206707174e-06, "loss": 0.1271, "step": 1100 }, { "epoch": 6.793798449612403, "eval_accuracy": 0.9570330167345092, "eval_f1": 0.9566191699282054, "eval_loss": 0.14482761919498444, "eval_precision": 0.9586051623091093, "eval_recall": 0.9570330167345092, "eval_runtime": 56.8397, "eval_samples_per_second": 38.899, "eval_steps_per_second": 4.873, "step": 1100 }, { "epoch": 7.09922480620155, "grad_norm": 4.528378963470459, "learning_rate": 8.469585897930557e-06, "loss": 0.1333, "step": 1150 }, { "epoch": 7.409302325581395, "grad_norm": 1.5636117458343506, "learning_rate": 7.583899208932648e-06, "loss": 0.091, "step": 1200 }, { "epoch": 7.409302325581395, "eval_accuracy": 0.9570330167345092, "eval_f1": 0.9567056534394107, "eval_loss": 0.14507929980754852, "eval_precision": 0.9579537259191305, "eval_recall": 0.9570330167345092, "eval_runtime": 56.7, "eval_samples_per_second": 38.995, "eval_steps_per_second": 4.885, "step": 1200 }, { "epoch": 7.7193798449612405, "grad_norm": 5.480973720550537, "learning_rate": 6.7179349130367235e-06, "loss": 0.1089, "step": 1250 }, { "epoch": 8.024806201550387, "grad_norm": 6.6158223152160645, "learning_rate": 5.878761791611129e-06, "loss": 0.1159, "step": 1300 }, { "epoch": 8.024806201550387, "eval_accuracy": 0.9629127091813658, "eval_f1": 0.9626761639787419, "eval_loss": 0.1205127015709877, "eval_precision": 0.9635871559570208, "eval_recall": 0.9629127091813658, "eval_runtime": 56.6004, "eval_samples_per_second": 39.063, "eval_steps_per_second": 4.894, "step": 1300 }, { "epoch": 8.334883720930232, "grad_norm": 5.2470316886901855, "learning_rate": 5.073229932302277e-06, "loss": 0.0886, "step": 1350 }, { "epoch": 8.644961240310078, "grad_norm": 6.15119743347168, "learning_rate": 4.307914812442993e-06, "loss": 0.1151, "step": 1400 }, { "epoch": 8.644961240310078, "eval_accuracy": 0.9665309814563546, "eval_f1": 0.9664379980687814, "eval_loss": 0.11242391169071198, "eval_precision": 0.9665871538113867, "eval_recall": 0.9665309814563546, "eval_runtime": 56.6713, "eval_samples_per_second": 39.014, "eval_steps_per_second": 4.888, "step": 1400 }, { "epoch": 8.955038759689922, "grad_norm": 12.04592227935791, "learning_rate": 3.589063624077802e-06, "loss": 0.0798, "step": 1450 }, { "epoch": 9.26046511627907, "grad_norm": 13.314713478088379, "learning_rate": 2.922544278748801e-06, "loss": 0.0735, "step": 1500 }, { "epoch": 9.26046511627907, "eval_accuracy": 0.9642695612844867, "eval_f1": 0.9641123145223969, "eval_loss": 0.11749936640262604, "eval_precision": 0.9645326112328965, "eval_recall": 0.9642695612844867, "eval_runtime": 56.8212, "eval_samples_per_second": 38.912, "eval_steps_per_second": 4.875, "step": 1500 }, { "epoch": 9.570542635658915, "grad_norm": 1.456084132194519, "learning_rate": 2.3137975083109153e-06, "loss": 0.0746, "step": 1550 }, { "epoch": 9.88062015503876, "grad_norm": 2.1265344619750977, "learning_rate": 1.7677924527729228e-06, "loss": 0.0537, "step": 1600 }, { "epoch": 9.88062015503876, "eval_accuracy": 0.9678878335594754, "eval_f1": 0.9678102645900477, "eval_loss": 0.11535227298736572, "eval_precision": 0.9679181198554704, "eval_recall": 0.9678878335594754, "eval_runtime": 56.5576, "eval_samples_per_second": 39.093, "eval_steps_per_second": 4.898, "step": 1600 }, { "epoch": 10.186046511627907, "grad_norm": 12.95783805847168, "learning_rate": 1.2889860976963542e-06, "loss": 0.0857, "step": 1650 }, { "epoch": 10.496124031007753, "grad_norm": 3.627340078353882, "learning_rate": 8.812868922607565e-07, "loss": 0.0666, "step": 1700 }, { "epoch": 10.496124031007753, "eval_accuracy": 0.9701492537313433, "eval_f1": 0.9701426609443169, "eval_loss": 0.11616706103086472, "eval_precision": 0.9701377402873191, "eval_recall": 0.9701492537313433, "eval_runtime": 56.4987, "eval_samples_per_second": 39.134, "eval_steps_per_second": 4.903, "step": 1700 }, { "epoch": 10.806201550387597, "grad_norm": 9.41781997680664, "learning_rate": 5.480228449774882e-07, "loss": 0.0722, "step": 1750 }, { "epoch": 11.111627906976745, "grad_norm": 12.687678337097168, "learning_rate": 2.9626582353969756e-07, "loss": 0.0732, "step": 1800 }, { "epoch": 11.111627906976745, "eval_accuracy": 0.9678878335594754, "eval_f1": 0.9678179490403084, "eval_loss": 0.11334193497896194, "eval_precision": 0.9678997125749722, "eval_recall": 0.9678878335594754, "eval_runtime": 56.7681, "eval_samples_per_second": 38.948, "eval_steps_per_second": 4.88, "step": 1800 }, { "epoch": 11.421705426356588, "grad_norm": 13.630194664001465, "learning_rate": 1.1780223451346994e-07, "loss": 0.0868, "step": 1850 }, { "epoch": 11.731782945736434, "grad_norm": 17.37832260131836, "learning_rate": 2.0006053801937543e-08, "loss": 0.0775, "step": 1900 }, { "epoch": 11.731782945736434, "eval_accuracy": 0.968340117593849, "eval_f1": 0.9682674370094175, "eval_loss": 0.112978994846344, "eval_precision": 0.9683629530510336, "eval_recall": 0.968340117593849, "eval_runtime": 56.6602, "eval_samples_per_second": 39.022, "eval_steps_per_second": 4.889, "step": 1900 }, { "epoch": 11.930232558139535, "step": 1932, "total_flos": 1.7667354394198278e+19, "train_loss": 0.17970547851321614, "train_runtime": 5653.5088, "train_samples_per_second": 10.948, "train_steps_per_second": 0.342 } ], "logging_steps": 50, "max_steps": 1932, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.7667354394198278e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }