{ "best_metric": 0.7092775390280552, "best_model_checkpoint": "levit_128.fb_dist_in1k-finetuned-stroke-binary/checkpoint-600", "epoch": 5.558139534883721, "eval_steps": 100, "global_step": 900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.31007751937984496, "grad_norm": 175.24969482421875, "learning_rate": 1.413793103448276e-06, "loss": 0.6951, "step": 50 }, { "epoch": 0.6201550387596899, "grad_norm": 146.73788452148438, "learning_rate": 3.137931034482759e-06, "loss": 0.695, "step": 100 }, { "epoch": 0.6201550387596899, "eval_accuracy": 0.5364088647670737, "eval_f1": 0.5420516079700721, "eval_loss": NaN, "eval_precision": 0.5540979766391916, "eval_recall": 0.5364088647670737, "eval_runtime": 6.9382, "eval_samples_per_second": 318.669, "eval_steps_per_second": 39.924, "step": 100 }, { "epoch": 0.9302325581395349, "grad_norm": 127.28129577636719, "learning_rate": 4.793103448275862e-06, "loss": 0.6942, "step": 150 }, { "epoch": 1.235658914728682, "grad_norm": 751.1776123046875, "learning_rate": 6.482758620689655e-06, "loss": 0.6804, "step": 200 }, { "epoch": 1.235658914728682, "eval_accuracy": 0.5798281320669381, "eval_f1": 0.5832968695954936, "eval_loss": NaN, "eval_precision": 0.6277845389250567, "eval_recall": 0.5798281320669381, "eval_runtime": 6.9012, "eval_samples_per_second": 320.38, "eval_steps_per_second": 40.138, "step": 200 }, { "epoch": 1.5457364341085271, "grad_norm": 141.23553466796875, "learning_rate": 8.206896551724138e-06, "loss": 0.6896, "step": 250 }, { "epoch": 1.8558139534883722, "grad_norm": 39.63947296142578, "learning_rate": 9.931034482758622e-06, "loss": 0.6821, "step": 300 }, { "epoch": 1.8558139534883722, "eval_accuracy": 0.6232473993668024, "eval_f1": 0.6279682812403823, "eval_loss": NaN, "eval_precision": 0.6412845278993262, "eval_recall": 0.6232473993668024, "eval_runtime": 6.5774, "eval_samples_per_second": 336.153, "eval_steps_per_second": 42.114, "step": 300 }, { "epoch": 2.1612403100775195, "grad_norm": 176.00396728515625, "learning_rate": 1.1655172413793106e-05, "loss": 0.6687, "step": 350 }, { "epoch": 2.471317829457364, "grad_norm": 245.2242889404297, "learning_rate": 1.3379310344827587e-05, "loss": 0.6726, "step": 400 }, { "epoch": 2.471317829457364, "eval_accuracy": 0.6671189507010402, "eval_f1": 0.6711377069586024, "eval_loss": NaN, "eval_precision": 0.6829477528090242, "eval_recall": 0.6671189507010402, "eval_runtime": 6.8482, "eval_samples_per_second": 322.86, "eval_steps_per_second": 40.449, "step": 400 }, { "epoch": 2.781395348837209, "grad_norm": 17503.568359375, "learning_rate": 1.5068965517241381e-05, "loss": 0.6682, "step": 450 }, { "epoch": 3.0868217054263565, "grad_norm": 69.87994384765625, "learning_rate": 1.6793103448275862e-05, "loss": 0.6546, "step": 500 }, { "epoch": 3.0868217054263565, "eval_accuracy": 0.70239710538218, "eval_f1": 0.7020627244507841, "eval_loss": NaN, "eval_precision": 0.7017573042623244, "eval_recall": 0.70239710538218, "eval_runtime": 6.8785, "eval_samples_per_second": 321.438, "eval_steps_per_second": 40.271, "step": 500 }, { "epoch": 3.3968992248062015, "grad_norm": 101.73799133300781, "learning_rate": 1.8517241379310348e-05, "loss": 0.6508, "step": 550 }, { "epoch": 3.7069767441860466, "grad_norm": 103.15814971923828, "learning_rate": 1.9999934702491228e-05, "loss": 0.647, "step": 600 }, { "epoch": 3.7069767441860466, "eval_accuracy": 0.7064676616915423, "eval_f1": 0.7092775390280552, "eval_loss": NaN, "eval_precision": 0.7158812934752514, "eval_recall": 0.7064676616915423, "eval_runtime": 6.9098, "eval_samples_per_second": 319.98, "eval_steps_per_second": 40.088, "step": 600 }, { "epoch": 4.0124031007751935, "grad_norm": 169.00445556640625, "learning_rate": 1.9994312394497127e-05, "loss": 0.6255, "step": 650 }, { "epoch": 4.322480620155039, "grad_norm": 8452.361328125, "learning_rate": 1.997962682133918e-05, "loss": 0.6263, "step": 700 }, { "epoch": 4.322480620155039, "eval_accuracy": 0.6956128448665763, "eval_f1": 0.6991433808448707, "eval_loss": NaN, "eval_precision": 0.709618757975013, "eval_recall": 0.6956128448665763, "eval_runtime": 6.9194, "eval_samples_per_second": 319.537, "eval_steps_per_second": 40.032, "step": 700 }, { "epoch": 4.632558139534884, "grad_norm": 53.048866271972656, "learning_rate": 1.9955891300516223e-05, "loss": 0.6144, "step": 750 }, { "epoch": 4.942635658914728, "grad_norm": 66.52362823486328, "learning_rate": 1.9923127356402373e-05, "loss": 0.6112, "step": 800 }, { "epoch": 4.942635658914728, "eval_accuracy": 0.6766169154228856, "eval_f1": 0.6806907989098179, "eval_loss": NaN, "eval_precision": 0.7122760837780127, "eval_recall": 0.6766169154228856, "eval_runtime": 6.6311, "eval_samples_per_second": 333.428, "eval_steps_per_second": 41.773, "step": 800 }, { "epoch": 5.248062015503876, "grad_norm": 39.8150634765625, "learning_rate": 1.9881364700727827e-05, "loss": 0.5889, "step": 850 }, { "epoch": 5.558139534883721, "grad_norm": 156.32704162597656, "learning_rate": 1.9830641205634987e-05, "loss": 0.5704, "step": 900 }, { "epoch": 5.558139534883721, "eval_accuracy": 0.6974219810040706, "eval_f1": 0.7013769772020675, "eval_loss": NaN, "eval_precision": 0.7297910740241028, "eval_recall": 0.6974219810040706, "eval_runtime": 6.7688, "eval_samples_per_second": 326.644, "eval_steps_per_second": 40.923, "step": 900 }, { "epoch": 5.558139534883721, "step": 900, "total_flos": 2.1868569076064256e+17, "train_loss": 0.6519456799825033, "train_runtime": 297.561, "train_samples_per_second": 624.033, "train_steps_per_second": 19.478 } ], "logging_steps": 50, "max_steps": 5796, "num_input_tokens_seen": 0, "num_train_epochs": 36, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.1868569076064256e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }