|
{ |
|
"best_metric": 0.26184663, |
|
"best_model_checkpoint": "/home/patrickbarker/output/qwen2-vl-7b-instruct/v0-20241218-000939/checkpoint-100", |
|
"epoch": 5.0, |
|
"eval_steps": 200, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"acc": 0.78614354, |
|
"epoch": 0.05, |
|
"grad_norm": 32.0172921873113, |
|
"learning_rate": 0.0, |
|
"loss": 1.30341828, |
|
"memory(GiB)": 54.56, |
|
"step": 1, |
|
"train_speed(iter/s)": 0.023493 |
|
}, |
|
{ |
|
"acc": 0.80234951, |
|
"epoch": 0.25, |
|
"grad_norm": 37.62154000246541, |
|
"learning_rate": 1e-05, |
|
"loss": 1.23205733, |
|
"memory(GiB)": 74.75, |
|
"step": 5, |
|
"train_speed(iter/s)": 0.030873 |
|
}, |
|
{ |
|
"acc": 0.83633699, |
|
"epoch": 0.5, |
|
"grad_norm": 74.22787353731742, |
|
"learning_rate": 9.93181333636191e-06, |
|
"loss": 0.93288889, |
|
"memory(GiB)": 74.75, |
|
"step": 10, |
|
"train_speed(iter/s)": 0.032122 |
|
}, |
|
{ |
|
"acc": 0.84369278, |
|
"epoch": 0.75, |
|
"grad_norm": 5.440459167288553, |
|
"learning_rate": 9.729113299882324e-06, |
|
"loss": 0.84061708, |
|
"memory(GiB)": 74.75, |
|
"step": 15, |
|
"train_speed(iter/s)": 0.032554 |
|
}, |
|
{ |
|
"acc": 0.86311598, |
|
"epoch": 1.0, |
|
"grad_norm": 7.873547600114794, |
|
"learning_rate": 9.397429019156841e-06, |
|
"loss": 0.72119961, |
|
"memory(GiB)": 74.75, |
|
"step": 20, |
|
"train_speed(iter/s)": 0.032776 |
|
}, |
|
{ |
|
"acc": 0.885886, |
|
"epoch": 1.25, |
|
"grad_norm": 5.745656860997658, |
|
"learning_rate": 8.94580797672727e-06, |
|
"loss": 0.77134295, |
|
"memory(GiB)": 74.75, |
|
"step": 25, |
|
"train_speed(iter/s)": 0.032863 |
|
}, |
|
{ |
|
"acc": 0.8865592, |
|
"epoch": 1.5, |
|
"grad_norm": 16.573461756273073, |
|
"learning_rate": 8.386569217342893e-06, |
|
"loss": 0.61206017, |
|
"memory(GiB)": 74.75, |
|
"step": 30, |
|
"train_speed(iter/s)": 0.032959 |
|
}, |
|
{ |
|
"acc": 0.89316502, |
|
"epoch": 1.75, |
|
"grad_norm": 5.34427710104793, |
|
"learning_rate": 7.734967316533076e-06, |
|
"loss": 0.64310627, |
|
"memory(GiB)": 74.75, |
|
"step": 35, |
|
"train_speed(iter/s)": 0.033027 |
|
}, |
|
{ |
|
"acc": 0.89337797, |
|
"epoch": 2.0, |
|
"grad_norm": 3.3194243420102083, |
|
"learning_rate": 7.008776275552522e-06, |
|
"loss": 0.58372121, |
|
"memory(GiB)": 74.75, |
|
"step": 40, |
|
"train_speed(iter/s)": 0.03308 |
|
}, |
|
{ |
|
"acc": 0.91168728, |
|
"epoch": 2.25, |
|
"grad_norm": 1.99209188413019, |
|
"learning_rate": 6.2278046929604265e-06, |
|
"loss": 0.4987381, |
|
"memory(GiB)": 74.75, |
|
"step": 45, |
|
"train_speed(iter/s)": 0.033097 |
|
}, |
|
{ |
|
"acc": 0.90439634, |
|
"epoch": 2.5, |
|
"grad_norm": 7.169335908534055, |
|
"learning_rate": 5.413355437688926e-06, |
|
"loss": 0.54837952, |
|
"memory(GiB)": 74.75, |
|
"step": 50, |
|
"train_speed(iter/s)": 0.033134 |
|
}, |
|
{ |
|
"acc": 0.90367451, |
|
"epoch": 2.75, |
|
"grad_norm": 5.395325589957545, |
|
"learning_rate": 4.587644562311076e-06, |
|
"loss": 0.53503237, |
|
"memory(GiB)": 74.75, |
|
"step": 55, |
|
"train_speed(iter/s)": 0.033163 |
|
}, |
|
{ |
|
"acc": 0.92254467, |
|
"epoch": 3.0, |
|
"grad_norm": 2.5695200478983646, |
|
"learning_rate": 3.773195307039575e-06, |
|
"loss": 0.40150399, |
|
"memory(GiB)": 74.75, |
|
"step": 60, |
|
"train_speed(iter/s)": 0.033186 |
|
}, |
|
{ |
|
"acc": 0.9301857, |
|
"epoch": 3.25, |
|
"grad_norm": 3.1556048007324367, |
|
"learning_rate": 2.9922237244474807e-06, |
|
"loss": 0.38734879, |
|
"memory(GiB)": 74.75, |
|
"step": 65, |
|
"train_speed(iter/s)": 0.033189 |
|
}, |
|
{ |
|
"acc": 0.91883011, |
|
"epoch": 3.5, |
|
"grad_norm": 17.287645910961686, |
|
"learning_rate": 2.266032683466928e-06, |
|
"loss": 0.44510117, |
|
"memory(GiB)": 74.75, |
|
"step": 70, |
|
"train_speed(iter/s)": 0.033209 |
|
}, |
|
{ |
|
"acc": 0.93858633, |
|
"epoch": 3.75, |
|
"grad_norm": 5.109693472015538, |
|
"learning_rate": 1.6144307826571085e-06, |
|
"loss": 0.36709385, |
|
"memory(GiB)": 74.75, |
|
"step": 75, |
|
"train_speed(iter/s)": 0.033226 |
|
}, |
|
{ |
|
"acc": 0.94115381, |
|
"epoch": 4.0, |
|
"grad_norm": 3.535417356581457, |
|
"learning_rate": 1.0551920232727309e-06, |
|
"loss": 0.34772708, |
|
"memory(GiB)": 74.75, |
|
"step": 80, |
|
"train_speed(iter/s)": 0.03324 |
|
}, |
|
{ |
|
"acc": 0.94864588, |
|
"epoch": 4.25, |
|
"grad_norm": 2.6450151758477927, |
|
"learning_rate": 6.035709808431585e-07, |
|
"loss": 0.30039248, |
|
"memory(GiB)": 74.75, |
|
"step": 85, |
|
"train_speed(iter/s)": 0.033238 |
|
}, |
|
{ |
|
"acc": 0.95026588, |
|
"epoch": 4.5, |
|
"grad_norm": 2.3681008746117964, |
|
"learning_rate": 2.7188670011767715e-07, |
|
"loss": 0.28691387, |
|
"memory(GiB)": 74.75, |
|
"step": 90, |
|
"train_speed(iter/s)": 0.033251 |
|
}, |
|
{ |
|
"acc": 0.94560108, |
|
"epoch": 4.75, |
|
"grad_norm": 4.066631026473562, |
|
"learning_rate": 6.918666363808976e-08, |
|
"loss": 0.29291582, |
|
"memory(GiB)": 74.75, |
|
"step": 95, |
|
"train_speed(iter/s)": 0.033261 |
|
}, |
|
{ |
|
"acc": 0.95544872, |
|
"epoch": 5.0, |
|
"grad_norm": 3.609028127209203, |
|
"learning_rate": 1e-09, |
|
"loss": 0.26392193, |
|
"memory(GiB)": 74.75, |
|
"step": 100, |
|
"train_speed(iter/s)": 0.033271 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_acc": 0.8952164009111617, |
|
"eval_loss": 0.2618466317653656, |
|
"eval_runtime": 23.5294, |
|
"eval_samples_per_second": 1.488, |
|
"eval_steps_per_second": 0.213, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 240537459228672.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|