{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.963855421686747,
  "eval_steps": 10,
  "global_step": 186,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1606425702811245,
      "grad_norm": 8.837871551513672,
      "learning_rate": 0.000189247311827957,
      "loss": 8.4808,
      "step": 10
    },
    {
      "epoch": 0.1606425702811245,
      "eval_loss": 0.47449949383735657,
      "eval_runtime": 44.2521,
      "eval_samples_per_second": 11.276,
      "eval_steps_per_second": 2.825,
      "step": 10
    },
    {
      "epoch": 0.321285140562249,
      "grad_norm": 3.9745054244995117,
      "learning_rate": 0.00017849462365591398,
      "loss": 3.0872,
      "step": 20
    },
    {
      "epoch": 0.321285140562249,
      "eval_loss": 0.3382565677165985,
      "eval_runtime": 48.3781,
      "eval_samples_per_second": 10.315,
      "eval_steps_per_second": 2.584,
      "step": 20
    },
    {
      "epoch": 0.4819277108433735,
      "grad_norm": 3.2844247817993164,
      "learning_rate": 0.00016774193548387098,
      "loss": 2.7654,
      "step": 30
    },
    {
      "epoch": 0.4819277108433735,
      "eval_loss": 0.3172107934951782,
      "eval_runtime": 50.1957,
      "eval_samples_per_second": 9.941,
      "eval_steps_per_second": 2.49,
      "step": 30
    },
    {
      "epoch": 0.642570281124498,
      "grad_norm": 2.986614465713501,
      "learning_rate": 0.00015698924731182796,
      "loss": 2.4167,
      "step": 40
    },
    {
      "epoch": 0.642570281124498,
      "eval_loss": 0.3065057694911957,
      "eval_runtime": 49.9091,
      "eval_samples_per_second": 9.998,
      "eval_steps_per_second": 2.505,
      "step": 40
    },
    {
      "epoch": 0.8032128514056225,
      "grad_norm": 2.47356915473938,
      "learning_rate": 0.00014623655913978496,
      "loss": 2.5307,
      "step": 50
    },
    {
      "epoch": 0.8032128514056225,
      "eval_loss": 0.296438992023468,
      "eval_runtime": 45.7506,
      "eval_samples_per_second": 10.907,
      "eval_steps_per_second": 2.732,
      "step": 50
    },
    {
      "epoch": 0.963855421686747,
      "grad_norm": 2.9974865913391113,
      "learning_rate": 0.00013548387096774193,
      "loss": 2.48,
      "step": 60
    },
    {
      "epoch": 0.963855421686747,
      "eval_loss": 0.2858414649963379,
      "eval_runtime": 47.1672,
      "eval_samples_per_second": 10.579,
      "eval_steps_per_second": 2.65,
      "step": 60
    },
    {
      "epoch": 1.1124497991967872,
      "grad_norm": 2.8883159160614014,
      "learning_rate": 0.00012473118279569893,
      "loss": 1.9267,
      "step": 70
    },
    {
      "epoch": 1.1124497991967872,
      "eval_loss": 0.28903448581695557,
      "eval_runtime": 45.0342,
      "eval_samples_per_second": 11.08,
      "eval_steps_per_second": 2.776,
      "step": 70
    },
    {
      "epoch": 1.2730923694779117,
      "grad_norm": 2.6527364253997803,
      "learning_rate": 0.00011397849462365593,
      "loss": 1.9188,
      "step": 80
    },
    {
      "epoch": 1.2730923694779117,
      "eval_loss": 0.284964382648468,
      "eval_runtime": 44.0349,
      "eval_samples_per_second": 11.332,
      "eval_steps_per_second": 2.839,
      "step": 80
    },
    {
      "epoch": 1.4337349397590362,
      "grad_norm": 3.009997606277466,
      "learning_rate": 0.0001032258064516129,
      "loss": 1.9806,
      "step": 90
    },
    {
      "epoch": 1.4337349397590362,
      "eval_loss": 0.2806684672832489,
      "eval_runtime": 46.0655,
      "eval_samples_per_second": 10.832,
      "eval_steps_per_second": 2.714,
      "step": 90
    },
    {
      "epoch": 1.5943775100401605,
      "grad_norm": 2.434161424636841,
      "learning_rate": 9.247311827956989e-05,
      "loss": 1.8809,
      "step": 100
    },
    {
      "epoch": 1.5943775100401605,
      "eval_loss": 0.27644357085227966,
      "eval_runtime": 44.1705,
      "eval_samples_per_second": 11.297,
      "eval_steps_per_second": 2.83,
      "step": 100
    },
    {
      "epoch": 1.7550200803212852,
      "grad_norm": 3.0396993160247803,
      "learning_rate": 8.172043010752689e-05,
      "loss": 1.8046,
      "step": 110
    },
    {
      "epoch": 1.7550200803212852,
      "eval_loss": 0.27500617504119873,
      "eval_runtime": 44.3244,
      "eval_samples_per_second": 11.258,
      "eval_steps_per_second": 2.82,
      "step": 110
    },
    {
      "epoch": 1.9156626506024095,
      "grad_norm": 2.961737632751465,
      "learning_rate": 7.096774193548388e-05,
      "loss": 1.929,
      "step": 120
    },
    {
      "epoch": 1.9156626506024095,
      "eval_loss": 0.2731240689754486,
      "eval_runtime": 43.8996,
      "eval_samples_per_second": 11.367,
      "eval_steps_per_second": 2.847,
      "step": 120
    },
    {
      "epoch": 2.0642570281124497,
      "grad_norm": 2.265627145767212,
      "learning_rate": 6.021505376344086e-05,
      "loss": 1.5674,
      "step": 130
    },
    {
      "epoch": 2.0642570281124497,
      "eval_loss": 0.2714921832084656,
      "eval_runtime": 45.3105,
      "eval_samples_per_second": 11.013,
      "eval_steps_per_second": 2.759,
      "step": 130
    },
    {
      "epoch": 2.2248995983935744,
      "grad_norm": 2.743725061416626,
      "learning_rate": 4.9462365591397855e-05,
      "loss": 1.5314,
      "step": 140
    },
    {
      "epoch": 2.2248995983935744,
      "eval_loss": 0.28298166394233704,
      "eval_runtime": 47.8967,
      "eval_samples_per_second": 10.418,
      "eval_steps_per_second": 2.61,
      "step": 140
    },
    {
      "epoch": 2.3855421686746987,
      "grad_norm": 3.021275043487549,
      "learning_rate": 3.870967741935484e-05,
      "loss": 1.5405,
      "step": 150
    },
    {
      "epoch": 2.3855421686746987,
      "eval_loss": 0.2852790653705597,
      "eval_runtime": 47.6731,
      "eval_samples_per_second": 10.467,
      "eval_steps_per_second": 2.622,
      "step": 150
    },
    {
      "epoch": 2.5461847389558234,
      "grad_norm": 2.859989881515503,
      "learning_rate": 2.7956989247311828e-05,
      "loss": 1.4902,
      "step": 160
    },
    {
      "epoch": 2.5461847389558234,
      "eval_loss": 0.28039097785949707,
      "eval_runtime": 49.8843,
      "eval_samples_per_second": 10.003,
      "eval_steps_per_second": 2.506,
      "step": 160
    },
    {
      "epoch": 2.7068273092369477,
      "grad_norm": 2.8204994201660156,
      "learning_rate": 1.7204301075268818e-05,
      "loss": 1.4331,
      "step": 170
    },
    {
      "epoch": 2.7068273092369477,
      "eval_loss": 0.27951765060424805,
      "eval_runtime": 49.9628,
      "eval_samples_per_second": 9.987,
      "eval_steps_per_second": 2.502,
      "step": 170
    },
    {
      "epoch": 2.8674698795180724,
      "grad_norm": 2.8627824783325195,
      "learning_rate": 6.451612903225806e-06,
      "loss": 1.4442,
      "step": 180
    },
    {
      "epoch": 2.8674698795180724,
      "eval_loss": 0.280029833316803,
      "eval_runtime": 49.9016,
      "eval_samples_per_second": 10.0,
      "eval_steps_per_second": 2.505,
      "step": 180
    }
  ],
  "logging_steps": 10,
  "max_steps": 186,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6.632546051948544e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}