|
{ |
|
"best_metric": 1.1881097555160522, |
|
"best_model_checkpoint": "../facebook/nllb-200-3.3B-finetuned/checkpoint-1000", |
|
"epoch": 5000.0, |
|
"eval_steps": 500, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 500.0, |
|
"grad_norm": 13.488256454467773, |
|
"learning_rate": 9.758186548415274e-06, |
|
"loss": 4.34, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"eval_gen_len": 26.0, |
|
"eval_loss": 2.901174306869507, |
|
"eval_rouge": 0.05, |
|
"eval_runtime": 7.0444, |
|
"eval_samples_per_second": 0.284, |
|
"eval_steps_per_second": 0.142, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1000.0, |
|
"grad_norm": 0.9894864559173584, |
|
"learning_rate": 9.050617527877911e-06, |
|
"loss": 0.609, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1000.0, |
|
"eval_gen_len": 26.0, |
|
"eval_loss": 1.1881097555160522, |
|
"eval_rouge": 0.098, |
|
"eval_runtime": 1.1556, |
|
"eval_samples_per_second": 1.731, |
|
"eval_steps_per_second": 0.865, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1500.0, |
|
"grad_norm": 0.1642373949289322, |
|
"learning_rate": 7.946545841400035e-06, |
|
"loss": 0.0196, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1500.0, |
|
"eval_gen_len": 30.5, |
|
"eval_loss": 1.5324629545211792, |
|
"eval_rouge": 0.1493, |
|
"eval_runtime": 6.9442, |
|
"eval_samples_per_second": 0.288, |
|
"eval_steps_per_second": 0.144, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2000.0, |
|
"grad_norm": 0.08364015817642212, |
|
"learning_rate": 6.554045718173867e-06, |
|
"loss": 0.0061, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2000.0, |
|
"eval_gen_len": 33.5, |
|
"eval_loss": 1.5447587966918945, |
|
"eval_rouge": 0.125, |
|
"eval_runtime": 6.9455, |
|
"eval_samples_per_second": 0.288, |
|
"eval_steps_per_second": 0.144, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2500.0, |
|
"grad_norm": 0.05517512932419777, |
|
"learning_rate": 5.0094247723796405e-06, |
|
"loss": 0.0036, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2500.0, |
|
"eval_gen_len": 33.5, |
|
"eval_loss": 1.5625241994857788, |
|
"eval_rouge": 0.125, |
|
"eval_runtime": 6.9319, |
|
"eval_samples_per_second": 0.289, |
|
"eval_steps_per_second": 0.144, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3000.0, |
|
"grad_norm": 0.04180261120200157, |
|
"learning_rate": 3.463881264198645e-06, |
|
"loss": 0.0025, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3000.0, |
|
"eval_gen_len": 33.5, |
|
"eval_loss": 1.5641270875930786, |
|
"eval_rouge": 0.125, |
|
"eval_runtime": 6.9474, |
|
"eval_samples_per_second": 0.288, |
|
"eval_steps_per_second": 0.144, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3500.0, |
|
"grad_norm": 0.03480682149529457, |
|
"learning_rate": 2.0687037606464554e-06, |
|
"loss": 0.002, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3500.0, |
|
"eval_gen_len": 33.5, |
|
"eval_loss": 1.5625765323638916, |
|
"eval_rouge": 0.125, |
|
"eval_runtime": 6.9229, |
|
"eval_samples_per_second": 0.289, |
|
"eval_steps_per_second": 0.144, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4000.0, |
|
"grad_norm": 0.031249279156327248, |
|
"learning_rate": 9.60461956544021e-07, |
|
"loss": 0.0017, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4000.0, |
|
"eval_gen_len": 24.5, |
|
"eval_loss": 1.4340063333511353, |
|
"eval_rouge": 0.0357, |
|
"eval_runtime": 6.9464, |
|
"eval_samples_per_second": 0.288, |
|
"eval_steps_per_second": 0.144, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4500.0, |
|
"grad_norm": 0.029812365770339966, |
|
"learning_rate": 2.4763828125157654e-07, |
|
"loss": 0.0016, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4500.0, |
|
"eval_gen_len": 24.5, |
|
"eval_loss": 1.4486483335494995, |
|
"eval_rouge": 0.0357, |
|
"eval_runtime": 6.9642, |
|
"eval_samples_per_second": 0.287, |
|
"eval_steps_per_second": 0.144, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5000.0, |
|
"grad_norm": 0.029535507783293724, |
|
"learning_rate": 8.882641330809627e-12, |
|
"loss": 0.0016, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 5000.0, |
|
"eval_gen_len": 24.5, |
|
"eval_loss": 1.4524964094161987, |
|
"eval_rouge": 0.0357, |
|
"eval_runtime": 6.9555, |
|
"eval_samples_per_second": 0.288, |
|
"eval_steps_per_second": 0.144, |
|
"step": 5000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5000, |
|
"save_steps": 500, |
|
"total_flos": 3.181132972032e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|