|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.017106079072850514, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 8.553039536425257e-05, |
|
"eval_loss": 2.3803133964538574, |
|
"eval_runtime": 144.7679, |
|
"eval_samples_per_second": 34.006, |
|
"eval_steps_per_second": 17.007, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0008553039536425257, |
|
"grad_norm": 5.870095252990723, |
|
"learning_rate": 0.0002, |
|
"loss": 8.9446, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0017106079072850514, |
|
"grad_norm": 4.383564472198486, |
|
"learning_rate": 0.0002, |
|
"loss": 8.8167, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.002565911860927577, |
|
"grad_norm": 7.525699615478516, |
|
"learning_rate": 0.0002, |
|
"loss": 9.0408, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.003421215814570103, |
|
"grad_norm": 4.683404922485352, |
|
"learning_rate": 0.0002, |
|
"loss": 8.1013, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.004276519768212628, |
|
"grad_norm": 7.45261812210083, |
|
"learning_rate": 0.0002, |
|
"loss": 8.7849, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.004276519768212628, |
|
"eval_loss": 2.1148810386657715, |
|
"eval_runtime": 142.9461, |
|
"eval_samples_per_second": 34.44, |
|
"eval_steps_per_second": 17.223, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.005131823721855154, |
|
"grad_norm": 4.496049880981445, |
|
"learning_rate": 0.0002, |
|
"loss": 8.26, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.00598712767549768, |
|
"grad_norm": 3.857893466949463, |
|
"learning_rate": 0.0002, |
|
"loss": 8.4083, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.006842431629140206, |
|
"grad_norm": 5.40677547454834, |
|
"learning_rate": 0.0002, |
|
"loss": 8.095, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.007697735582782732, |
|
"grad_norm": 5.3933563232421875, |
|
"learning_rate": 0.0002, |
|
"loss": 8.202, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.008553039536425257, |
|
"grad_norm": 5.276631832122803, |
|
"learning_rate": 0.0002, |
|
"loss": 8.2085, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.008553039536425257, |
|
"eval_loss": 2.0359315872192383, |
|
"eval_runtime": 143.1344, |
|
"eval_samples_per_second": 34.394, |
|
"eval_steps_per_second": 17.201, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.009408343490067783, |
|
"grad_norm": 5.116215705871582, |
|
"learning_rate": 0.0002, |
|
"loss": 7.4807, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.010263647443710309, |
|
"grad_norm": 6.443859100341797, |
|
"learning_rate": 0.0002, |
|
"loss": 8.0507, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.011118951397352835, |
|
"grad_norm": 4.91663122177124, |
|
"learning_rate": 0.0002, |
|
"loss": 8.4953, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.01197425535099536, |
|
"grad_norm": 4.3198699951171875, |
|
"learning_rate": 0.0002, |
|
"loss": 7.7724, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.012829559304637885, |
|
"grad_norm": 5.600128650665283, |
|
"learning_rate": 0.0002, |
|
"loss": 7.558, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.012829559304637885, |
|
"eval_loss": 1.9934526681900024, |
|
"eval_runtime": 143.0887, |
|
"eval_samples_per_second": 34.405, |
|
"eval_steps_per_second": 17.206, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.013684863258280411, |
|
"grad_norm": 4.960232257843018, |
|
"learning_rate": 0.0002, |
|
"loss": 7.9318, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.014540167211922937, |
|
"grad_norm": 5.194616794586182, |
|
"learning_rate": 0.0002, |
|
"loss": 7.9618, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.015395471165565463, |
|
"grad_norm": 4.613633632659912, |
|
"learning_rate": 0.0002, |
|
"loss": 7.2951, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.01625077511920799, |
|
"grad_norm": 5.5177507400512695, |
|
"learning_rate": 0.0002, |
|
"loss": 7.6231, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.017106079072850514, |
|
"grad_norm": 13.841983795166016, |
|
"learning_rate": 0.0002, |
|
"loss": 8.0881, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.017106079072850514, |
|
"eval_loss": 1.9552608728408813, |
|
"eval_runtime": 143.1864, |
|
"eval_samples_per_second": 34.382, |
|
"eval_steps_per_second": 17.194, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.534314936519885e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|