{
  "best_global_step": 2500,
  "best_metric": 0.3258962035179138,
  "best_model_checkpoint": "output/checkpoint-2500",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 2607,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11514104778353483,
      "grad_norm": 0.36587607860565186,
      "learning_rate": 7.586206896551724e-05,
      "loss": 1.271,
      "mean_token_accuracy": 0.7241690966486931,
      "num_tokens": 1095302.0,
      "step": 100
    },
    {
      "epoch": 0.23028209556706966,
      "grad_norm": 0.41935256123542786,
      "learning_rate": 0.0001524904214559387,
      "loss": 0.8833,
      "mean_token_accuracy": 0.7880920493602752,
      "num_tokens": 2219842.0,
      "step": 200
    },
    {
      "epoch": 0.3454231433506045,
      "grad_norm": 0.4040184020996094,
      "learning_rate": 0.0001998705544249015,
      "loss": 0.6926,
      "mean_token_accuracy": 0.8305454832315445,
      "num_tokens": 3321210.0,
      "step": 300
    },
    {
      "epoch": 0.4605641911341393,
      "grad_norm": 0.3564732074737549,
      "learning_rate": 0.0001982973099683902,
      "loss": 0.5373,
      "mean_token_accuracy": 0.8680069527029991,
      "num_tokens": 4464927.0,
      "step": 400
    },
    {
      "epoch": 0.5757052389176741,
      "grad_norm": 0.4126855731010437,
      "learning_rate": 0.00019496396989003193,
      "loss": 0.453,
      "mean_token_accuracy": 0.8878908574581146,
      "num_tokens": 5583328.0,
      "step": 500
    },
    {
      "epoch": 0.5757052389176741,
      "eval_loss": 0.6618072986602783,
      "eval_mean_token_accuracy": 0.8427139545351731,
      "eval_num_tokens": 5583328.0,
      "eval_runtime": 303.7163,
      "eval_samples_per_second": 5.084,
      "eval_steps_per_second": 1.271,
      "step": 500
    },
    {
      "epoch": 0.690846286701209,
      "grad_norm": 0.3499382436275482,
      "learning_rate": 0.0001899302204343428,
      "loss": 0.3756,
      "mean_token_accuracy": 0.9073699393868446,
      "num_tokens": 6704920.0,
      "step": 600
    },
    {
      "epoch": 0.8059873344847438,
      "grad_norm": 0.38613247871398926,
      "learning_rate": 0.00018328619509919044,
      "loss": 0.332,
      "mean_token_accuracy": 0.9180173775553704,
      "num_tokens": 7818546.0,
      "step": 700
    },
    {
      "epoch": 0.9211283822682786,
      "grad_norm": 0.3983283042907715,
      "learning_rate": 0.00017515086072006204,
      "loss": 0.285,
      "mean_token_accuracy": 0.9301327157020569,
      "num_tokens": 8936663.0,
      "step": 800
    },
    {
      "epoch": 1.035693724812896,
      "grad_norm": 0.4746868312358856,
      "learning_rate": 0.00016566988726928513,
      "loss": 0.2345,
      "mean_token_accuracy": 0.9422861801919027,
      "num_tokens": 10063248.0,
      "step": 900
    },
    {
      "epoch": 1.1508347725964305,
      "grad_norm": 0.3728397488594055,
      "learning_rate": 0.00015501303951322943,
      "loss": 0.2199,
      "mean_token_accuracy": 0.9462704074382782,
      "num_tokens": 11183984.0,
      "step": 1000
    },
    {
      "epoch": 1.1508347725964305,
      "eval_loss": 0.4751618206501007,
      "eval_mean_token_accuracy": 0.8873606966567164,
      "eval_num_tokens": 11183984.0,
      "eval_runtime": 304.4089,
      "eval_samples_per_second": 5.072,
      "eval_steps_per_second": 1.268,
      "step": 1000
    },
    {
      "epoch": 1.2659758203799654,
      "grad_norm": 0.38173842430114746,
      "learning_rate": 0.00014337113723205126,
      "loss": 0.2083,
      "mean_token_accuracy": 0.9480547454953193,
      "num_tokens": 12290743.0,
      "step": 1100
    },
    {
      "epoch": 1.3811168681635002,
      "grad_norm": 0.32138150930404663,
      "learning_rate": 0.00013095263843179028,
      "loss": 0.1801,
      "mean_token_accuracy": 0.9549228352308273,
      "num_tokens": 13386739.0,
      "step": 1200
    },
    {
      "epoch": 1.496257915947035,
      "grad_norm": 0.27364546060562134,
      "learning_rate": 0.00011797990672926652,
      "loss": 0.1706,
      "mean_token_accuracy": 0.9577119436860084,
      "num_tokens": 14529074.0,
      "step": 1300
    },
    {
      "epoch": 1.61139896373057,
      "grad_norm": 0.3040049970149994,
      "learning_rate": 0.00010468522974537567,
      "loss": 0.1571,
      "mean_token_accuracy": 0.9611357498168945,
      "num_tokens": 15631838.0,
      "step": 1400
    },
    {
      "epoch": 1.7265400115141047,
      "grad_norm": 0.30038943886756897,
      "learning_rate": 9.130665980078394e-05,
      "loss": 0.1403,
      "mean_token_accuracy": 0.9660706561803818,
      "num_tokens": 16764080.0,
      "step": 1500
    },
    {
      "epoch": 1.7265400115141047,
      "eval_loss": 0.38552024960517883,
      "eval_mean_token_accuracy": 0.9092679991932113,
      "eval_num_tokens": 16764080.0,
      "eval_runtime": 301.8929,
      "eval_samples_per_second": 5.114,
      "eval_steps_per_second": 1.279,
      "step": 1500
    },
    {
      "epoch": 1.8416810592976396,
      "grad_norm": 0.09564235061407089,
      "learning_rate": 7.808375138984745e-05,
      "loss": 0.132,
      "mean_token_accuracy": 0.967877941429615,
      "num_tokens": 17861001.0,
      "step": 1600
    },
    {
      "epoch": 1.9568221070811744,
      "grad_norm": 0.29065728187561035,
      "learning_rate": 6.525327175685459e-05,
      "loss": 0.1325,
      "mean_token_accuracy": 0.9674064460396766,
      "num_tokens": 18955484.0,
      "step": 1700
    },
    {
      "epoch": 2.071387449625792,
      "grad_norm": 0.14087554812431335,
      "learning_rate": 5.304496138031373e-05,
      "loss": 0.128,
      "mean_token_accuracy": 0.9687896742293584,
      "num_tokens": 20065177.0,
      "step": 1800
    },
    {
      "epoch": 2.186528497409326,
      "grad_norm": 0.3411475718021393,
      "learning_rate": 4.167742027736482e-05,
      "loss": 0.1113,
      "mean_token_accuracy": 0.9726087141036988,
      "num_tokens": 21183569.0,
      "step": 1900
    },
    {
      "epoch": 2.301669545192861,
      "grad_norm": 0.12446445226669312,
      "learning_rate": 3.135419378747742e-05,
      "loss": 0.0979,
      "mean_token_accuracy": 0.9754682299494744,
      "num_tokens": 22291098.0,
      "step": 2000
    },
    {
      "epoch": 2.301669545192861,
      "eval_loss": 0.3393489718437195,
      "eval_mean_token_accuracy": 0.9214305071633097,
      "eval_num_tokens": 22291098.0,
      "eval_runtime": 279.3526,
      "eval_samples_per_second": 5.527,
      "eval_steps_per_second": 1.382,
      "step": 2000
    },
    {
      "epoch": 2.416810592976396,
      "grad_norm": 0.3788171708583832,
      "learning_rate": 2.226012792275538e-05,
      "loss": 0.0892,
      "mean_token_accuracy": 0.9781047487258911,
      "num_tokens": 23410315.0,
      "step": 2100
    },
    {
      "epoch": 2.5319516407599307,
      "grad_norm": 0.17727908492088318,
      "learning_rate": 1.4558059545351143e-05,
      "loss": 0.0908,
      "mean_token_accuracy": 0.9779339152574539,
      "num_tokens": 24545121.0,
      "step": 2200
    },
    {
      "epoch": 2.6470926885434656,
      "grad_norm": 0.16450349986553192,
      "learning_rate": 8.385900637134792e-06,
      "loss": 0.0924,
      "mean_token_accuracy": 0.9776055815815926,
      "num_tokens": 25675956.0,
      "step": 2300
    },
    {
      "epoch": 2.7622337363270004,
      "grad_norm": 0.2806933522224426,
      "learning_rate": 3.85416887020934e-06,
      "loss": 0.1021,
      "mean_token_accuracy": 0.9756536969542503,
      "num_tokens": 26787936.0,
      "step": 2400
    },
    {
      "epoch": 2.8773747841105353,
      "grad_norm": 0.23432117700576782,
      "learning_rate": 1.0440086954749517e-06,
      "loss": 0.0969,
      "mean_token_accuracy": 0.9766348168253899,
      "num_tokens": 27883884.0,
      "step": 2500
    },
    {
      "epoch": 2.8773747841105353,
      "eval_loss": 0.3258962035179138,
      "eval_mean_token_accuracy": 0.9253271395060683,
      "eval_num_tokens": 27883884.0,
      "eval_runtime": 278.746,
      "eval_samples_per_second": 5.539,
      "eval_steps_per_second": 1.385,
      "step": 2500
    },
    {
      "epoch": 2.99251583189407,
      "grad_norm": 0.3019677996635437,
      "learning_rate": 5.738383307818396e-09,
      "loss": 0.0937,
      "mean_token_accuracy": 0.9774355563521385,
      "num_tokens": 28979973.0,
      "step": 2600
    }
  ],
  "logging_steps": 100,
  "max_steps": 2607,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.979053737195946e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}