{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 680,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14705882352941177,
"grad_norm": 5.515628814697266,
"learning_rate": 2.9411764705882354e-05,
"loss": 1.1436,
"step": 10
},
{
"epoch": 0.29411764705882354,
"grad_norm": 2.5779266357421875,
"learning_rate": 5.882352941176471e-05,
"loss": 0.4215,
"step": 20
},
{
"epoch": 0.4411764705882353,
"grad_norm": 1.5243560075759888,
"learning_rate": 8.823529411764706e-05,
"loss": 0.2531,
"step": 30
},
{
"epoch": 0.5882352941176471,
"grad_norm": 1.0052002668380737,
"learning_rate": 9.997871633546257e-05,
"loss": 0.1957,
"step": 40
},
{
"epoch": 0.7352941176470589,
"grad_norm": 1.7932289838790894,
"learning_rate": 9.98487151097676e-05,
"loss": 0.1667,
"step": 50
},
{
"epoch": 0.8823529411764706,
"grad_norm": 0.6952548623085022,
"learning_rate": 9.960084393841355e-05,
"loss": 0.1425,
"step": 60
},
{
"epoch": 1.0294117647058822,
"grad_norm": 0.944196343421936,
"learning_rate": 9.923568892600578e-05,
"loss": 0.1222,
"step": 70
},
{
"epoch": 1.1764705882352942,
"grad_norm": 0.6027107238769531,
"learning_rate": 9.875411350104744e-05,
"loss": 0.1081,
"step": 80
},
{
"epoch": 1.3235294117647058,
"grad_norm": 0.7133479714393616,
"learning_rate": 9.815725637431662e-05,
"loss": 0.1004,
"step": 90
},
{
"epoch": 1.4705882352941178,
"grad_norm": 0.6044264435768127,
"learning_rate": 9.744652884632406e-05,
"loss": 0.0935,
"step": 100
},
{
"epoch": 1.6176470588235294,
"grad_norm": 0.5952054858207703,
"learning_rate": 9.662361147021779e-05,
"loss": 0.0938,
"step": 110
},
{
"epoch": 1.7647058823529411,
"grad_norm": 0.7661593556404114,
"learning_rate": 9.569045007802559e-05,
"loss": 0.0876,
"step": 120
},
{
"epoch": 1.9117647058823528,
"grad_norm": 0.5203129649162292,
"learning_rate": 9.464925117963133e-05,
"loss": 0.0799,
"step": 130
},
{
"epoch": 2.0588235294117645,
"grad_norm": 1.392182469367981,
"learning_rate": 9.35024767453647e-05,
"loss": 0.0848,
"step": 140
},
{
"epoch": 2.2058823529411766,
"grad_norm": 0.44986793398857117,
"learning_rate": 9.225283838454111e-05,
"loss": 0.0789,
"step": 150
},
{
"epoch": 2.3529411764705883,
"grad_norm": 0.4528612196445465,
"learning_rate": 9.090329093371666e-05,
"loss": 0.0776,
"step": 160
},
{
"epoch": 2.5,
"grad_norm": 0.5809573531150818,
"learning_rate": 8.945702546981969e-05,
"loss": 0.0715,
"step": 170
},
{
"epoch": 2.6470588235294117,
"grad_norm": 0.5827745795249939,
"learning_rate": 8.791746176467907e-05,
"loss": 0.0716,
"step": 180
},
{
"epoch": 2.7941176470588234,
"grad_norm": 0.5849335789680481,
"learning_rate": 8.628824019879137e-05,
"loss": 0.0653,
"step": 190
},
{
"epoch": 2.9411764705882355,
"grad_norm": 0.661180317401886,
"learning_rate": 8.457321315344694e-05,
"loss": 0.0669,
"step": 200
},
{
"epoch": 3.088235294117647,
"grad_norm": 0.5251627564430237,
"learning_rate": 8.277643590156894e-05,
"loss": 0.069,
"step": 210
},
{
"epoch": 3.235294117647059,
"grad_norm": 0.471332311630249,
"learning_rate": 8.090215701880419e-05,
"loss": 0.0609,
"step": 220
},
{
"epoch": 3.3823529411764706,
"grad_norm": 0.42382729053497314,
"learning_rate": 7.89548083375394e-05,
"loss": 0.0622,
"step": 230
},
{
"epoch": 3.5294117647058822,
"grad_norm": 0.899319589138031,
"learning_rate": 7.693899446759727e-05,
"loss": 0.0648,
"step": 240
},
{
"epoch": 3.6764705882352944,
"grad_norm": 0.6775935292243958,
"learning_rate": 7.485948190839077e-05,
"loss": 0.0598,
"step": 250
},
{
"epoch": 3.8235294117647056,
"grad_norm": 0.43540897965431213,
"learning_rate": 7.272118777828108e-05,
"loss": 0.0573,
"step": 260
},
{
"epoch": 3.9705882352941178,
"grad_norm": 0.27909591794013977,
"learning_rate": 7.052916818778918e-05,
"loss": 0.0492,
"step": 270
},
{
"epoch": 4.117647058823529,
"grad_norm": 0.42636606097221375,
"learning_rate": 6.828860628415253e-05,
"loss": 0.0557,
"step": 280
},
{
"epoch": 4.264705882352941,
"grad_norm": 0.4702949821949005,
"learning_rate": 6.60047999954972e-05,
"loss": 0.0536,
"step": 290
},
{
"epoch": 4.411764705882353,
"grad_norm": 0.5331495404243469,
"learning_rate": 6.368314950360415e-05,
"loss": 0.0525,
"step": 300
},
{
"epoch": 4.5588235294117645,
"grad_norm": 0.301176518201828,
"learning_rate": 6.132914447489137e-05,
"loss": 0.0568,
"step": 310
},
{
"epoch": 4.705882352941177,
"grad_norm": 0.2303120642900467,
"learning_rate": 5.8948351079804875e-05,
"loss": 0.0432,
"step": 320
},
{
"epoch": 4.852941176470588,
"grad_norm": 0.37262749671936035,
"learning_rate": 5.654639883131178e-05,
"loss": 0.0491,
"step": 330
},
{
"epoch": 5.0,
"grad_norm": 0.6805188059806824,
"learning_rate": 5.4128967273616625e-05,
"loss": 0.0513,
"step": 340
},
{
"epoch": 5.147058823529412,
"grad_norm": 0.4015423655509949,
"learning_rate": 5.170177255257618e-05,
"loss": 0.0456,
"step": 350
},
{
"epoch": 5.294117647058823,
"grad_norm": 0.3756394386291504,
"learning_rate": 4.9270553899567686e-05,
"loss": 0.0535,
"step": 360
},
{
"epoch": 5.4411764705882355,
"grad_norm": 0.3560592532157898,
"learning_rate": 4.6841060060770154e-05,
"loss": 0.0463,
"step": 370
},
{
"epoch": 5.588235294117647,
"grad_norm": 0.4422471523284912,
"learning_rate": 4.441903570394739e-05,
"loss": 0.0417,
"step": 380
},
{
"epoch": 5.735294117647059,
"grad_norm": 0.577297568321228,
"learning_rate": 4.201020783487464e-05,
"loss": 0.0402,
"step": 390
},
{
"epoch": 5.882352941176471,
"grad_norm": 0.7647914290428162,
"learning_rate": 3.962027225552807e-05,
"loss": 0.0402,
"step": 400
},
{
"epoch": 6.029411764705882,
"grad_norm": 0.8858449459075928,
"learning_rate": 3.7254880096057073e-05,
"loss": 0.0479,
"step": 410
},
{
"epoch": 6.176470588235294,
"grad_norm": 0.49515625834465027,
"learning_rate": 3.491962445238569e-05,
"loss": 0.0434,
"step": 420
},
{
"epoch": 6.323529411764706,
"grad_norm": 0.548555850982666,
"learning_rate": 3.262002716103897e-05,
"loss": 0.0442,
"step": 430
},
{
"epoch": 6.470588235294118,
"grad_norm": 0.35759156942367554,
"learning_rate": 3.0361525742465973e-05,
"loss": 0.0411,
"step": 440
},
{
"epoch": 6.617647058823529,
"grad_norm": 0.31173354387283325,
"learning_rate": 2.8149460543732664e-05,
"loss": 0.0376,
"step": 450
},
{
"epoch": 6.764705882352941,
"grad_norm": 0.24947527050971985,
"learning_rate": 2.598906211098643e-05,
"loss": 0.0391,
"step": 460
},
{
"epoch": 6.911764705882353,
"grad_norm": 0.20982353389263153,
"learning_rate": 2.388543882155067e-05,
"loss": 0.0365,
"step": 470
},
{
"epoch": 7.0588235294117645,
"grad_norm": 0.3836628496646881,
"learning_rate": 2.184356480489432e-05,
"loss": 0.0365,
"step": 480
},
{
"epoch": 7.205882352941177,
"grad_norm": 0.23856157064437866,
"learning_rate": 1.9868268181037185e-05,
"loss": 0.0333,
"step": 490
},
{
"epoch": 7.352941176470588,
"grad_norm": 0.6093345880508423,
"learning_rate": 1.796421964420285e-05,
"loss": 0.0389,
"step": 500
},
{
"epoch": 7.5,
"grad_norm": 0.2536391019821167,
"learning_rate": 1.6135921418712956e-05,
"loss": 0.0355,
"step": 510
},
{
"epoch": 7.647058823529412,
"grad_norm": 0.22027313709259033,
"learning_rate": 1.4387696613237612e-05,
"loss": 0.0331,
"step": 520
},
{
"epoch": 7.794117647058823,
"grad_norm": 0.367398738861084,
"learning_rate": 1.2723678998574512e-05,
"loss": 0.0352,
"step": 530
},
{
"epoch": 7.9411764705882355,
"grad_norm": 0.24603775143623352,
"learning_rate": 1.114780323312724e-05,
"loss": 0.0399,
"step": 540
},
{
"epoch": 8.088235294117647,
"grad_norm": 0.22743625938892365,
"learning_rate": 9.663795559195733e-06,
"loss": 0.0315,
"step": 550
},
{
"epoch": 8.235294117647058,
"grad_norm": 0.3211243152618408,
"learning_rate": 8.275164992077556e-06,
"loss": 0.033,
"step": 560
},
{
"epoch": 8.382352941176471,
"grad_norm": 0.3177715241909027,
"learning_rate": 6.985195022814067e-06,
"loss": 0.0366,
"step": 570
},
{
"epoch": 8.529411764705882,
"grad_norm": 0.4995149075984955,
"learning_rate": 5.796935854200763e-06,
"loss": 0.0353,
"step": 580
},
{
"epoch": 8.676470588235293,
"grad_norm": 0.26444506645202637,
"learning_rate": 4.713197188420026e-06,
"loss": 0.0311,
"step": 590
},
{
"epoch": 8.823529411764707,
"grad_norm": 0.2560294568538666,
"learning_rate": 3.7365415833504725e-06,
"loss": 0.0355,
"step": 600
},
{
"epoch": 8.970588235294118,
"grad_norm": 0.20665033161640167,
"learning_rate": 2.869278393262226e-06,
"loss": 0.0367,
"step": 610
},
{
"epoch": 9.117647058823529,
"grad_norm": 0.4208621382713318,
"learning_rate": 2.113458308225458e-06,
"loss": 0.0346,
"step": 620
},
{
"epoch": 9.264705882352942,
"grad_norm": 0.3151559829711914,
"learning_rate": 1.4708685051444515e-06,
"loss": 0.0332,
"step": 630
},
{
"epoch": 9.411764705882353,
"grad_norm": 0.5470899343490601,
"learning_rate": 9.430284218824026e-07,
"loss": 0.0319,
"step": 640
},
{
"epoch": 9.558823529411764,
"grad_norm": 1.0164936780929565,
"learning_rate": 5.311861644696048e-07,
"loss": 0.0378,
"step": 650
},
{
"epoch": 9.705882352941176,
"grad_norm": 0.18899625539779663,
"learning_rate": 2.363155558901542e-07,
"loss": 0.0349,
"step": 660
},
{
"epoch": 9.852941176470589,
"grad_norm": 0.18862251937389374,
"learning_rate": 5.911383342556143e-08,
"loss": 0.036,
"step": 670
},
{
"epoch": 10.0,
"grad_norm": 0.724671483039856,
"learning_rate": 0.0,
"loss": 0.0299,
"step": 680
},
{
"epoch": 10.0,
"step": 680,
"total_flos": 9.581997557091456e+16,
"train_loss": 0.08232775286716573,
"train_runtime": 870.4339,
"train_samples_per_second": 49.378,
"train_steps_per_second": 0.781
}
],
"logging_steps": 10,
"max_steps": 680,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.581997557091456e+16,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}