{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.067484662576687,
"eval_steps": 10000000,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03067484662576687,
"grad_norm": 40.34086591617319,
"learning_rate": 6.134969325153374e-09,
"loss": 2.8872,
"step": 10
},
{
"epoch": 0.06134969325153374,
"grad_norm": 40.21853524962094,
"learning_rate": 1.2269938650306748e-08,
"loss": 2.8167,
"step": 20
},
{
"epoch": 0.09202453987730061,
"grad_norm": 42.91710233147346,
"learning_rate": 1.8404907975460124e-08,
"loss": 2.9017,
"step": 30
},
{
"epoch": 0.12269938650306748,
"grad_norm": 42.19478017724499,
"learning_rate": 2.4539877300613496e-08,
"loss": 2.9177,
"step": 40
},
{
"epoch": 0.15337423312883436,
"grad_norm": 39.89856563443135,
"learning_rate": 3.067484662576687e-08,
"loss": 2.8621,
"step": 50
},
{
"epoch": 0.18404907975460122,
"grad_norm": 39.031000731023376,
"learning_rate": 3.680981595092025e-08,
"loss": 2.8787,
"step": 60
},
{
"epoch": 0.2147239263803681,
"grad_norm": 40.17556155872599,
"learning_rate": 4.294478527607362e-08,
"loss": 2.8723,
"step": 70
},
{
"epoch": 0.24539877300613497,
"grad_norm": 40.44537988014516,
"learning_rate": 4.907975460122699e-08,
"loss": 2.8328,
"step": 80
},
{
"epoch": 0.27607361963190186,
"grad_norm": 36.38071257012731,
"learning_rate": 5.521472392638036e-08,
"loss": 2.8269,
"step": 90
},
{
"epoch": 0.3067484662576687,
"grad_norm": 35.01954486493492,
"learning_rate": 6.134969325153374e-08,
"loss": 2.8021,
"step": 100
},
{
"epoch": 0.3374233128834356,
"grad_norm": 35.816703465963236,
"learning_rate": 6.748466257668711e-08,
"loss": 2.7854,
"step": 110
},
{
"epoch": 0.36809815950920244,
"grad_norm": 25.57200780260961,
"learning_rate": 7.36196319018405e-08,
"loss": 2.6267,
"step": 120
},
{
"epoch": 0.3987730061349693,
"grad_norm": 22.490209736367557,
"learning_rate": 7.975460122699386e-08,
"loss": 2.5126,
"step": 130
},
{
"epoch": 0.4294478527607362,
"grad_norm": 21.154756083713355,
"learning_rate": 8.588957055214724e-08,
"loss": 2.5148,
"step": 140
},
{
"epoch": 0.4601226993865031,
"grad_norm": 18.721430927730754,
"learning_rate": 9.202453987730061e-08,
"loss": 2.4065,
"step": 150
},
{
"epoch": 0.49079754601226994,
"grad_norm": 11.049403510574644,
"learning_rate": 9.815950920245398e-08,
"loss": 2.3448,
"step": 160
},
{
"epoch": 0.5214723926380368,
"grad_norm": 8.021100065182837,
"learning_rate": 1.0429447852760735e-07,
"loss": 2.1846,
"step": 170
},
{
"epoch": 0.5521472392638037,
"grad_norm": 6.759878770426535,
"learning_rate": 1.1042944785276073e-07,
"loss": 2.1407,
"step": 180
},
{
"epoch": 0.5828220858895705,
"grad_norm": 5.6950515516559275,
"learning_rate": 1.165644171779141e-07,
"loss": 2.0731,
"step": 190
},
{
"epoch": 0.6134969325153374,
"grad_norm": 4.589530123869859,
"learning_rate": 1.2269938650306748e-07,
"loss": 2.096,
"step": 200
},
{
"epoch": 0.6441717791411042,
"grad_norm": 4.0999552773713255,
"learning_rate": 1.2883435582822087e-07,
"loss": 2.0651,
"step": 210
},
{
"epoch": 0.6748466257668712,
"grad_norm": 3.8635184963656877,
"learning_rate": 1.3496932515337422e-07,
"loss": 2.0136,
"step": 220
},
{
"epoch": 0.7055214723926381,
"grad_norm": 3.801649063409229,
"learning_rate": 1.4110429447852758e-07,
"loss": 2.0283,
"step": 230
},
{
"epoch": 0.7361963190184049,
"grad_norm": 3.499553222570716,
"learning_rate": 1.47239263803681e-07,
"loss": 2.0207,
"step": 240
},
{
"epoch": 0.7668711656441718,
"grad_norm": 3.5859940211720125,
"learning_rate": 1.5337423312883435e-07,
"loss": 1.9723,
"step": 250
},
{
"epoch": 0.7975460122699386,
"grad_norm": 3.5360060488259433,
"learning_rate": 1.595092024539877e-07,
"loss": 1.9378,
"step": 260
},
{
"epoch": 0.8282208588957055,
"grad_norm": 3.4861162838130446,
"learning_rate": 1.656441717791411e-07,
"loss": 2.0179,
"step": 270
},
{
"epoch": 0.8588957055214724,
"grad_norm": 3.5048041671476895,
"learning_rate": 1.7177914110429448e-07,
"loss": 1.9889,
"step": 280
},
{
"epoch": 0.8895705521472392,
"grad_norm": 3.5320508940958515,
"learning_rate": 1.7791411042944784e-07,
"loss": 1.9974,
"step": 290
},
{
"epoch": 0.9202453987730062,
"grad_norm": 3.479602078136922,
"learning_rate": 1.8404907975460122e-07,
"loss": 1.94,
"step": 300
},
{
"epoch": 0.950920245398773,
"grad_norm": 3.463981738252198,
"learning_rate": 1.901840490797546e-07,
"loss": 1.956,
"step": 310
},
{
"epoch": 0.9815950920245399,
"grad_norm": 3.2538325898676637,
"learning_rate": 1.9631901840490797e-07,
"loss": 1.9886,
"step": 320
},
{
"epoch": 1.0122699386503067,
"grad_norm": 3.5596683489380294,
"learning_rate": 2.0245398773006135e-07,
"loss": 1.9918,
"step": 330
},
{
"epoch": 1.0429447852760736,
"grad_norm": 3.238486372143609,
"learning_rate": 2.085889570552147e-07,
"loss": 1.9839,
"step": 340
},
{
"epoch": 1.0736196319018405,
"grad_norm": 3.246470029117208,
"learning_rate": 2.147239263803681e-07,
"loss": 1.9707,
"step": 350
},
{
"epoch": 1.1042944785276074,
"grad_norm": 3.3548676253751895,
"learning_rate": 2.2085889570552145e-07,
"loss": 1.9399,
"step": 360
},
{
"epoch": 1.1349693251533743,
"grad_norm": 2.9837136528798998,
"learning_rate": 2.2699386503067484e-07,
"loss": 1.9081,
"step": 370
},
{
"epoch": 1.165644171779141,
"grad_norm": 3.123338783793841,
"learning_rate": 2.331288343558282e-07,
"loss": 1.9495,
"step": 380
},
{
"epoch": 1.196319018404908,
"grad_norm": 3.12823522424731,
"learning_rate": 2.392638036809816e-07,
"loss": 1.9149,
"step": 390
},
{
"epoch": 1.2269938650306749,
"grad_norm": 3.127421120344186,
"learning_rate": 2.4539877300613496e-07,
"loss": 1.9231,
"step": 400
},
{
"epoch": 1.2576687116564418,
"grad_norm": 3.0563142379215784,
"learning_rate": 2.5153374233128835e-07,
"loss": 1.9259,
"step": 410
},
{
"epoch": 1.2883435582822087,
"grad_norm": 3.002860823723609,
"learning_rate": 2.5766871165644173e-07,
"loss": 1.8779,
"step": 420
},
{
"epoch": 1.3190184049079754,
"grad_norm": 3.040065378882382,
"learning_rate": 2.6380368098159506e-07,
"loss": 1.919,
"step": 430
},
{
"epoch": 1.3496932515337423,
"grad_norm": 3.2257346917267484,
"learning_rate": 2.6993865030674845e-07,
"loss": 1.9377,
"step": 440
},
{
"epoch": 1.3803680981595092,
"grad_norm": 2.9954147708634724,
"learning_rate": 2.7607361963190183e-07,
"loss": 1.9159,
"step": 450
},
{
"epoch": 1.4110429447852761,
"grad_norm": 3.0206681193118583,
"learning_rate": 2.8220858895705517e-07,
"loss": 1.9015,
"step": 460
},
{
"epoch": 1.441717791411043,
"grad_norm": 3.1151975930939413,
"learning_rate": 2.8834355828220855e-07,
"loss": 1.9162,
"step": 470
},
{
"epoch": 1.4723926380368098,
"grad_norm": 3.110216679336694,
"learning_rate": 2.94478527607362e-07,
"loss": 1.92,
"step": 480
},
{
"epoch": 1.5030674846625767,
"grad_norm": 3.046863348968689,
"learning_rate": 3.006134969325153e-07,
"loss": 1.9171,
"step": 490
},
{
"epoch": 1.5337423312883436,
"grad_norm": 2.945865360342739,
"learning_rate": 3.067484662576687e-07,
"loss": 1.9095,
"step": 500
},
{
"epoch": 1.5644171779141103,
"grad_norm": 3.176637928075508,
"learning_rate": 3.128834355828221e-07,
"loss": 1.9282,
"step": 510
},
{
"epoch": 1.5950920245398774,
"grad_norm": 3.0369820219737056,
"learning_rate": 3.190184049079754e-07,
"loss": 1.936,
"step": 520
},
{
"epoch": 1.6257668711656441,
"grad_norm": 3.004806377144078,
"learning_rate": 3.251533742331288e-07,
"loss": 1.9434,
"step": 530
},
{
"epoch": 1.656441717791411,
"grad_norm": 3.2902184434846133,
"learning_rate": 3.312883435582822e-07,
"loss": 1.8933,
"step": 540
},
{
"epoch": 1.687116564417178,
"grad_norm": 3.1870514820826905,
"learning_rate": 3.374233128834356e-07,
"loss": 1.9213,
"step": 550
},
{
"epoch": 1.7177914110429446,
"grad_norm": 3.131942976786612,
"learning_rate": 3.4355828220858896e-07,
"loss": 1.9434,
"step": 560
},
{
"epoch": 1.7484662576687118,
"grad_norm": 3.0513682450360378,
"learning_rate": 3.496932515337423e-07,
"loss": 1.8838,
"step": 570
},
{
"epoch": 1.7791411042944785,
"grad_norm": 3.177020551414707,
"learning_rate": 3.558282208588957e-07,
"loss": 1.9052,
"step": 580
},
{
"epoch": 1.8098159509202454,
"grad_norm": 2.9620179420902506,
"learning_rate": 3.6196319018404906e-07,
"loss": 1.8909,
"step": 590
},
{
"epoch": 1.8404907975460123,
"grad_norm": 2.990366230166413,
"learning_rate": 3.6809815950920245e-07,
"loss": 1.8493,
"step": 600
},
{
"epoch": 1.871165644171779,
"grad_norm": 2.975455921573483,
"learning_rate": 3.7423312883435583e-07,
"loss": 1.876,
"step": 610
},
{
"epoch": 1.9018404907975461,
"grad_norm": 3.1776175076162905,
"learning_rate": 3.803680981595092e-07,
"loss": 1.8962,
"step": 620
},
{
"epoch": 1.9325153374233128,
"grad_norm": 3.0920446560067725,
"learning_rate": 3.8650306748466255e-07,
"loss": 1.9504,
"step": 630
},
{
"epoch": 1.9631901840490797,
"grad_norm": 3.0815885404494883,
"learning_rate": 3.9263803680981593e-07,
"loss": 1.912,
"step": 640
},
{
"epoch": 1.9938650306748467,
"grad_norm": 3.111764851371804,
"learning_rate": 3.9877300613496926e-07,
"loss": 1.9481,
"step": 650
},
{
"epoch": 2.0245398773006134,
"grad_norm": 2.7879149976133806,
"learning_rate": 4.049079754601227e-07,
"loss": 1.8721,
"step": 660
},
{
"epoch": 2.0552147239263805,
"grad_norm": 3.001396783754136,
"learning_rate": 4.110429447852761e-07,
"loss": 1.8426,
"step": 670
},
{
"epoch": 2.085889570552147,
"grad_norm": 2.818029202172762,
"learning_rate": 4.171779141104294e-07,
"loss": 1.8664,
"step": 680
},
{
"epoch": 2.116564417177914,
"grad_norm": 3.0655338296444645,
"learning_rate": 4.233128834355828e-07,
"loss": 1.8812,
"step": 690
},
{
"epoch": 2.147239263803681,
"grad_norm": 3.0134914811420237,
"learning_rate": 4.294478527607362e-07,
"loss": 1.8828,
"step": 700
},
{
"epoch": 2.1779141104294477,
"grad_norm": 3.0293103580307252,
"learning_rate": 4.355828220858895e-07,
"loss": 1.8596,
"step": 710
},
{
"epoch": 2.208588957055215,
"grad_norm": 2.7905142590645284,
"learning_rate": 4.417177914110429e-07,
"loss": 1.8733,
"step": 720
},
{
"epoch": 2.2392638036809815,
"grad_norm": 2.925363620028882,
"learning_rate": 4.4785276073619634e-07,
"loss": 1.8983,
"step": 730
},
{
"epoch": 2.2699386503067487,
"grad_norm": 2.8448443834523975,
"learning_rate": 4.5398773006134967e-07,
"loss": 1.8764,
"step": 740
},
{
"epoch": 2.3006134969325154,
"grad_norm": 2.9628937142708875,
"learning_rate": 4.6012269938650306e-07,
"loss": 1.8754,
"step": 750
},
{
"epoch": 2.331288343558282,
"grad_norm": 3.4170857688413427,
"learning_rate": 4.662576687116564e-07,
"loss": 1.8782,
"step": 760
},
{
"epoch": 2.361963190184049,
"grad_norm": 3.0679580454695388,
"learning_rate": 4.7239263803680977e-07,
"loss": 1.904,
"step": 770
},
{
"epoch": 2.392638036809816,
"grad_norm": 2.9137539934318784,
"learning_rate": 4.785276073619632e-07,
"loss": 1.8132,
"step": 780
},
{
"epoch": 2.4233128834355826,
"grad_norm": 3.061624511427873,
"learning_rate": 4.846625766871165e-07,
"loss": 1.8769,
"step": 790
},
{
"epoch": 2.4539877300613497,
"grad_norm": 3.139832542082945,
"learning_rate": 4.907975460122699e-07,
"loss": 1.9011,
"step": 800
},
{
"epoch": 2.4846625766871164,
"grad_norm": 3.040517067124859,
"learning_rate": 4.969325153374233e-07,
"loss": 1.8553,
"step": 810
},
{
"epoch": 2.5153374233128836,
"grad_norm": 3.2258257658971905,
"learning_rate": 5.030674846625767e-07,
"loss": 1.865,
"step": 820
},
{
"epoch": 2.5460122699386503,
"grad_norm": 3.0070195680844845,
"learning_rate": 5.0920245398773e-07,
"loss": 1.8483,
"step": 830
},
{
"epoch": 2.5766871165644174,
"grad_norm": 2.86401603854259,
"learning_rate": 5.153374233128835e-07,
"loss": 1.8825,
"step": 840
},
{
"epoch": 2.607361963190184,
"grad_norm": 2.8239983106989333,
"learning_rate": 5.214723926380368e-07,
"loss": 1.8549,
"step": 850
},
{
"epoch": 2.638036809815951,
"grad_norm": 3.0228245480925113,
"learning_rate": 5.276073619631901e-07,
"loss": 1.8534,
"step": 860
},
{
"epoch": 2.668711656441718,
"grad_norm": 3.407476550864115,
"learning_rate": 5.337423312883436e-07,
"loss": 1.8691,
"step": 870
},
{
"epoch": 2.6993865030674846,
"grad_norm": 3.0394566261515643,
"learning_rate": 5.398773006134969e-07,
"loss": 1.858,
"step": 880
},
{
"epoch": 2.7300613496932513,
"grad_norm": 2.9005018784059606,
"learning_rate": 5.460122699386502e-07,
"loss": 1.8769,
"step": 890
},
{
"epoch": 2.7607361963190185,
"grad_norm": 3.069201075368232,
"learning_rate": 5.521472392638037e-07,
"loss": 1.8772,
"step": 900
},
{
"epoch": 2.791411042944785,
"grad_norm": 3.1035188318429636,
"learning_rate": 5.58282208588957e-07,
"loss": 1.7893,
"step": 910
},
{
"epoch": 2.8220858895705523,
"grad_norm": 2.92374023876635,
"learning_rate": 5.644171779141103e-07,
"loss": 1.8506,
"step": 920
},
{
"epoch": 2.852760736196319,
"grad_norm": 2.9181750683296763,
"learning_rate": 5.705521472392638e-07,
"loss": 1.8237,
"step": 930
},
{
"epoch": 2.883435582822086,
"grad_norm": 3.0644688369202497,
"learning_rate": 5.766871165644171e-07,
"loss": 1.8199,
"step": 940
},
{
"epoch": 2.914110429447853,
"grad_norm": 3.0388171866487177,
"learning_rate": 5.828220858895705e-07,
"loss": 1.8451,
"step": 950
},
{
"epoch": 2.9447852760736195,
"grad_norm": 2.9394714399921673,
"learning_rate": 5.88957055214724e-07,
"loss": 1.8562,
"step": 960
},
{
"epoch": 2.9754601226993866,
"grad_norm": 3.516190266528493,
"learning_rate": 5.950920245398773e-07,
"loss": 1.8405,
"step": 970
},
{
"epoch": 3.0061349693251533,
"grad_norm": 2.9630766702006324,
"learning_rate": 6.012269938650306e-07,
"loss": 1.8408,
"step": 980
},
{
"epoch": 3.03680981595092,
"grad_norm": 2.7943981161053917,
"learning_rate": 6.073619631901841e-07,
"loss": 1.7695,
"step": 990
},
{
"epoch": 3.067484662576687,
"grad_norm": 2.870330771125139,
"learning_rate": 6.134969325153374e-07,
"loss": 1.8207,
"step": 1000
}
],
"logging_steps": 10,
"max_steps": 16300,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 53163861147648.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}