{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 80,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.125,
"grad_norm": 52.583274841308594,
"learning_rate": 1.57035175879397e-09,
"loss": 4.4832,
"step": 1
},
{
"epoch": 0.25,
"grad_norm": 52.1717414855957,
"learning_rate": 3.14070351758794e-09,
"loss": 4.4526,
"step": 2
},
{
"epoch": 0.375,
"grad_norm": 51.840389251708984,
"learning_rate": 4.71105527638191e-09,
"loss": 4.4294,
"step": 3
},
{
"epoch": 0.5,
"grad_norm": 52.11858367919922,
"learning_rate": 6.28140703517588e-09,
"loss": 4.4392,
"step": 4
},
{
"epoch": 0.625,
"grad_norm": 51.991600036621094,
"learning_rate": 7.85175879396985e-09,
"loss": 4.4464,
"step": 5
},
{
"epoch": 0.75,
"grad_norm": 52.67415237426758,
"learning_rate": 9.42211055276382e-09,
"loss": 4.4485,
"step": 6
},
{
"epoch": 0.875,
"grad_norm": 52.06214904785156,
"learning_rate": 1.099246231155779e-08,
"loss": 4.445,
"step": 7
},
{
"epoch": 1.0,
"grad_norm": 52.24057388305664,
"learning_rate": 1.256281407035176e-08,
"loss": 4.4631,
"step": 8
},
{
"epoch": 1.0,
"eval_loss": 4.591891765594482,
"eval_runtime": 48.4152,
"eval_samples_per_second": 28.214,
"eval_steps_per_second": 0.289,
"step": 8
},
{
"epoch": 1.125,
"grad_norm": 52.291419982910156,
"learning_rate": 1.413316582914573e-08,
"loss": 4.4435,
"step": 9
},
{
"epoch": 1.25,
"grad_norm": 51.911685943603516,
"learning_rate": 1.57035175879397e-08,
"loss": 4.4448,
"step": 10
},
{
"epoch": 1.375,
"grad_norm": 52.54676055908203,
"learning_rate": 1.7273869346733672e-08,
"loss": 4.4468,
"step": 11
},
{
"epoch": 1.5,
"grad_norm": 52.45462417602539,
"learning_rate": 1.884422110552764e-08,
"loss": 4.4229,
"step": 12
},
{
"epoch": 1.625,
"grad_norm": 52.290565490722656,
"learning_rate": 2.041457286432161e-08,
"loss": 4.4212,
"step": 13
},
{
"epoch": 1.75,
"grad_norm": 51.737483978271484,
"learning_rate": 2.198492462311558e-08,
"loss": 4.4379,
"step": 14
},
{
"epoch": 1.875,
"grad_norm": 52.21713638305664,
"learning_rate": 2.3555276381909547e-08,
"loss": 4.426,
"step": 15
},
{
"epoch": 2.0,
"grad_norm": 52.21146011352539,
"learning_rate": 2.512562814070352e-08,
"loss": 4.4206,
"step": 16
},
{
"epoch": 2.0,
"eval_loss": 4.538872241973877,
"eval_runtime": 45.8583,
"eval_samples_per_second": 29.787,
"eval_steps_per_second": 0.305,
"step": 16
},
{
"epoch": 2.125,
"grad_norm": 52.31325149536133,
"learning_rate": 2.669597989949749e-08,
"loss": 4.4286,
"step": 17
},
{
"epoch": 2.25,
"grad_norm": 52.39244842529297,
"learning_rate": 2.826633165829146e-08,
"loss": 4.4363,
"step": 18
},
{
"epoch": 2.375,
"grad_norm": 51.97914123535156,
"learning_rate": 2.983668341708543e-08,
"loss": 4.4068,
"step": 19
},
{
"epoch": 2.5,
"grad_norm": 52.424015045166016,
"learning_rate": 3.14070351758794e-08,
"loss": 4.4079,
"step": 20
},
{
"epoch": 2.625,
"grad_norm": 51.720726013183594,
"learning_rate": 3.297738693467337e-08,
"loss": 4.4012,
"step": 21
},
{
"epoch": 2.75,
"grad_norm": 51.92622375488281,
"learning_rate": 3.4547738693467345e-08,
"loss": 4.3656,
"step": 22
},
{
"epoch": 2.875,
"grad_norm": 51.43586730957031,
"learning_rate": 3.611809045226131e-08,
"loss": 4.3986,
"step": 23
},
{
"epoch": 3.0,
"grad_norm": 52.25431823730469,
"learning_rate": 3.768844221105528e-08,
"loss": 4.3742,
"step": 24
},
{
"epoch": 3.0,
"eval_loss": 4.466385841369629,
"eval_runtime": 43.0073,
"eval_samples_per_second": 31.762,
"eval_steps_per_second": 0.326,
"step": 24
},
{
"epoch": 3.125,
"grad_norm": 52.38350296020508,
"learning_rate": 3.925879396984925e-08,
"loss": 4.3569,
"step": 25
},
{
"epoch": 3.25,
"grad_norm": 52.09661865234375,
"learning_rate": 4.082914572864322e-08,
"loss": 4.3329,
"step": 26
},
{
"epoch": 3.375,
"grad_norm": 52.156551361083984,
"learning_rate": 4.239949748743719e-08,
"loss": 4.3611,
"step": 27
},
{
"epoch": 3.5,
"grad_norm": 51.66514587402344,
"learning_rate": 4.396984924623116e-08,
"loss": 4.3414,
"step": 28
},
{
"epoch": 3.625,
"grad_norm": 51.9528694152832,
"learning_rate": 4.5540201005025126e-08,
"loss": 4.3208,
"step": 29
},
{
"epoch": 3.75,
"grad_norm": 51.63153839111328,
"learning_rate": 4.7110552763819094e-08,
"loss": 4.3323,
"step": 30
},
{
"epoch": 3.875,
"grad_norm": 51.82636642456055,
"learning_rate": 4.8680904522613075e-08,
"loss": 4.2969,
"step": 31
},
{
"epoch": 4.0,
"grad_norm": 50.99885177612305,
"learning_rate": 5.025125628140704e-08,
"loss": 4.2848,
"step": 32
},
{
"epoch": 4.0,
"eval_loss": 4.35823392868042,
"eval_runtime": 42.4893,
"eval_samples_per_second": 32.149,
"eval_steps_per_second": 0.329,
"step": 32
},
{
"epoch": 4.125,
"grad_norm": 51.07362365722656,
"learning_rate": 5.182160804020101e-08,
"loss": 4.2605,
"step": 33
},
{
"epoch": 4.25,
"grad_norm": 51.94071578979492,
"learning_rate": 5.339195979899498e-08,
"loss": 4.2979,
"step": 34
},
{
"epoch": 4.375,
"grad_norm": 50.928531646728516,
"learning_rate": 5.496231155778895e-08,
"loss": 4.2821,
"step": 35
},
{
"epoch": 4.5,
"grad_norm": 51.46839141845703,
"learning_rate": 5.653266331658292e-08,
"loss": 4.2471,
"step": 36
},
{
"epoch": 4.625,
"grad_norm": 51.3604621887207,
"learning_rate": 5.810301507537689e-08,
"loss": 4.2608,
"step": 37
},
{
"epoch": 4.75,
"grad_norm": 51.4367561340332,
"learning_rate": 5.967336683417086e-08,
"loss": 4.24,
"step": 38
},
{
"epoch": 4.875,
"grad_norm": 51.37506103515625,
"learning_rate": 6.124371859296483e-08,
"loss": 4.2268,
"step": 39
},
{
"epoch": 5.0,
"grad_norm": 51.0534782409668,
"learning_rate": 6.28140703517588e-08,
"loss": 4.1992,
"step": 40
},
{
"epoch": 5.0,
"eval_loss": 4.225412845611572,
"eval_runtime": 62.0906,
"eval_samples_per_second": 22.0,
"eval_steps_per_second": 0.225,
"step": 40
},
{
"epoch": 5.125,
"grad_norm": 50.854610443115234,
"learning_rate": 6.438442211055277e-08,
"loss": 4.1707,
"step": 41
},
{
"epoch": 5.25,
"grad_norm": 51.0251579284668,
"learning_rate": 6.595477386934674e-08,
"loss": 4.1889,
"step": 42
},
{
"epoch": 5.375,
"grad_norm": 51.27962875366211,
"learning_rate": 6.75251256281407e-08,
"loss": 4.1892,
"step": 43
},
{
"epoch": 5.5,
"grad_norm": 50.632904052734375,
"learning_rate": 6.909547738693469e-08,
"loss": 4.1684,
"step": 44
},
{
"epoch": 5.625,
"grad_norm": 51.070587158203125,
"learning_rate": 7.066582914572865e-08,
"loss": 4.154,
"step": 45
},
{
"epoch": 5.75,
"grad_norm": 50.42203903198242,
"learning_rate": 7.223618090452263e-08,
"loss": 4.1407,
"step": 46
},
{
"epoch": 5.875,
"grad_norm": 50.98355484008789,
"learning_rate": 7.380653266331659e-08,
"loss": 4.129,
"step": 47
},
{
"epoch": 6.0,
"grad_norm": 50.823097229003906,
"learning_rate": 7.537688442211056e-08,
"loss": 4.1172,
"step": 48
},
{
"epoch": 6.0,
"eval_loss": 4.049016952514648,
"eval_runtime": 46.6978,
"eval_samples_per_second": 29.252,
"eval_steps_per_second": 0.3,
"step": 48
},
{
"epoch": 6.125,
"grad_norm": 51.02920150756836,
"learning_rate": 7.694723618090454e-08,
"loss": 4.1099,
"step": 49
},
{
"epoch": 6.25,
"grad_norm": 50.04145812988281,
"learning_rate": 7.85175879396985e-08,
"loss": 4.076,
"step": 50
},
{
"epoch": 6.375,
"grad_norm": 50.59613037109375,
"learning_rate": 8.008793969849247e-08,
"loss": 4.0718,
"step": 51
},
{
"epoch": 6.5,
"grad_norm": 50.145957946777344,
"learning_rate": 8.165829145728645e-08,
"loss": 4.0557,
"step": 52
},
{
"epoch": 6.625,
"grad_norm": 49.8894157409668,
"learning_rate": 8.322864321608041e-08,
"loss": 4.0178,
"step": 53
},
{
"epoch": 6.75,
"grad_norm": 49.86162185668945,
"learning_rate": 8.479899497487438e-08,
"loss": 4.0299,
"step": 54
},
{
"epoch": 6.875,
"grad_norm": 50.02029800415039,
"learning_rate": 8.636934673366834e-08,
"loss": 3.9924,
"step": 55
},
{
"epoch": 7.0,
"grad_norm": 49.66703414916992,
"learning_rate": 8.793969849246232e-08,
"loss": 3.9685,
"step": 56
},
{
"epoch": 7.0,
"eval_loss": 3.8612220287323,
"eval_runtime": 46.1098,
"eval_samples_per_second": 29.625,
"eval_steps_per_second": 0.304,
"step": 56
},
{
"epoch": 7.125,
"grad_norm": 49.38985824584961,
"learning_rate": 8.951005025125629e-08,
"loss": 3.9575,
"step": 57
},
{
"epoch": 7.25,
"grad_norm": 49.38282012939453,
"learning_rate": 9.108040201005025e-08,
"loss": 3.9457,
"step": 58
},
{
"epoch": 7.375,
"grad_norm": 49.578147888183594,
"learning_rate": 9.265075376884423e-08,
"loss": 3.9147,
"step": 59
},
{
"epoch": 7.5,
"grad_norm": 49.263980865478516,
"learning_rate": 9.422110552763819e-08,
"loss": 3.9124,
"step": 60
},
{
"epoch": 7.625,
"grad_norm": 48.740360260009766,
"learning_rate": 9.579145728643216e-08,
"loss": 3.9056,
"step": 61
},
{
"epoch": 7.75,
"grad_norm": 49.30016326904297,
"learning_rate": 9.736180904522615e-08,
"loss": 3.8962,
"step": 62
},
{
"epoch": 7.875,
"grad_norm": 48.78429412841797,
"learning_rate": 9.89321608040201e-08,
"loss": 3.8614,
"step": 63
},
{
"epoch": 8.0,
"grad_norm": 48.219146728515625,
"learning_rate": 1.0050251256281409e-07,
"loss": 3.8165,
"step": 64
},
{
"epoch": 8.0,
"eval_loss": 3.6316096782684326,
"eval_runtime": 45.5729,
"eval_samples_per_second": 29.974,
"eval_steps_per_second": 0.307,
"step": 64
},
{
"epoch": 8.125,
"grad_norm": 48.37006759643555,
"learning_rate": 1.0207286432160806e-07,
"loss": 3.8137,
"step": 65
},
{
"epoch": 8.25,
"grad_norm": 47.9218635559082,
"learning_rate": 1.0364321608040202e-07,
"loss": 3.7862,
"step": 66
},
{
"epoch": 8.375,
"grad_norm": 48.484378814697266,
"learning_rate": 1.05213567839196e-07,
"loss": 3.789,
"step": 67
},
{
"epoch": 8.5,
"grad_norm": 48.121212005615234,
"learning_rate": 1.0678391959798996e-07,
"loss": 3.7609,
"step": 68
},
{
"epoch": 8.625,
"grad_norm": 48.182281494140625,
"learning_rate": 1.0835427135678393e-07,
"loss": 3.7594,
"step": 69
},
{
"epoch": 8.75,
"grad_norm": 48.09780502319336,
"learning_rate": 1.099246231155779e-07,
"loss": 3.6978,
"step": 70
},
{
"epoch": 8.875,
"grad_norm": 47.829345703125,
"learning_rate": 1.1149497487437187e-07,
"loss": 3.7101,
"step": 71
},
{
"epoch": 9.0,
"grad_norm": 47.34735107421875,
"learning_rate": 1.1306532663316584e-07,
"loss": 3.6672,
"step": 72
},
{
"epoch": 9.0,
"eval_loss": 3.3924500942230225,
"eval_runtime": 42.4081,
"eval_samples_per_second": 32.211,
"eval_steps_per_second": 0.33,
"step": 72
},
{
"epoch": 9.125,
"grad_norm": 47.713768005371094,
"learning_rate": 1.146356783919598e-07,
"loss": 3.6657,
"step": 73
},
{
"epoch": 9.25,
"grad_norm": 48.04665756225586,
"learning_rate": 1.1620603015075378e-07,
"loss": 3.635,
"step": 74
},
{
"epoch": 9.375,
"grad_norm": 47.147972106933594,
"learning_rate": 1.1777638190954775e-07,
"loss": 3.6149,
"step": 75
},
{
"epoch": 9.5,
"grad_norm": 47.14889144897461,
"learning_rate": 1.193467336683417e-07,
"loss": 3.5891,
"step": 76
},
{
"epoch": 9.625,
"grad_norm": 47.27253723144531,
"learning_rate": 1.209170854271357e-07,
"loss": 3.56,
"step": 77
},
{
"epoch": 9.75,
"grad_norm": 47.30030822753906,
"learning_rate": 1.2248743718592966e-07,
"loss": 3.554,
"step": 78
},
{
"epoch": 9.875,
"grad_norm": 46.53535461425781,
"learning_rate": 1.2405778894472362e-07,
"loss": 3.5081,
"step": 79
},
{
"epoch": 10.0,
"grad_norm": 46.524356842041016,
"learning_rate": 1.256281407035176e-07,
"loss": 3.4906,
"step": 80
},
{
"epoch": 10.0,
"eval_loss": 3.130697011947632,
"eval_runtime": 44.7299,
"eval_samples_per_second": 30.539,
"eval_steps_per_second": 0.313,
"step": 80
}
],
"logging_steps": 1,
"max_steps": 80,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 100,
"trial_name": null,
"trial_params": null
}