Training in progress, step 10500, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +82 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:234a74342f029ed78afc7504951e30209937ebfc40da414b5c0ddba808050cde
 size 30214176

 version https://git-lfs.github.com/spec/v1
+oid sha256:860eca3961053936b241a5a2d4fa22d5d55c591199aaa9b78de4fdc667354710
 size 30214176

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:edf3bd7c510bb22c9bef6a5be4eb2c5e53373f8b91d20a894a89d9aca7dcee5f
 size 291962

 version https://git-lfs.github.com/spec/v1
+oid sha256:0908b4e68e646f4581a0026b2c4fa54184efb4bcbd7553998a1c4a4c12cd731e
 size 291962

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12bdf3459ffab5d32ec375231a2eebdf56e9dbd56f588155f39eb4841c87ad7b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:79f81c651a86e496a8a0683bb40cfa7b5abd415344f225e4b6cf5b7f3bf0d148
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:536bf64114e88301429208f737493c4ae0d118f5c27c3327e4079f245e1d631e
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:d094517def3bab9b1b376a801fd8dd8681180f5ccdb5bd9fd6776c1cd25c2968
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c477b290e5f0acc03a821a7eef064a5961781fd60d9d238c571a7e7c070565f7
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:cd17e6a2456b591e82434cf85c6c3687643ac9b76a0a0c080b341aa852e38872
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": 10000,
   "best_metric": 1.5091972351074219,
   "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-10000",
-  "epoch": 4.452817587310421,
   "eval_steps": 500,
-  "global_step": 10000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1568,6 +1568,84 @@
       "eval_samples_per_second": 393.855,
       "eval_steps_per_second": 49.247,
       "step": 10000
     }
   ],
   "logging_steps": 50,
@@ -1582,7 +1660,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -1596,7 +1674,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.945897129811661e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": 10000,
   "best_metric": 1.5091972351074219,
   "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-10000",
+  "epoch": 4.6754417698622515,
   "eval_steps": 500,
+  "global_step": 10500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 393.855,
       "eval_steps_per_second": 49.247,
       "step": 10000
+    },
+    {
+      "epoch": 4.475080005565605,
+      "grad_norm": 1.0181940793991089,
+      "learning_rate": 5e-05,
+      "loss": 1.5594,
+      "step": 10050
+    },
+    {
+      "epoch": 4.497342423820788,
+      "grad_norm": 1.0538172721862793,
+      "learning_rate": 5e-05,
+      "loss": 1.5523,
+      "step": 10100
+    },
+    {
+      "epoch": 4.519604842075971,
+      "grad_norm": 0.936060905456543,
+      "learning_rate": 5e-05,
+      "loss": 1.547,
+      "step": 10150
+    },
+    {
+      "epoch": 4.541867260331154,
+      "grad_norm": 1.225715160369873,
+      "learning_rate": 5e-05,
+      "loss": 1.5491,
+      "step": 10200
+    },
+    {
+      "epoch": 4.564129678586337,
+      "grad_norm": 1.2574198246002197,
+      "learning_rate": 5e-05,
+      "loss": 1.5496,
+      "step": 10250
+    },
+    {
+      "epoch": 4.58639209684152,
+      "grad_norm": 1.2122540473937988,
+      "learning_rate": 5e-05,
+      "loss": 1.5327,
+      "step": 10300
+    },
+    {
+      "epoch": 4.608654515096703,
+      "grad_norm": 1.1094001531600952,
+      "learning_rate": 5e-05,
+      "loss": 1.5375,
+      "step": 10350
+    },
+    {
+      "epoch": 4.630916933351886,
+      "grad_norm": 1.0384974479675293,
+      "learning_rate": 5e-05,
+      "loss": 1.555,
+      "step": 10400
+    },
+    {
+      "epoch": 4.6531793516070685,
+      "grad_norm": 1.0797594785690308,
+      "learning_rate": 5e-05,
+      "loss": 1.5621,
+      "step": 10450
+    },
+    {
+      "epoch": 4.6754417698622515,
+      "grad_norm": 1.0724256038665771,
+      "learning_rate": 5e-05,
+      "loss": 1.547,
+      "step": 10500
+    },
+    {
+      "epoch": 4.6754417698622515,
+      "eval_loss": 1.5108764171600342,
+      "eval_runtime": 40.4882,
+      "eval_samples_per_second": 394.461,
+      "eval_steps_per_second": 49.323,
+      "step": 10500
     }
   ],
   "logging_steps": 50,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 4.37287432012032e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null