Training in progress, step 1500, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +82 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f472a71e58c1e05f38e3ac96ecd5e8545ca967b82df98601d845c258a3482a23
 size 30214176

 version https://git-lfs.github.com/spec/v1
+oid sha256:34d3555cfca007ef636809b8f3241beff2d95435ec2f1596bb03a9d3f865cc63
 size 30214176

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9dafdaff06aed498a0e111444051ede7608a7962108f89d16cbbd37fdac81cfa
 size 291962

 version https://git-lfs.github.com/spec/v1
+oid sha256:25011134f263f44b5a284e8818a6302e5420c80915fb0baed6f67eb7a593568a
 size 291962

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c9b7da814158dbbb8d60189b3b5255f312edc3c87062d26ecc4a3197477d7d1f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e8bd8829e73b825a940f709da54f29899bba70342040af60abace1f481dd4757
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b15d8832650f7e6c0cd77d0d5d945bee578308b9cd39297e92f1063b58b0f3c8
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:3efab4a26e653f12b072c8c84cd98873adfff5605ef352f4a784bafec4fd37e6
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04fe46db27f239a414db1d5d90722d80220853d3e644018ca60e784cd72b6710
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0054bae4a1765c1151c5d499ec353895197f5e92e18df41e313afd3470bb8693
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": 500,
   "best_metric": 1.6513175964355469,
   "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-500",
-  "epoch": 0.4452483651036594,
   "eval_steps": 500,
-  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -164,6 +164,84 @@
       "eval_samples_per_second": 392.083,
       "eval_steps_per_second": 49.026,
       "step": 1000
     }
   ],
   "logging_steps": 50,
@@ -178,7 +256,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -192,7 +270,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4620262884249600.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": 500,
   "best_metric": 1.6513175964355469,
   "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-500",
+  "epoch": 0.6678725476554891,
   "eval_steps": 500,
+  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 392.083,
       "eval_steps_per_second": 49.026,
       "step": 1000
+    },
+    {
+      "epoch": 0.46751078335884233,
+      "grad_norm": 0.9494450092315674,
+      "learning_rate": 5e-05,
+      "loss": 1.6604,
+      "step": 1050
+    },
+    {
+      "epoch": 0.4897732016140253,
+      "grad_norm": 0.9924134612083435,
+      "learning_rate": 5e-05,
+      "loss": 1.6539,
+      "step": 1100
+    },
+    {
+      "epoch": 0.5120356198692083,
+      "grad_norm": 1.0620170831680298,
+      "learning_rate": 5e-05,
+      "loss": 1.6552,
+      "step": 1150
+    },
+    {
+      "epoch": 0.5342980381243913,
+      "grad_norm": 1.1163603067398071,
+      "learning_rate": 5e-05,
+      "loss": 1.6452,
+      "step": 1200
+    },
+    {
+      "epoch": 0.5565604563795742,
+      "grad_norm": 1.025298833847046,
+      "learning_rate": 5e-05,
+      "loss": 1.6468,
+      "step": 1250
+    },
+    {
+      "epoch": 0.5788228746347572,
+      "grad_norm": 0.9661399722099304,
+      "learning_rate": 5e-05,
+      "loss": 1.6377,
+      "step": 1300
+    },
+    {
+      "epoch": 0.6010852928899402,
+      "grad_norm": 0.9570266008377075,
+      "learning_rate": 5e-05,
+      "loss": 1.6525,
+      "step": 1350
+    },
+    {
+      "epoch": 0.6233477111451231,
+      "grad_norm": 0.9325594902038574,
+      "learning_rate": 5e-05,
+      "loss": 1.6443,
+      "step": 1400
+    },
+    {
+      "epoch": 0.6456101294003062,
+      "grad_norm": 1.071475625038147,
+      "learning_rate": 5e-05,
+      "loss": 1.6418,
+      "step": 1450
+    },
+    {
+      "epoch": 0.6678725476554891,
+      "grad_norm": 0.9684040546417236,
+      "learning_rate": 5e-05,
+      "loss": 1.6396,
+      "step": 1500
+    },
+    {
+      "epoch": 0.6678725476554891,
+      "eval_loss": 1.6649832725524902,
+      "eval_runtime": 40.9434,
+      "eval_samples_per_second": 390.075,
+      "eval_steps_per_second": 48.775,
+      "step": 1500
     }
   ],
   "logging_steps": 50,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 8275168727519232.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null