Training in progress, step 5000, checkpoint

Files changed (7) hide show

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ffe7afe5c070fab96ca43ba7d40ffa5a5697d168401423ad6c0ad6e75a43f93b
 size 6710771584

 version https://git-lfs.github.com/spec/v1
+oid sha256:82bc6140c804b5d3375fee96fc07573ed3f7f3bd1827010bfb2d2cba57c7527e
 size 6710771584

last-checkpoint/pytorch_model-00001-of-00003.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34683cb4007c3b92110f7840142f7ce353dc35fa8c47d3906f2a8292351af604
 size 4986145435

 version https://git-lfs.github.com/spec/v1
+oid sha256:5a9e4393d35049ad958dc05a7893ce3d9fc4b21c4800a5cb9d0632332aab1743
 size 4986145435

last-checkpoint/pytorch_model-00002-of-00003.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7892cbbcf16ff8490e2d0a329d8594e2218d984e8584083ac3da0573e6d508dd
 size 4985796040

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d05fe05ad2b03594c78064a4e20af41c072f2551802c62e8cc765ab2658740c
 size 4985796040

last-checkpoint/pytorch_model-00003-of-00003.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e4147dc29ced8cfe98d7c7047558106240377827105c3d76a5ee8d8e17d7754
 size 3407874846

 version https://git-lfs.github.com/spec/v1
+oid sha256:5627e77582102da559297a40c571e3fd53fef55c7f74cf114eb83aca4b1518ce
 size 3407874846

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e4fbdd49ae32378de0eb7b581f65e4edbef5eb7583e3157a829c62678554ea54
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c50f902d46e1503d63201e520cf2d2a219286c093050136afe0283b841bb99b9
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:47132a874ad4fda1d1f651cb8d7cf3a21ccb1e85b7d1f0c122a2dcf69a4af0ac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:2a9b177af396170d7a7558a5fee9e05fa92b1c1bfde5ab937b9c9b2a05a9e953
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.1881097555160522,
   "best_model_checkpoint": "../facebook/nllb-200-3.3B-finetuned/checkpoint-1000",
-  "epoch": 4500.0,
   "eval_steps": 500,
-  "global_step": 4500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -160,6 +160,23 @@
       "eval_samples_per_second": 0.287,
       "eval_steps_per_second": 0.144,
       "step": 4500
     }
   ],
   "logging_steps": 500,
@@ -167,7 +184,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5000,
   "save_steps": 500,
-  "total_flos": 2.8630196748288e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.1881097555160522,
   "best_model_checkpoint": "../facebook/nllb-200-3.3B-finetuned/checkpoint-1000",
+  "epoch": 5000.0,
   "eval_steps": 500,
+  "global_step": 5000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 0.287,
       "eval_steps_per_second": 0.144,
       "step": 4500
+    },
+    {
+      "epoch": 5000.0,
+      "grad_norm": 0.029535507783293724,
+      "learning_rate": 8.882641330809627e-12,
+      "loss": 0.0016,
+      "step": 5000
+    },
+    {
+      "epoch": 5000.0,
+      "eval_gen_len": 24.5,
+      "eval_loss": 1.4524964094161987,
+      "eval_rouge": 0.0357,
+      "eval_runtime": 6.9555,
+      "eval_samples_per_second": 0.288,
+      "eval_steps_per_second": 0.144,
+      "step": 5000
     }
   ],
   "logging_steps": 500,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5000,
   "save_steps": 500,
+  "total_flos": 3.181132972032e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null