Model save

Files changed (5) hide show

README.md CHANGED Viewed

@@ -27,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/polyagent/huggingface/runs/26qriwow)
 This model was trained with SFT.

 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/polyagent/huggingface/runs/y5uti29q)
 This model was trained with SFT.

all_results.json CHANGED Viewed

@@ -2,8 +2,8 @@
     "epoch": 3.0,
     "total_flos": 5.361516984661967e+18,
     "train_loss": 5.0057218712328115,
-    "train_runtime": 5286.5666,
     "train_samples": 95663,
-    "train_samples_per_second": 9.883,
-    "train_steps_per_second": 0.155
 }

     "epoch": 3.0,
     "total_flos": 5.361516984661967e+18,
     "train_loss": 5.0057218712328115,
+    "train_runtime": 5088.8536,
     "train_samples": 95663,
+    "train_samples_per_second": 10.267,
+    "train_steps_per_second": 0.161
 }

runs/Dec26_13-37-26_mia1-gpu-110/events.out.tfevents.1735220304.mia1-gpu-110.3868266.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3f91fe0fec6ba8b51210a5dc7b4913f9ca9c102ec0d49ff5266443641ace8c4
-size 41013

 version https://git-lfs.github.com/spec/v1
+oid sha256:e4cb7415ed71bc4ed48c3eb38d592b50b0a560f5415b73602343827db106d79c
+size 41367

train_results.json CHANGED Viewed

@@ -2,8 +2,8 @@
     "epoch": 3.0,
     "total_flos": 5.361516984661967e+18,
     "train_loss": 5.0057218712328115,
-    "train_runtime": 5286.5666,
     "train_samples": 95663,
-    "train_samples_per_second": 9.883,
-    "train_steps_per_second": 0.155
 }

     "epoch": 3.0,
     "total_flos": 5.361516984661967e+18,
     "train_loss": 5.0057218712328115,
+    "train_runtime": 5088.8536,
     "train_samples": 95663,
+    "train_samples_per_second": 10.267,
+    "train_steps_per_second": 0.161
 }

trainer_state.json CHANGED Viewed

@@ -438,9 +438,9 @@
     {
       "epoch": 1.098901098901099,
       "eval_loss": 4.895308017730713,
-      "eval_runtime": 14.6132,
-      "eval_samples_per_second": 36.2,
-      "eval_steps_per_second": 1.163,
       "step": 300
     },
     {
@@ -866,9 +866,9 @@
     {
       "epoch": 2.197802197802198,
       "eval_loss": 4.718916893005371,
-      "eval_runtime": 14.5971,
-      "eval_samples_per_second": 36.24,
-      "eval_steps_per_second": 1.165,
       "step": 600
     },
     {
@@ -1177,9 +1177,9 @@
       "step": 819,
       "total_flos": 5.361516984661967e+18,
       "train_loss": 5.0057218712328115,
-      "train_runtime": 5286.5666,
-      "train_samples_per_second": 9.883,
-      "train_steps_per_second": 0.155
     }
   ],
   "logging_steps": 5,

     {
       "epoch": 1.098901098901099,
       "eval_loss": 4.895308017730713,
+      "eval_runtime": 14.5245,
+      "eval_samples_per_second": 36.421,
+      "eval_steps_per_second": 1.17,
       "step": 300
     },
     {
     {
       "epoch": 2.197802197802198,
       "eval_loss": 4.718916893005371,
+      "eval_runtime": 14.5284,
+      "eval_samples_per_second": 36.412,
+      "eval_steps_per_second": 1.17,
       "step": 600
     },
     {
       "step": 819,
       "total_flos": 5.361516984661967e+18,
       "train_loss": 5.0057218712328115,
+      "train_runtime": 5088.8536,
+      "train_samples_per_second": 10.267,
+      "train_steps_per_second": 0.161
     }
   ],
   "logging_steps": 5,