Training in progress, step 5200, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -286,9 +286,9 @@ print(embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
-# tensor([[1.0000, 0.6188, 0.1217],
-#         [0.6188, 1.0000, 0.1507],
-#         [0.1217, 0.1507, 1.0000]])
 ```
 <!--
@@ -1222,6 +1222,8 @@ You can finetune this model on your own dataset.
 </details>
 ### Training Logs
 | Epoch  | Step | Training Loss |
 |:------:|:----:|:-------------:|
 | 0.0009 | 50   | 1.3738        |
@@ -1324,7 +1326,12 @@ You can finetune this model on your own dataset.
 | 0.0866 | 4900 | 0.4275        |
 | 0.0875 | 4950 | 0.5575        |
 | 0.0884 | 5000 | 0.4197        |
 ### Framework Versions
 - Python: 3.12.11

 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
+# tensor([[1.0000, 0.6341, 0.1285],
+#         [0.6341, 1.0000, 0.1635],
+#         [0.1285, 0.1635, 1.0000]])
 ```
 <!--
 </details>
 ### Training Logs
+<details><summary>Click to expand</summary>
 | Epoch  | Step | Training Loss |
 |:------:|:----:|:-------------:|
 | 0.0009 | 50   | 1.3738        |
 | 0.0866 | 4900 | 0.4275        |
 | 0.0875 | 4950 | 0.5575        |
 | 0.0884 | 5000 | 0.4197        |
+| 0.0892 | 5050 | 0.4525        |
+| 0.0901 | 5100 | 0.4469        |
+| 0.0910 | 5150 | 0.5283        |
+| 0.0919 | 5200 | 0.4826        |
+</details>
 ### Framework Versions
 - Python: 3.12.11

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90e4d2bbf59fbcb40ff4b4462a436528ececec21cfbe639ba0aa1880e8a048b9
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:4af90c7f23b49e51d466a6d8a0d86939bf37e4d8ea5c0b655ab35b7c151a05cf
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:498b25c3a0cdf659b68bca58a1fe81b240dadbd1c0686b5aa30fdf8a0f4407de
 size 180609611

 version https://git-lfs.github.com/spec/v1
+oid sha256:544baf7d60ab6e318f100fc6c2fccfb140e19ae226f8f7d820d236577b1da105
 size 180609611

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bbd110dd6b99c908a73a401be3d8438b48f355d5cb710bfb9c01fca2894f5f8e
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:11c0cb9bd2c9c6c5c964ae0bb4e4e4872958c4dc97ac96f40f1c118dec4c9803
 size 14645

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:82ad8990572ad11a824b7db276c8af49c179ca7e7724b4e6906cd0ae480a80a8
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:afb655591367e732af512c0b489f6652e710a205ef4e8286da8729a948980ee0
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:77d3a1390f85cf6329aeb072176eb6782a1d00623e0775f00f722a024157ca78
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:407fdddb74e20b064ec7452aeeae963d95b777da5e4cde56c489f916bcf701b2
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.08835327172165185,
   "eval_steps": 500,
-  "global_step": 5000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -708,6 +708,34 @@
       "learning_rate": 4.4160777385159016e-05,
       "loss": 0.4197,
       "step": 5000
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.09188740259051793,
   "eval_steps": 500,
+  "global_step": 5200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.4160777385159016e-05,
       "loss": 0.4197,
       "step": 5000
+    },
+    {
+      "epoch": 0.08923680443886838,
+      "grad_norm": 1.8962676525115967,
+      "learning_rate": 4.4602473498233214e-05,
+      "loss": 0.4525,
+      "step": 5050
+    },
+    {
+      "epoch": 0.09012033715608489,
+      "grad_norm": 2.1373822689056396,
+      "learning_rate": 4.5044169611307425e-05,
+      "loss": 0.4469,
+      "step": 5100
+    },
+    {
+      "epoch": 0.0910038698733014,
+      "grad_norm": 5.542126178741455,
+      "learning_rate": 4.548586572438163e-05,
+      "loss": 0.5283,
+      "step": 5150
+    },
+    {
+      "epoch": 0.09188740259051793,
+      "grad_norm": 2.4414310455322266,
+      "learning_rate": 4.5927561837455834e-05,
+      "loss": 0.4826,
+      "step": 5200
     }
   ],
   "logging_steps": 50,