Training in progress, step 5400, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -286,9 +286,9 @@ print(embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
-# tensor([[1.0000, 0.6341, 0.1285],
-#         [0.6341, 1.0000, 0.1635],
-#         [0.1285, 0.1635, 1.0000]])
 ```
 <!--
@@ -1330,6 +1330,10 @@ You can finetune this model on your own dataset.
 | 0.0901 | 5100 | 0.4469        |
 | 0.0910 | 5150 | 0.5283        |
 | 0.0919 | 5200 | 0.4826        |
 </details>

 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
+# tensor([[1.0000, 0.6268, 0.1112],
+#         [0.6268, 1.0000, 0.1476],
+#         [0.1112, 0.1476, 1.0000]])
 ```
 <!--
 | 0.0901 | 5100 | 0.4469        |
 | 0.0910 | 5150 | 0.5283        |
 | 0.0919 | 5200 | 0.4826        |
+| 0.0928 | 5250 | 0.3895        |
+| 0.0937 | 5300 | 0.4873        |
+| 0.0945 | 5350 | 0.4895        |
+| 0.0954 | 5400 | 0.4686        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4af90c7f23b49e51d466a6d8a0d86939bf37e4d8ea5c0b655ab35b7c151a05cf
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:1a7b3038038d614d02d13200e3426ce1cee9998e3dd720f3ef8373fd32bc320c
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:544baf7d60ab6e318f100fc6c2fccfb140e19ae226f8f7d820d236577b1da105
 size 180609611

 version https://git-lfs.github.com/spec/v1
+oid sha256:da23c5a242ed1a18ccfa540abf270f8af47081a60b90b0528855f1e52bf8ac00
 size 180609611

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:11c0cb9bd2c9c6c5c964ae0bb4e4e4872958c4dc97ac96f40f1c118dec4c9803
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:43b1cabcf4d622f4eeb0e2e256d17e427254b320b0d958e0925925fc3965ffc2
 size 14645

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:afb655591367e732af512c0b489f6652e710a205ef4e8286da8729a948980ee0
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:93d3edcb87fa33fd6a7048f57178907c582fac7657ef4f6ed7cd1a489f3a9b01
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:407fdddb74e20b064ec7452aeeae963d95b777da5e4cde56c489f916bcf701b2
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:cde96e015f065de3d9994d1a7735820057fec16cad19c67655b731af60c92ecc
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.09188740259051793,
   "eval_steps": 500,
-  "global_step": 5200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -736,6 +736,34 @@
       "learning_rate": 4.5927561837455834e-05,
       "loss": 0.4826,
       "step": 5200
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.095421533459384,
   "eval_steps": 500,
+  "global_step": 5400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.5927561837455834e-05,
       "loss": 0.4826,
       "step": 5200
+    },
+    {
+      "epoch": 0.09277093530773445,
+      "grad_norm": 3.52422833442688,
+      "learning_rate": 4.636925795053004e-05,
+      "loss": 0.3895,
+      "step": 5250
+    },
+    {
+      "epoch": 0.09365446802495096,
+      "grad_norm": 2.1975631713867188,
+      "learning_rate": 4.681095406360424e-05,
+      "loss": 0.4873,
+      "step": 5300
+    },
+    {
+      "epoch": 0.09453800074216748,
+      "grad_norm": 3.4910616874694824,
+      "learning_rate": 4.725265017667845e-05,
+      "loss": 0.4895,
+      "step": 5350
+    },
+    {
+      "epoch": 0.095421533459384,
+      "grad_norm": 2.1225690841674805,
+      "learning_rate": 4.769434628975265e-05,
+      "loss": 0.4686,
+      "step": 5400
     }
   ],
   "logging_steps": 50,