Training in progress, step 5200, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -286,9 +286,9 @@ print(embeddings.shape)
|
|
| 286 |
# Get the similarity scores for the embeddings
|
| 287 |
similarities = model.similarity(embeddings, embeddings)
|
| 288 |
print(similarities)
|
| 289 |
-
# tensor([[1.0000, 0.
|
| 290 |
-
# [0.
|
| 291 |
-
# [0.
|
| 292 |
```
|
| 293 |
|
| 294 |
<!--
|
|
@@ -1222,6 +1222,8 @@ You can finetune this model on your own dataset.
|
|
| 1222 |
</details>
|
| 1223 |
|
| 1224 |
### Training Logs
|
|
|
|
|
|
|
| 1225 |
| Epoch | Step | Training Loss |
|
| 1226 |
|:------:|:----:|:-------------:|
|
| 1227 |
| 0.0009 | 50 | 1.3738 |
|
|
@@ -1324,7 +1326,12 @@ You can finetune this model on your own dataset.
|
|
| 1324 |
| 0.0866 | 4900 | 0.4275 |
|
| 1325 |
| 0.0875 | 4950 | 0.5575 |
|
| 1326 |
| 0.0884 | 5000 | 0.4197 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1327 |
|
|
|
|
| 1328 |
|
| 1329 |
### Framework Versions
|
| 1330 |
- Python: 3.12.11
|
|
|
|
| 286 |
# Get the similarity scores for the embeddings
|
| 287 |
similarities = model.similarity(embeddings, embeddings)
|
| 288 |
print(similarities)
|
| 289 |
+
# tensor([[1.0000, 0.6341, 0.1285],
|
| 290 |
+
# [0.6341, 1.0000, 0.1635],
|
| 291 |
+
# [0.1285, 0.1635, 1.0000]])
|
| 292 |
```
|
| 293 |
|
| 294 |
<!--
|
|
|
|
| 1222 |
</details>
|
| 1223 |
|
| 1224 |
### Training Logs
|
| 1225 |
+
<details><summary>Click to expand</summary>
|
| 1226 |
+
|
| 1227 |
| Epoch | Step | Training Loss |
|
| 1228 |
|:------:|:----:|:-------------:|
|
| 1229 |
| 0.0009 | 50 | 1.3738 |
|
|
|
|
| 1326 |
| 0.0866 | 4900 | 0.4275 |
|
| 1327 |
| 0.0875 | 4950 | 0.5575 |
|
| 1328 |
| 0.0884 | 5000 | 0.4197 |
|
| 1329 |
+
| 0.0892 | 5050 | 0.4525 |
|
| 1330 |
+
| 0.0901 | 5100 | 0.4469 |
|
| 1331 |
+
| 0.0910 | 5150 | 0.5283 |
|
| 1332 |
+
| 0.0919 | 5200 | 0.4826 |
|
| 1333 |
|
| 1334 |
+
</details>
|
| 1335 |
|
| 1336 |
### Framework Versions
|
| 1337 |
- Python: 3.12.11
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4af90c7f23b49e51d466a6d8a0d86939bf37e4d8ea5c0b655ab35b7c151a05cf
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:544baf7d60ab6e318f100fc6c2fccfb140e19ae226f8f7d820d236577b1da105
|
| 3 |
size 180609611
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11c0cb9bd2c9c6c5c964ae0bb4e4e4872958c4dc97ac96f40f1c118dec4c9803
|
| 3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:afb655591367e732af512c0b489f6652e710a205ef4e8286da8729a948980ee0
|
| 3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:407fdddb74e20b064ec7452aeeae963d95b777da5e4cde56c489f916bcf701b2
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -708,6 +708,34 @@
|
|
| 708 |
"learning_rate": 4.4160777385159016e-05,
|
| 709 |
"loss": 0.4197,
|
| 710 |
"step": 5000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 711 |
}
|
| 712 |
],
|
| 713 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.09188740259051793,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 5200,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 708 |
"learning_rate": 4.4160777385159016e-05,
|
| 709 |
"loss": 0.4197,
|
| 710 |
"step": 5000
|
| 711 |
+
},
|
| 712 |
+
{
|
| 713 |
+
"epoch": 0.08923680443886838,
|
| 714 |
+
"grad_norm": 1.8962676525115967,
|
| 715 |
+
"learning_rate": 4.4602473498233214e-05,
|
| 716 |
+
"loss": 0.4525,
|
| 717 |
+
"step": 5050
|
| 718 |
+
},
|
| 719 |
+
{
|
| 720 |
+
"epoch": 0.09012033715608489,
|
| 721 |
+
"grad_norm": 2.1373822689056396,
|
| 722 |
+
"learning_rate": 4.5044169611307425e-05,
|
| 723 |
+
"loss": 0.4469,
|
| 724 |
+
"step": 5100
|
| 725 |
+
},
|
| 726 |
+
{
|
| 727 |
+
"epoch": 0.0910038698733014,
|
| 728 |
+
"grad_norm": 5.542126178741455,
|
| 729 |
+
"learning_rate": 4.548586572438163e-05,
|
| 730 |
+
"loss": 0.5283,
|
| 731 |
+
"step": 5150
|
| 732 |
+
},
|
| 733 |
+
{
|
| 734 |
+
"epoch": 0.09188740259051793,
|
| 735 |
+
"grad_norm": 2.4414310455322266,
|
| 736 |
+
"learning_rate": 4.5927561837455834e-05,
|
| 737 |
+
"loss": 0.4826,
|
| 738 |
+
"step": 5200
|
| 739 |
}
|
| 740 |
],
|
| 741 |
"logging_steps": 50,
|