Training in progress, step 1000

Browse files

Files changed (13) hide show

final/1_Pooling/config.json +10 -0
final/README.md +652 -0
final/config.json +24 -0
final/config_sentence_transformers.json +10 -0
final/model.safetensors +3 -0
final/modules.json +14 -0
final/sentence_bert_config.json +4 -0
final/special_tokens_map.json +44 -0
final/tokenizer.json +0 -0
final/tokenizer_config.json +72 -0
final/vocab.txt +0 -0
model.safetensors +1 -1
training_args.bin +3 -0

final/1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "word_embedding_dimension": 384,
+  "pooling_mode_cls_token": false,
+  "pooling_mode_mean_tokens": true,
+  "pooling_mode_max_tokens": false,
+  "pooling_mode_mean_sqrt_len_tokens": false,
+  "pooling_mode_weightedmean_tokens": false,
+  "pooling_mode_lasttoken": false,
+  "include_prompt": true
+}

final/README.md ADDED Viewed

	@@ -0,0 +1,652 @@

+---
+tags:
+- sentence-transformers
+- sentence-similarity
+- feature-extraction
+- generated_from_trainer
+- dataset_size:2839738
+- loss:CoSENTLoss
+base_model: Mihaiii/gte-micro-v4
+widget:
+- source_sentence: 314d5e89-55f7-42b4-af19-d4d0f499a265_c808a8ec-895c-4777-9e11-e83ce34eddef
+  sentences:
+  - https://cards.scryfall.io/normal/front/3/1/314d5e89-55f7-42b4-af19-d4d0f499a265.jpg?1710406384
+  - https://cards.scryfall.io/normal/front/c/8/c808a8ec-895c-4777-9e11-e83ce34eddef.jpg?1593272714
+  - 'Title: Killer Instinct
+    Cost: {4}{R}{G}
+    Colors: [''G'', ''R'']
+    Type: Enchantment
+    Desc: At the beginning of your upkeep, reveal the top card of your library. If
+    it''s a creature card, put it onto the battlefield. That creature gains haste
+    until end of turn. Sacrifice it at the beginning of the next end step.'
+  - 'Title: Ixidor, Reality Sculptor
+    Cost: {3}{U}{U}
+    Colors: [''U'']
+    Type: Legendary Creature — Human Wizard
+    Desc: Face-down creatures get +1/+1.
+    {2}{U}: Turn target face-down creature face up.'
+- source_sentence: a252a1f5-bba5-4525-8141-57caea9624e9_5fd29cd7-9950-49c0-9e71-d6b0f944292c
+  sentences:
+  - https://cards.scryfall.io/normal/front/5/f/5fd29cd7-9950-49c0-9e71-d6b0f944292c.jpg?1637627823
+  - 'Title: Celestial Judgment
+    Cost: {4}{W}{W}
+    Colors: [''W'']
+    Type: Sorcery
+    Desc: For each different power among creatures on the battlefield, choose a creature
+    with that power. Destroy each creature not chosen this way.'
+  - 'Title: Gibbering Hyenas
+    Cost: {2}{G}
+    Colors: [''G'']
+    Type: Creature — Hyena
+    Desc: This creature can''t block black creatures.'
+  - https://cards.scryfall.io/normal/front/a/2/a252a1f5-bba5-4525-8141-57caea9624e9.jpg?1562720953
+- source_sentence: 0d09c2c8-526b-4693-bbaa-109911ce5281_1a47da7c-80f3-4b98-aaac-778c34a35cb6
+  sentences:
+  - https://cards.scryfall.io/normal/front/1/a/1a47da7c-80f3-4b98-aaac-778c34a35cb6.jpg?1561817948
+  - 'Title: Corpse Harvester
+    Cost: {3}{B}{B}
+    Colors: [''B'']
+    Type: Creature — Zombie Wizard
+    Desc: {1}{B}, {T}, Sacrifice a creature: Search your library for a Zombie card
+    and a Swamp card, reveal them, put them into your hand, then shuffle.'
+  - https://cards.scryfall.io/normal/front/0/d/0d09c2c8-526b-4693-bbaa-109911ce5281.jpg?1562897662
+  - 'Title: Master Biomancer
+    Cost: {2}{G}{U}
+    Colors: [''G'', ''U'']
+    Type: Creature — Elf Wizard
+    Desc: Each other creature you control enters with a number of additional +1/+1
+    counters on it equal to this creature''s power and as a Mutant in addition to
+    its other types.'
+- source_sentence: 938d5157-154c-4300-82d4-0e23d934d436_10be9a82-4008-45ae-a739-fdee95e39619
+  sentences:
+  - https://cards.scryfall.io/normal/front/9/3/938d5157-154c-4300-82d4-0e23d934d436.jpg?1562922364
+  - https://cards.scryfall.io/normal/front/1/0/10be9a82-4008-45ae-a739-fdee95e39619.jpg?1711892785
+  - 'Title: Shadow of Doubt
+    Cost: {U/B}{U/B}
+    Colors: [''B'', ''U'']
+    Type: Instant
+    Desc: ({U/B} can be paid with either {U} or {B}.)
+    Players can''t search libraries this turn.
+    Draw a card.'
+  - 'Title: Stone-Tongue Basilisk
+    Cost: {4}{G}{G}{G}
+    Colors: [''G'']
+    Type: Creature — Basilisk
+    Desc: Whenever this creature deals combat damage to a creature, destroy that creature
+    at end of combat.
+    Threshold — As long as seven or more cards are in your graveyard, all creatures
+    able to block this creature do so.'
+- source_sentence: 141a031d-f899-497b-adf7-4af142078085_0367fac8-6990-4544-ac7d-ed363b55a9cf
+  sentences:
+  - 'Title: Quirion Explorer
+    Cost: {1}{G}
+    Colors: [''G'']
+    Type: Creature — Elf Druid Scout
+    Desc: {T}: Add one mana of any color that a land an opponent controls could produce.'
+  - https://cards.scryfall.io/normal/front/1/4/141a031d-f899-497b-adf7-4af142078085.jpg?1562899241
+  - 'Title: Savage Hunger
+    Cost: {2}{G}
+    Colors: [''G'']
+    Type: Enchantment — Aura
+    Desc: Enchant creature
+    Enchanted creature gets +1/+0 and has trample.
+    Cycling {2} ({2}, Discard this card: Draw a card.)'
+  - https://cards.scryfall.io/normal/front/0/3/0367fac8-6990-4544-ac7d-ed363b55a9cf.jpg?1562700664
+pipeline_tag: sentence-similarity
+library_name: sentence-transformers
+metrics:
+- pearson_cosine
+- spearman_cosine
+model-index:
+- name: SentenceTransformer based on Mihaiii/gte-micro-v4
+  results:
+  - task:
+      type: semantic-similarity
+      name: Semantic Similarity
+    dataset:
+      name: sts dev
+      type: sts-dev
+    metrics:
+    - type: pearson_cosine
+      value: 0.3217344770453035
+      name: Pearson Cosine
+    - type: spearman_cosine
+      value: 0.33145577598581166
+      name: Spearman Cosine
+    - type: pearson_cosine
+      value: 0.43782959181274894
+      name: Pearson Cosine
+    - type: spearman_cosine
+      value: 0.4808140058026093
+      name: Spearman Cosine
+  - task:
+      type: semantic-similarity
+      name: Semantic Similarity
+    dataset:
+      name: sts test
+      type: sts-test
+    metrics:
+    - type: pearson_cosine
+      value: 0.5782060287197249
+      name: Pearson Cosine
+    - type: spearman_cosine
+      value: 0.6348407516031069
+      name: Spearman Cosine
+---
+# SentenceTransformer based on Mihaiii/gte-micro-v4
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Mihaiii/gte-micro-v4](https://huggingface.co/Mihaiii/gte-micro-v4) on the json dataset. It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+## Model Details
+### Model Description
+- **Model Type:** Sentence Transformer
+- **Base model:** [Mihaiii/gte-micro-v4](https://huggingface.co/Mihaiii/gte-micro-v4) <!-- at revision 78e1a4b348f8524c3ab2e3e3475788f5adb8c98f -->
+- **Maximum Sequence Length:** 512 tokens
+- **Output Dimensionality:** 384 dimensions
+- **Similarity Function:** Cosine Similarity
+- **Training Dataset:**
+    - json
+<!-- - **Language:** Unknown -->
+<!-- - **License:** Unknown -->
+### Model Sources
+- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+### Full Model Architecture
+```
+SentenceTransformer(
+  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
+  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+)
+```
+## Usage
+### Direct Usage (Sentence Transformers)
+First install the Sentence Transformers library:
+```bash
+pip install -U sentence-transformers
+```
+Then you can load this model and run inference.
+```python
+from sentence_transformers import SentenceTransformer
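+# Download from the 🤗 Hub ("sentence_transformers_model_id" below is a placeholder: substitute this model's Hub repo id or a local path)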
+model = SentenceTransformer("sentence_transformers_model_id")
+# Run inference
+sentences = [
+    '141a031d-f899-497b-adf7-4af142078085_0367fac8-6990-4544-ac7d-ed363b55a9cf',
+    "Title: Quirion Explorer\nCost: {1}{G}\nColors: ['G']\nType: Creature — Elf Druid Scout\nDesc: {T}: Add one mana of any color that a land an opponent controls could produce.",
+    "Title: Savage Hunger\nCost: {2}{G}\nColors: ['G']\nType: Enchantment — Aura\nDesc: Enchant creature\nEnchanted creature gets +1/+0 and has trample.\nCycling {2} ({2}, Discard this card: Draw a card.)",
+]
+embeddings = model.encode(sentences)
+print(embeddings.shape)
+# [3, 384]
+# Get the similarity scores for the embeddings
+similarities = model.similarity(embeddings, embeddings)
+print(similarities.shape)
+# [3, 3]
+```
+<!--
+### Direct Usage (Transformers)
+<details><summary>Click to see the direct usage in Transformers</summary>
+</details>
+-->
+<!--
+### Downstream Usage (Sentence Transformers)
+You can finetune this model on your own dataset.
+<details><summary>Click to expand</summary>
+</details>
+-->
+<!--
+### Out-of-Scope Use
+*List how the model may foreseeably be misused and address what users ought not to do with the model.*
+-->
+## Evaluation
+### Metrics
+#### Semantic Similarity
+* Datasets: `sts-dev` and `sts-test`
+* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
+| Metric              | sts-dev    | sts-test   |
+|:--------------------|:-----------|:-----------|
+| pearson_cosine      | 0.3217     | 0.5782     |
+| **spearman_cosine** | **0.3315** | **0.6348** |
+#### Semantic Similarity
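+* Dataset: `sts-dev` (a second, separate evaluation under the same dataset name; these values differ from the table above and match the second `-1` row of the Training Logs)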
+* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
+| Metric              | Value      |
+|:--------------------|:-----------|
+| pearson_cosine      | 0.4378     |
+| **spearman_cosine** | **0.4808** |
+<!--
+## Bias, Risks and Limitations
+*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+-->
+<!--
+### Recommendations
+*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+-->
+## Training Details
+### Training Dataset
+#### json
+* Dataset: json
+* Size: 2,839,738 training samples
+* Columns: <code>uuid</code>, <code>sentence_1</code>, <code>sentence_2</code>, <code>image_1</code>, <code>image_2</code>, and <code>score</code>
+* Approximate statistics based on the first 1000 samples:
+  |         | uuid                                                                               | sentence_1                                                                         | sentence_2                                                                          | image_1                                                                            | image_2                                                                            | score                                                          |
+  |:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:---------------------------------------------------------------|
+  | type    | string                                                                             | string                                                                             | string                                                                              | string                                                                             | string                                                                             | float                                                          |
+  | details | <ul><li>min: 49 tokens</li><li>mean: 56.99 tokens</li><li>max: 65 tokens</li></ul> | <ul><li>min: 17 tokens</li><li>mean: 69.4 tokens</li><li>max: 180 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 68.59 tokens</li><li>max: 166 tokens</li></ul> | <ul><li>min: 53 tokens</li><li>mean: 58.17 tokens</li><li>max: 64 tokens</li></ul> | <ul><li>min: 52 tokens</li><li>mean: 58.28 tokens</li><li>max: 64 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.13</li><li>max: 0.5</li></ul> |
+* Samples:
+  | uuid                                                                                   | sentence_1                                                                                                                                                                                                                                                                                                     | sentence_2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   | image_1                                                                                                     | image_2                                                                                                     | score             |
+  |:---------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------|:------------------|
+  | <code>08f9b863-10b7-46d6-badd-97381e6c7c5e_4330efa7-a11b-4776-9fb0-1cae8aed67b1</code> | <code>Title: Blast Zone<br>Type: Land<br>Desc: This land enters with a charge counter on it.<br>{T}: Add {C}.<br>{X}{X}, {T}: Put X charge counters on this land.<br>{3}, {T}, Sacrifice this land: Destroy each nonland permanent with mana value equal to the number of charge counters on this land.</code> | <code>Title: Tom van de Logt Bio (2000)<br>Type: Card<br>Desc: Quarterfinalist Tom van de Logt posted a perfect 6—0 record during the Standard portion of this year's World Championships. The 19-year-old Groesbeek, Holland native was playing a deck that had a big impact on the metagame this year, "Replenish." This deck used cards like Attunement and Frantic Search to put powerful enchantments, such as Parallax Wave and Opalescence, into the graveyard and then used Replenish to put them all back into play at once.</code> | <code>https://cards.scryfall.io/normal/front/0/8/08f9b863-10b7-46d6-badd-97381e6c7c5e.jpg?1674423042</code> | <code>https://cards.scryfall.io/normal/front/4/3/4330efa7-a11b-4776-9fb0-1cae8aed67b1.jpg?1562767017</code> | <code>0.25</code> |
+  | <code>abe9cf1e-d398-41e0-8b11-afe1015e4fd9_40cb67f7-b4e1-423b-8f55-d44ed383e778</code> | <code>Title: Coral Net<br>Cost: {U}<br>Colors: ['U']<br>Type: Enchantment — Aura<br>Desc: Enchant green or white creature<br>Enchanted creature has "At the beginning of your upkeep, sacrifice this creature unless you discard a card."</code>                                                               | <code>Title: Silumgar Butcher<br>Cost: {4}{B}<br>Colors: ['B']<br>Type: Creature — Zombie Djinn<br>Desc: Exploit (When this creature enters, you may sacrifice a creature.)<br>When this creature exploits a creature, target creature gets -3/-3 until end of turn.</code>                                                                                                                                                                                                                                                                  | <code>https://cards.scryfall.io/normal/front/a/b/abe9cf1e-d398-41e0-8b11-afe1015e4fd9.jpg?1562631469</code> | <code>https://cards.scryfall.io/normal/front/4/0/40cb67f7-b4e1-423b-8f55-d44ed383e778.jpg?1562785294</code> | <code>0.0</code>  |
+  | <code>3dd13408-b4db-42e7-bf3c-d46716538a7c_05a6dc90-3997-4911-8bd6-854c85eca35b</code> | <code>Title: Rishadan Brigand<br>Cost: {4}{U}<br>Colors: ['U']<br>Type: Creature — Human Pirate<br>Desc: Flying<br>When this creature enters, each opponent sacrifices a permanent of their choice unless they pay {3}.<br>This creature can block only creatures with flying.</code>                          | <code>Title: Banishing Stroke<br>Cost: {5}{W}<br>Colors: ['W']<br>Type: Instant<br>Desc: Put target artifact, creature, or enchantment on the bottom of its owner's library.<br>Miracle {W} (You may cast this card for its miracle cost when you draw it if it's the first card you drew this turn.)</code>                                                                                                                                                                                                                                 | <code>https://cards.scryfall.io/normal/front/3/d/3dd13408-b4db-42e7-bf3c-d46716538a7c.jpg?1632145390</code> | <code>https://cards.scryfall.io/normal/front/0/5/05a6dc90-3997-4911-8bd6-854c85eca35b.jpg?1723433851</code> | <code>0.0</code>  |
+* Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
+  ```json
+  {
+      "scale": 20.0,
+      "similarity_fct": "pairwise_cos_sim"
+  }
+  ```
+### Evaluation Dataset
+#### json
+* Dataset: json
+* Size: 74,730 evaluation samples
+* Columns: <code>uuid</code>, <code>sentence_1</code>, <code>sentence_2</code>, <code>image_1</code>, <code>image_2</code>, and <code>score</code>
+* Approximate statistics based on the first 1000 samples:
+  |         | uuid                                                                              | sentence_1                                                                          | sentence_2                                                                          | image_1                                                                            | image_2                                                                            | score                                                           |
+  |:--------|:----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:----------------------------------------------------------------|
+  | type    | string                                                                            | string                                                                              | string                                                                              | string                                                                             | string                                                                             | float                                                           |
+  | details | <ul><li>min: 50 tokens</li><li>mean: 56.9 tokens</li><li>max: 65 tokens</li></ul> | <ul><li>min: 14 tokens</li><li>mean: 68.44 tokens</li><li>max: 181 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 69.49 tokens</li><li>max: 179 tokens</li></ul> | <ul><li>min: 52 tokens</li><li>mean: 58.22 tokens</li><li>max: 64 tokens</li></ul> | <ul><li>min: 52 tokens</li><li>mean: 58.21 tokens</li><li>max: 64 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.12</li><li>max: 0.75</li></ul> |
+* Samples:
+  | uuid                                                                                   | sentence_1                                                                                                                                                                                                                                                                                                                                                                                                          | sentence_2                                                                                                                                                                                                                                                                                                                                                             | image_1                                                                                                     | image_2                                                                                                     | score             |
+  |:---------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------|:------------------|
+  | <code>6bdd8645-aee9-44cb-acaa-2674f55cdf2f_b34bb149-2e50-462e-8b83-5c8339bb3aff</code> | <code>Title: Syr Cadian, Knight Owl<br>Cost: {3}{W}{W}<br>Colors: ['W']<br>Type: Legendary Creature — Bird Knight<br>Desc: Knightlifelink (Damage dealt by Knights you control also causes you to gain that much life.)<br>{W}: Syr Cadian gains vigilance until end of turn. Activate only from sunrise to sunset.<br>{B}: Syr Cadian gains flying until end of turn. Activate only from sunset to sunrise.</code> | <code>Title: Non-Human Cannonball<br>Cost: {2}{R}<br>Colors: ['R']<br>Type: Artifact Creature — Clown Robot<br>Desc: When this creature dies, roll a six-sided die. If the result is 4 or less, this creature deals that much damage to you.</code>                                                                                                                    | <code>https://cards.scryfall.io/normal/front/6/b/6bdd8645-aee9-44cb-acaa-2674f55cdf2f.jpg?1664317187</code> | <code>https://cards.scryfall.io/normal/front/b/3/b34bb149-2e50-462e-8b83-5c8339bb3aff.jpg?1673917877</code> | <code>0.25</code> |
+  | <code>860f4304-38f1-4c2f-a122-2590619522fd_08d6db9b-b2da-4148-aa49-8c2fecac6e32</code> | <code>Title: Hindering Light<br>Cost: {W}{U}<br>Colors: ['U', 'W']<br>Type: Instant<br>Desc: Counter target spell that targets you or a permanent you control.<br>Draw a card.</code>                                                                                                                                                                                                                               | <code>Title: Gleam of Resistance<br>Cost: {4}{W}<br>Colors: ['W']<br>Type: Instant<br>Desc: Creatures you control get +1/+2 until end of turn. Untap those creatures.<br>Basic landcycling {1}{W} ({1}{W}, Discard this card: Search your library for a basic land card, reveal it, put it into your hand, then shuffle.)</code>                                       | <code>https://cards.scryfall.io/normal/front/8/6/860f4304-38f1-4c2f-a122-2590619522fd.jpg?1712353583</code> | <code>https://cards.scryfall.io/normal/front/0/8/08d6db9b-b2da-4148-aa49-8c2fecac6e32.jpg?1573505575</code> | <code>0.25</code> |
+  | <code>91b448f4-aa0c-42c7-a771-e8dd20e0520c_46f810c2-310e-42f5-ab1f-d56396cf5124</code> | <code>Title: Practiced Tactics<br>Cost: {W}<br>Colors: ['W']<br>Type: Instant<br>Desc: Choose target attacking or blocking creature. Practiced Tactics deals damage to that creature equal to twice the number of creatures in your party. (Your party consists of up to one each of Cleric, Rogue, Warrior, and Wizard.)</code>                                                                                    | <code>Title: Anointer Priest<br>Cost: {1}{W}<br>Colors: ['W']<br>Type: Creature — Human Cleric<br>Desc: Whenever a creature token you control enters, you gain 1 life.<br>Embalm {3}{W} ({3}{W}, Exile this card from your graveyard: Create a token that's a copy of it, except it's a white Zombie Human Cleric with no mana cost. Embalm only as a sorcery.)</code> | <code>https://cards.scryfall.io/normal/front/9/1/91b448f4-aa0c-42c7-a771-e8dd20e0520c.jpg?1604192922</code> | <code>https://cards.scryfall.io/normal/front/4/6/46f810c2-310e-42f5-ab1f-d56396cf5124.jpg?1599769231</code> | <code>0.25</code> |
+* Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
+  ```json
+  {
+      "scale": 20.0,
+      "similarity_fct": "pairwise_cos_sim"
+  }
+  ```
+### Training Hyperparameters
+#### Non-Default Hyperparameters
+- `eval_strategy`: steps
+- `per_device_train_batch_size`: 64
+- `per_device_eval_batch_size`: 64
+- `learning_rate`: 2e-05
+- `num_train_epochs`: 1
+- `warmup_ratio`: 0.1
+- `log_level_replica`: passive
+- `log_on_each_node`: False
+- `logging_nan_inf_filter`: False
+- `fp16`: True
+- `batch_sampler`: no_duplicates
+#### All Hyperparameters
+<details><summary>Click to expand</summary>
+- `overwrite_output_dir`: False
+- `do_predict`: False
+- `eval_strategy`: steps
+- `prediction_loss_only`: True
+- `per_device_train_batch_size`: 64
+- `per_device_eval_batch_size`: 64
+- `per_gpu_train_batch_size`: None
+- `per_gpu_eval_batch_size`: None
+- `gradient_accumulation_steps`: 1
+- `eval_accumulation_steps`: None
+- `torch_empty_cache_steps`: None
+- `learning_rate`: 2e-05
+- `weight_decay`: 0.0
+- `adam_beta1`: 0.9
+- `adam_beta2`: 0.999
+- `adam_epsilon`: 1e-08
+- `max_grad_norm`: 1.0
+- `num_train_epochs`: 1
+- `max_steps`: -1
+- `lr_scheduler_type`: linear
+- `lr_scheduler_kwargs`: {}
+- `warmup_ratio`: 0.1
+- `warmup_steps`: 0
+- `log_level`: passive
+- `log_level_replica`: passive
+- `log_on_each_node`: False
+- `logging_nan_inf_filter`: False
+- `save_safetensors`: True
+- `save_on_each_node`: False
+- `save_only_model`: False
+- `restore_callback_states_from_checkpoint`: False
+- `no_cuda`: False
+- `use_cpu`: False
+- `use_mps_device`: False
+- `seed`: 42
+- `data_seed`: None
+- `jit_mode_eval`: False
+- `use_ipex`: False
+- `bf16`: False
+- `fp16`: True
+- `fp16_opt_level`: O1
+- `half_precision_backend`: auto
+- `bf16_full_eval`: False
+- `fp16_full_eval`: False
+- `tf32`: None
+- `local_rank`: 0
+- `ddp_backend`: None
+- `tpu_num_cores`: None
+- `tpu_metrics_debug`: False
+- `debug`: []
+- `dataloader_drop_last`: False
+- `dataloader_num_workers`: 0
+- `dataloader_prefetch_factor`: None
+- `past_index`: -1
+- `disable_tqdm`: False
+- `remove_unused_columns`: True
+- `label_names`: None
+- `load_best_model_at_end`: False
+- `ignore_data_skip`: False
+- `fsdp`: []
+- `fsdp_min_num_params`: 0
+- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+- `tp_size`: 0
+- `fsdp_transformer_layer_cls_to_wrap`: None
+- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+- `deepspeed`: None
+- `label_smoothing_factor`: 0.0
+- `optim`: adamw_torch
+- `optim_args`: None
+- `adafactor`: False
+- `group_by_length`: False
+- `length_column_name`: length
+- `ddp_find_unused_parameters`: None
+- `ddp_bucket_cap_mb`: None
+- `ddp_broadcast_buffers`: False
+- `dataloader_pin_memory`: True
+- `dataloader_persistent_workers`: False
+- `skip_memory_metrics`: True
+- `use_legacy_prediction_loop`: False
+- `push_to_hub`: False
+- `resume_from_checkpoint`: None
+- `hub_model_id`: None
+- `hub_strategy`: every_save
+- `hub_private_repo`: None
+- `hub_always_push`: False
+- `gradient_checkpointing`: False
+- `gradient_checkpointing_kwargs`: None
+- `include_inputs_for_metrics`: False
+- `include_for_metrics`: []
+- `eval_do_concat_batches`: True
+- `fp16_backend`: auto
+- `push_to_hub_model_id`: None
+- `push_to_hub_organization`: None
+- `mp_parameters`:
+- `auto_find_batch_size`: False
+- `full_determinism`: False
+- `torchdynamo`: None
+- `ray_scope`: last
+- `ddp_timeout`: 1800
+- `torch_compile`: False
+- `torch_compile_backend`: None
+- `torch_compile_mode`: None
+- `dispatch_batches`: None
+- `split_batches`: None
+- `include_tokens_per_second`: False
+- `include_num_input_tokens_seen`: False
+- `neftune_noise_alpha`: None
+- `optim_target_modules`: None
+- `batch_eval_metrics`: False
+- `eval_on_start`: False
+- `use_liger_kernel`: False
+- `eval_use_gather_object`: False
+- `average_tokens_across_devices`: False
+- `prompts`: None
+- `batch_sampler`: no_duplicates
+- `multi_dataset_batch_sampler`: proportional
+</details>
+### Training Logs
+| Epoch  | Step  | Training Loss | sts-dev_spearman_cosine | sts-test_spearman_cosine |
+|:------:|:-----:|:-------------:|:-----------------------:|:------------------------:|
+| -1     | -1    | -             | 0.3315                  | -                        |
+| 0.0014 | 500   | 2.7624        | -                       | -                        |
+| 0.0028 | 1000  | 2.7106        | -                       | -                        |
+| 0.0042 | 1500  | 2.7205        | -                       | -                        |
+| 0.0056 | 2000  | 2.6742        | -                       | -                        |
+| 0.0070 | 2500  | 2.6359        | -                       | -                        |
+| -1     | -1    | -             | 0.4808                  | -                        |
+| 0.0113 | 500   | 6.8841        | -                       | -                        |
+| 0.0225 | 1000  | 6.8706        | -                       | -                        |
+| 0.0338 | 1500  | 6.8388        | -                       | -                        |
+| 0.0451 | 2000  | 6.8323        | -                       | -                        |
+| 0.0563 | 2500  | 6.8277        | -                       | -                        |
+| 0.0676 | 3000  | 6.8306        | -                       | -                        |
+| 0.0789 | 3500  | 6.833         | -                       | -                        |
+| 0.0901 | 4000  | 6.8267        | -                       | -                        |
+| 0.1014 | 4500  | 6.8323        | -                       | -                        |
+| 0.1127 | 5000  | 6.8293        | -                       | -                        |
+| 0.1240 | 5500  | 6.8384        | -                       | -                        |
+| 0.1352 | 6000  | 6.8265        | -                       | -                        |
+| 0.1465 | 6500  | 6.8205        | -                       | -                        |
+| 0.1578 | 7000  | 6.8257        | -                       | -                        |
+| 0.1690 | 7500  | 6.8167        | -                       | -                        |
+| 0.1803 | 8000  | 6.8171        | -                       | -                        |
+| 0.1916 | 8500  | 6.8221        | -                       | -                        |
+| 0.2028 | 9000  | 6.8208        | -                       | -                        |
+| 0.2141 | 9500  | 6.8301        | -                       | -                        |
+| 0.2254 | 10000 | 6.8166        | -                       | -                        |
+| 0.2366 | 10500 | 6.8143        | -                       | -                        |
+| 0.2479 | 11000 | 6.8184        | -                       | -                        |
+| 0.2592 | 11500 | 6.8274        | -                       | -                        |
+| 0.2704 | 12000 | 6.8339        | -                       | -                        |
+| 0.2817 | 12500 | 6.8273        | -                       | -                        |
+| 0.2930 | 13000 | 6.8338        | -                       | -                        |
+| 0.3043 | 13500 | 6.821         | -                       | -                        |
+| 0.3155 | 14000 | 6.8375        | -                       | -                        |
+| 0.3268 | 14500 | 6.8219        | -                       | -                        |
+| 0.3381 | 15000 | 6.8277        | -                       | -                        |
+| 0.3493 | 15500 | 6.8248        | -                       | -                        |
+| 0.3606 | 16000 | 6.8234        | -                       | -                        |
+| 0.3719 | 16500 | 6.8215        | -                       | -                        |
+| 0.3831 | 17000 | 6.823         | -                       | -                        |
+| 0.3944 | 17500 | 6.8287        | -                       | -                        |
+| 0.4057 | 18000 | 6.8226        | -                       | -                        |
+| 0.4169 | 18500 | 6.8179        | -                       | -                        |
+| 0.4282 | 19000 | 6.8142        | -                       | -                        |
+| 0.4395 | 19500 | 6.82          | -                       | -                        |
+| 0.4507 | 20000 | 6.8243        | -                       | -                        |
+| 0.4620 | 20500 | 6.8185        | -                       | -                        |
+| 0.4733 | 21000 | 6.8191        | -                       | -                        |
+| 0.4846 | 21500 | 6.8318        | -                       | -                        |
+| 0.4958 | 22000 | 6.8282        | -                       | -                        |
+| 0.5071 | 22500 | 6.8291        | -                       | -                        |
+| 0.5184 | 23000 | 6.8259        | -                       | -                        |
+| 0.5296 | 23500 | 6.8232        | -                       | -                        |
+| 0.5409 | 24000 | 6.822         | -                       | -                        |
+| 0.5522 | 24500 | 6.8271        | -                       | -                        |
+| 0.5634 | 25000 | 6.8174        | -                       | -                        |
+| 0.5747 | 25500 | 6.8164        | -                       | -                        |
+| 0.5860 | 26000 | 6.8279        | -                       | -                        |
+| 0.5972 | 26500 | 6.8153        | -                       | -                        |
+| 0.6085 | 27000 | 6.8242        | -                       | -                        |
+| 0.6198 | 27500 | 6.806         | -                       | -                        |
+| 0.6310 | 28000 | 6.8305        | -                       | -                        |
+| 0.6423 | 28500 | 6.8164        | -                       | -                        |
+| 0.6536 | 29000 | 6.8198        | -                       | -                        |
+| 0.6648 | 29500 | 6.8171        | -                       | -                        |
+| 0.6761 | 30000 | 6.8131        | -                       | -                        |
+| 0.6874 | 30500 | 6.8149        | -                       | -                        |
+| 0.6987 | 31000 | 6.8149        | -                       | -                        |
+| 0.7099 | 31500 | 6.8216        | -                       | -                        |
+| 0.7212 | 32000 | 6.8244        | -                       | -                        |
+| 0.7325 | 32500 | 6.8264        | -                       | -                        |
+| 0.7437 | 33000 | 6.8176        | -                       | -                        |
+| 0.7550 | 33500 | 6.8255        | -                       | -                        |
+| 0.7663 | 34000 | 6.807         | -                       | -                        |
+| 0.7775 | 34500 | 6.8187        | -                       | -                        |
+| 0.7888 | 35000 | 6.8174        | -                       | -                        |
+| 0.8001 | 35500 | 6.8197        | -                       | -                        |
+| 0.8113 | 36000 | 6.8074        | -                       | -                        |
+| 0.8226 | 36500 | 6.8105        | -                       | -                        |
+| 0.8339 | 37000 | 6.8143        | -                       | -                        |
+| 0.8451 | 37500 | 6.8069        | -                       | -                        |
+| 0.8564 | 38000 | 6.8109        | -                       | -                        |
+| 0.8677 | 38500 | 6.8072        | -                       | -                        |
+| 0.8790 | 39000 | 6.8172        | -                       | -                        |
+| 0.8902 | 39500 | 6.8127        | -                       | -                        |
+| 0.9015 | 40000 | 6.8151        | -                       | -                        |
+| 0.9128 | 40500 | 6.8188        | -                       | -                        |
+| 0.9240 | 41000 | 6.8191        | -                       | -                        |
+| 0.9353 | 41500 | 6.811         | -                       | -                        |
+| 0.9466 | 42000 | 6.8095        | -                       | -                        |
+| 0.9578 | 42500 | 6.8042        | -                       | -                        |
+| 0.9691 | 43000 | 6.8086        | -                       | -                        |
+| 0.9804 | 43500 | 6.8106        | -                       | -                        |
+| 0.9916 | 44000 | 6.8038        | -                       | -                        |
+| -1     | -1    | -             | -                       | 0.6348                   |
+### Framework Versions
+- Python: 3.10.14
+- Sentence Transformers: 4.0.2
+- Transformers: 4.50.3
+- PyTorch: 2.6.0+cu124
+- Accelerate: 1.6.0
+- Datasets: 3.5.0
+- Tokenizers: 0.21.1
+## Citation
+### BibTeX
+#### Sentence Transformers
+```bibtex
+@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}
+```
+#### CoSENTLoss
+```bibtex
+@online{kexuefm-8847,
+    title={CoSENT: A more efficient sentence vector scheme than Sentence-BERT},
+    author={Su Jianlin},
+    year={2022},
+    month={Jan},
+    url={https://kexue.fm/archives/8847},
+}
+```
+<!--
+## Glossary
+*Clearly define terms in order to be accessible across audiences.*
+-->
+<!--
+## Model Card Authors
+*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+-->
+<!--
+## Model Card Contact
+*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+-->

final/config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "architectures": [
+    "BertModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 384,
+  "initializer_range": 0.02,
+  "intermediate_size": 1536,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 4,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.3",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

final/config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "__version__": {
+    "sentence_transformers": "4.0.2",
+    "transformers": "4.50.3",
+    "pytorch": "2.6.0+cu124"
+  },
+  "prompts": {},
+  "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
+}

final/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1099454258cd35e036c72b027a68b3c46c88a932491d2ff6f22e8dba027f8be5
+size 76664936

final/modules.json ADDED Viewed

	@@ -0,0 +1,14 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  }
+]

final/sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "max_seq_length": 512,
+  "do_lower_case": false
+}

final/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "additional_special_tokens": [
+    "[PAD]",
+    "[UNK]",
+    "[CLS]",
+    "[SEP]",
+    "[MASK]"
+  ],
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

final/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

final/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,72 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "[PAD]",
+    "[UNK]",
+    "[CLS]",
+    "[SEP]",
+    "[MASK]"
+  ],
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "max_length": 128,
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}

final/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1099454258cd35e036c72b027a68b3c46c88a932491d2ff6f22e8dba027f8be5
+oid sha256:dcae832c07ba5b9574df11a0bce65230b41e2a32e8f5ac4580c3f6bf4666bd67
 size 76664936

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e984b75f86da268f7ec0c73c242514020d74c9e945f0b000e173ad5222086dad
+size 5688