HassanCS
/

chemberta-clintox-tunned-3

Sentence Similarity

sentence-transformers

feature-extraction

Generated from Trainer

dataset_size:4000

loss:ContrastiveLoss

text-embeddings-inference

Model card · Files and versions

Training metrics · Community

HassanCS committed on Feb 3

Commit

c7dee6d

·

verified ·

1 Parent(s): c7dc59f

Upload tokenizer

Files changed (2) hide show

tokenizer.json +4 -2
tokenizer_config.json +8 -1

tokenizer.json CHANGED Viewed

@@ -2,12 +2,14 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 512,
     "strategy": "LongestFirst",
     "stride": 0
   },
   "padding": {
-    "strategy": "BatchLongest",
     "direction": "Right",
     "pad_to_multiple_of": null,
     "pad_id": 0,

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 256,
     "strategy": "LongestFirst",
     "stride": 0
   },
   "padding": {
+    "strategy": {
+      "Fixed": 256
+    },
     "direction": "Right",
     "pad_to_multiple_of": null,
     "pad_id": 0,

tokenizer_config.json CHANGED Viewed

@@ -59,7 +59,7 @@
     }
   },
   "bos_token": "<s>",
-  "clean_up_tokenization_spaces": false,
   "cls_token": "[CLS]",
   "eos_token": "</s>",
   "errors": "replace",
@@ -67,10 +67,17 @@
   "full_tokenizer_file": null,
   "mask_token": "[MASK]",
   "max_len": 512,
   "model_max_length": 512,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "tokenizer_class": "RobertaTokenizer",
   "trim_offsets": true,
   "unk_token": "[UNK]"
 }

     }
   },
   "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "eos_token": "</s>",
   "errors": "replace",
   "full_tokenizer_file": null,
   "mask_token": "[MASK]",
   "max_len": 512,
+  "max_length": 512,
   "model_max_length": 512,
+  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "[SEP]",
+  "stride": 0,
   "tokenizer_class": "RobertaTokenizer",
   "trim_offsets": true,
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }