Tom Aarsen
committed on
Commit
·
bd901a6
1
Parent(s):
8af4c0e
Use the DistilBertTokenizer for this DistilBERT-based model
Browse files- README.md +1 -1
- tokenizer_config.json +1 -1
README.md
CHANGED
@@ -88,7 +88,7 @@ query = "What's the weather in ny now?"
|
|
88 |
document = "Currently New York is rainy."
|
89 |
|
90 |
# encode the query & document
|
91 |
-
feature = tokenizer([query, document], padding=True, truncation=True, return_tensors='pt'
|
92 |
output = model(**feature)[0]
|
93 |
sparse_vector = get_sparse_vector(feature, output)
|
94 |
|
|
|
88 |
document = "Currently New York is rainy."
|
89 |
|
90 |
# encode the query & document
|
91 |
+
feature = tokenizer([query, document], padding=True, truncation=True, return_tensors='pt')
|
92 |
output = model(**feature)[0]
|
93 |
sparse_vector = get_sparse_vector(feature, output)
|
94 |
|
tokenizer_config.json
CHANGED
@@ -8,6 +8,6 @@
|
|
8 |
"sep_token": "[SEP]",
|
9 |
"strip_accents": null,
|
10 |
"tokenize_chinese_chars": true,
|
11 |
-
"tokenizer_class": "
|
12 |
"unk_token": "[UNK]"
|
13 |
}
|
|
|
8 |
"sep_token": "[SEP]",
|
9 |
"strip_accents": null,
|
10 |
"tokenize_chinese_chars": true,
|
11 |
+
"tokenizer_class": "DistilBertTokenizer",
|
12 |
"unk_token": "[UNK]"
|
13 |
}
|