Tom Aarsen committed on
Commit
bd901a6
·
1 Parent(s): 8af4c0e

Use the DistilBertTokenizer for this DistilBERT-based model

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. tokenizer_config.json +1 -1
README.md CHANGED
@@ -88,7 +88,7 @@ query = "What's the weather in ny now?"
88
  document = "Currently New York is rainy."
89
 
90
  # encode the query & document
91
- feature = tokenizer([query, document], padding=True, truncation=True, return_tensors='pt', return_token_type_ids=False)
92
  output = model(**feature)[0]
93
  sparse_vector = get_sparse_vector(feature, output)
94
 
 
88
  document = "Currently New York is rainy."
89
 
90
  # encode the query & document
91
+ feature = tokenizer([query, document], padding=True, truncation=True, return_tensors='pt')
92
  output = model(**feature)[0]
93
  sparse_vector = get_sparse_vector(feature, output)
94
 
tokenizer_config.json CHANGED
@@ -8,6 +8,6 @@
8
  "sep_token": "[SEP]",
9
  "strip_accents": null,
10
  "tokenize_chinese_chars": true,
11
- "tokenizer_class": "BertTokenizer",
12
  "unk_token": "[UNK]"
13
  }
 
8
  "sep_token": "[SEP]",
9
  "strip_accents": null,
10
  "tokenize_chinese_chars": true,
11
+ "tokenizer_class": "DistilBertTokenizer",
12
  "unk_token": "[UNK]"
13
  }