Tom Aarsen
		
	committed on
		
		
					Commit 
							
							·
						
						bd901a6
	
1
								Parent(s):
							
							8af4c0e
								
Use the DistilBertTokenizer for this DistilBERT-based model
Browse files- README.md +1 -1
- tokenizer_config.json +1 -1
    	
        README.md
    CHANGED
    
    | @@ -88,7 +88,7 @@ query = "What's the weather in ny now?" | |
| 88 | 
             
            document = "Currently New York is rainy."
         | 
| 89 |  | 
| 90 | 
             
            # encode the query & document
         | 
| 91 | 
            -
            feature = tokenizer([query, document], padding=True, truncation=True, return_tensors='pt' | 
| 92 | 
             
            output = model(**feature)[0]
         | 
| 93 | 
             
            sparse_vector = get_sparse_vector(feature, output)
         | 
| 94 |  | 
|  | |
| 88 | 
             
            document = "Currently New York is rainy."
         | 
| 89 |  | 
| 90 | 
             
            # encode the query & document
         | 
| 91 | 
            +
            feature = tokenizer([query, document], padding=True, truncation=True, return_tensors='pt')
         | 
| 92 | 
             
            output = model(**feature)[0]
         | 
| 93 | 
             
            sparse_vector = get_sparse_vector(feature, output)
         | 
| 94 |  | 
    	
        tokenizer_config.json
    CHANGED
    
    | @@ -8,6 +8,6 @@ | |
| 8 | 
             
              "sep_token": "[SEP]",
         | 
| 9 | 
             
              "strip_accents": null,
         | 
| 10 | 
             
              "tokenize_chinese_chars": true,
         | 
| 11 | 
            -
              "tokenizer_class": " | 
| 12 | 
             
              "unk_token": "[UNK]"
         | 
| 13 | 
             
            }
         | 
|  | |
| 8 | 
             
              "sep_token": "[SEP]",
         | 
| 9 | 
             
              "strip_accents": null,
         | 
| 10 | 
             
              "tokenize_chinese_chars": true,
         | 
| 11 | 
            +
              "tokenizer_class": "DistilBertTokenizer",
         | 
| 12 | 
             
              "unk_token": "[UNK]"
         | 
| 13 | 
             
            }
         | 
