Update README.md
README.md
@@ -19,12 +19,12 @@ model = BertForMaskedLM.from_pretrained(model_name)
 tokenizer = BertTokenizer.from_pretrained(model_name)
 ````
 
-To use it
+To use it as a masked language model:
 
 ````python
 import torch
 
-sentence = "
+sentence = "Let's have a [MASK]."
 
 encoded_inputs = tokenizer([sentence], padding='longest')
 input_ids = torch.tensor(encoded_inputs['input_ids'])
@@ -37,3 +37,17 @@ predicted_token = tokenizer.decode(masked_token)
 
 print(predicted_token)
 ````
+
+Or we can also get the n most relevant predictions:
+
+````python
+top_n = 5
+
+vocab_size = model.config.vocab_size
+logits = output['logits'][0][mask_index].tolist()
+top_tokens = sorted(range(vocab_size), key=lambda i: logits[i], reverse=True)[:top_n]
+
+tokenizer.decode(top_tokens)
+````
+
+
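For context, the hunks above only show fragments of the README's example: the lines between them, where `output`, `mask_index`, and `masked_token` are computed, are not part of this diff. Below is a minimal end-to-end sketch of how the snippets might fit together; `bert-base-uncased` is a placeholder checkpoint (the actual `model_name` is defined above the shown hunk) and the intermediate steps are guesses based on the variable names that do appear.

````python
import torch
from transformers import BertTokenizer, BertForMaskedLM

# Placeholder checkpoint -- the actual model_name is defined above the shown hunk.
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForMaskedLM.from_pretrained(model_name)

sentence = "Let's have a [MASK]."

encoded_inputs = tokenizer([sentence], padding='longest')
input_ids = torch.tensor(encoded_inputs['input_ids'])

# The steps below are guessed from the variable names in the diff context
# (output, mask_index, masked_token); the README lines between the two hunks are not shown.
with torch.no_grad():
    output = model(input_ids)

# Position of the [MASK] token in the encoded sentence.
mask_index = input_ids[0].tolist().index(tokenizer.mask_token_id)

# Single most likely token for the masked position.
masked_token = output['logits'][0, mask_index].argmax(dim=-1, keepdim=True)
predicted_token = tokenizer.decode(masked_token)
print(predicted_token)

# The n most relevant predictions, as in the added snippet.
top_n = 5
vocab_size = model.config.vocab_size
logits = output['logits'][0][mask_index].tolist()
top_tokens = sorted(range(vocab_size), key=lambda i: logits[i], reverse=True)[:top_n]
print(tokenizer.decode(top_tokens))
````

Calling `torch.topk` on the masked position's logits would be an equivalent, more direct way to obtain the `top_n` indices than sorting the whole vocabulary.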