Update README.md
Browse files
README.md
CHANGED
@@ -65,7 +65,7 @@ Please refer to the [github repository](https://github.com/AI4Bharat/IndicTrans2
|
|
65 |
```python
|
66 |
import torch
|
67 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
68 |
-
from IndicTransToolkit import IndicProcessor
|
69 |
# recommended to run this on a gpu with flash_attn installed
|
70 |
# don't set attn_implementation if you don't have flash_attn
|
71 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
@@ -117,12 +117,11 @@ with torch.no_grad():
|
|
117 |
)
|
118 |
|
119 |
# Decode the generated tokens into text
|
120 |
-
|
121 |
-
generated_tokens
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
)
|
126 |
|
127 |
# Postprocess the translations, including entity replacement
|
128 |
translations = ip.postprocess_batch(generated_tokens, lang=tgt_lang)
|
|
|
65 |
```python
|
66 |
import torch
|
67 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
68 |
+
from IndicTransToolkit.processor import IndicProcessor
|
69 |
# recommended to run this on a gpu with flash_attn installed
|
70 |
# don't set attn_implementation if you don't have flash_attn
|
71 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
117 |
)
|
118 |
|
119 |
# Decode the generated tokens into text
|
120 |
+
generated_tokens = tokenizer.batch_decode(
|
121 |
+
generated_tokens,
|
122 |
+
skip_special_tokens=True,
|
123 |
+
clean_up_tokenization_spaces=True,
|
124 |
+
)
|
|
|
125 |
|
126 |
# Postprocess the translations, including entity replacement
|
127 |
translations = ip.postprocess_batch(generated_tokens, lang=tgt_lang)
|