Update README.md
Browse files
README.md
CHANGED
@@ -58,7 +58,7 @@ Please refer to the [github repository](https://github.com/AI4Bharat/IndicTrans2
|
|
58 |
```python
|
59 |
import torch
|
60 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
61 |
-
from IndicTransToolkit import IndicProcessor
|
62 |
# recommended to run this on a gpu with flash_attn installed
|
63 |
# don't set attn_implementation if you don't have flash_attn
|
64 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
@@ -112,12 +112,11 @@ with torch.no_grad():
|
|
112 |
)
|
113 |
|
114 |
# Decode the generated tokens into text
|
115 |
-
|
116 |
-
generated_tokens
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
)
|
121 |
|
122 |
# Postprocess the translations, including entity replacement
|
123 |
translations = ip.postprocess_batch(generated_tokens, lang=tgt_lang)
|
|
|
58 |
```python
|
59 |
import torch
|
60 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
61 |
+
from IndicTransToolkit.processor import IndicProcessor
|
62 |
# recommended to run this on a gpu with flash_attn installed
|
63 |
# don't set attn_implementation if you don't have flash_attn
|
64 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
112 |
)
|
113 |
|
114 |
# Decode the generated tokens into text
|
115 |
+
generated_tokens = tokenizer.batch_decode(
|
116 |
+
generated_tokens,
|
117 |
+
skip_special_tokens=True,
|
118 |
+
clean_up_tokenization_spaces=True,
|
119 |
+
)
|
|
|
120 |
|
121 |
# Postprocess the translations, including entity replacement
|
122 |
translations = ip.postprocess_batch(generated_tokens, lang=tgt_lang)
|