Update README.md
Browse files
README.md
CHANGED
@@ -61,7 +61,7 @@ Please refer to the [github repository](https://github.com/AI4Bharat/IndicTrans2
|
|
61 |
```python
|
62 |
import torch
|
63 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
64 |
-
from IndicTransToolkit import IndicProcessor
|
65 |
# recommended to run this on a gpu with flash_attn installed
|
66 |
# don't set attn_implementation if you don't have flash_attn
|
67 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
@@ -109,12 +109,11 @@ with torch.no_grad():
|
|
109 |
)
|
110 |
|
111 |
# Decode the generated tokens into text
|
112 |
-
|
113 |
-
generated_tokens
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
)
|
118 |
|
119 |
# Postprocess the translations, including entity replacement
|
120 |
translations = ip.postprocess_batch(generated_tokens, lang=tgt_lang)
|
|
|
61 |
```python
|
62 |
import torch
|
63 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
64 |
+
from IndicTransToolkit.processor import IndicProcessor
|
65 |
# recommended to run this on a gpu with flash_attn installed
|
66 |
# don't set attn_implementation if you don't have flash_attn
|
67 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
109 |
)
|
110 |
|
111 |
# Decode the generated tokens into text
|
112 |
+
generated_tokens = tokenizer.batch_decode(
|
113 |
+
generated_tokens,
|
114 |
+
skip_special_tokens=True,
|
115 |
+
clean_up_tokenization_spaces=True,
|
116 |
+
)
|
|
|
117 |
|
118 |
# Postprocess the translations, including entity replacement
|
119 |
translations = ip.postprocess_batch(generated_tokens, lang=tgt_lang)
|