Update README.md
Browse files
README.md
CHANGED
@@ -61,7 +61,7 @@ Please refer to the [github repository](https://github.com/AI4Bharat/IndicTrans2
|
|
61 |
```python
|
62 |
import torch
|
63 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
64 |
-
from IndicTransToolkit import IndicProcessor
|
65 |
# recommended to run this on a gpu with flash_attn installed
|
66 |
# don't set attn_implementation if you don't have flash_attn
|
67 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
@@ -109,12 +109,11 @@ with torch.no_grad():
|
|
109 |
)
|
110 |
|
111 |
# Decode the generated tokens into text
|
112 |
-
|
113 |
-
generated_tokens
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
)
|
118 |
|
119 |
# Postprocess the translations, including entity replacement
|
120 |
translations = ip.postprocess_batch(generated_tokens, lang=tgt_lang)
|
|
|
61 |
```python
|
62 |
import torch
|
63 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
64 |
+
from IndicTransToolkit.processor import IndicProcessor
|
65 |
# recommended to run this on a gpu with flash_attn installed
|
66 |
# don't set attn_implementation if you don't have flash_attn
|
67 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
109 |
)
|
110 |
|
111 |
# Decode the generated tokens into text
|
112 |
+
generated_tokens = tokenizer.batch_decode(
|
113 |
+
generated_tokens,
|
114 |
+
skip_special_tokens=True,
|
115 |
+
clean_up_tokenization_spaces=True,
|
116 |
+
)
|
|
|
117 |
|
118 |
# Postprocess the translations, including entity replacement
|
119 |
translations = ip.postprocess_batch(generated_tokens, lang=tgt_lang)
|