shreyasvaidya committed on
Commit
a1b6a1f
·
verified ·
1 Parent(s): 125d6ce

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +41 -1
app.py CHANGED
@@ -27,7 +27,47 @@ DEVICE = "cpu"
27
  # Initialize the OCR object for text detection and recognition
28
  ocr = OCR(device="cpu", verbose=False)
29
  def translate_en_hin(given_str):
30
- return given_str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
 
33
 
 
27
  # Initialize the OCR object for text detection and recognition
28
  ocr = OCR(device="cpu", verbose=False)
29
  def translate_en_hin(given_str):
30
+ model = model.to(DEVICE)
31
+ model.eval()
32
+ src_lang, tgt_lang = "eng_Latn", "hin_Deva"
33
+
34
+ batch = ip.preprocess_batch(
35
+ [given_str],
36
+ src_lang=src_lang,
37
+ tgt_lang=tgt_lang,
38
+ )
39
+ inputs = tokenizer(
40
+ batch,
41
+ truncation=True,
42
+ padding="longest",
43
+ return_tensors="pt",
44
+ return_attention_mask=True,
45
+ ).to(DEVICE)
46
+ with torch.no_grad():
47
+ generated_tokens = model.generate(
48
+ **inputs,
49
+ use_cache=True,
50
+ min_length=0,
51
+ max_length=256,
52
+ num_beams=5,
53
+ num_return_sequences=1,
54
+ )
55
+
56
+ # Decode the generated tokens into text
57
+ with tokenizer.as_target_tokenizer():
58
+ generated_tokens = tokenizer.batch_decode(
59
+ generated_tokens.detach().cpu().tolist(),
60
+ skip_special_tokens=True,
61
+ clean_up_tokenization_spaces=True,
62
+ )
63
+ translation = ip.postprocess_batch(generated_tokens, lang=tgt_lang)[0]
64
+ return translation
65
+
66
+
67
+
68
+
69
+
70
+
71
 
72
 
73