saadustto2007 committed on
Commit
2e10399
·
verified ·
1 Parent(s): 33d52dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -10
app.py CHANGED
@@ -1,13 +1,13 @@
1
- from transformers import NllbTokenizer, NllbForConditionalGeneration
2
  import gradio as gr
3
  import torch
4
 
5
  # Define the model
6
- model_name = "facebook/nllb-200-distilled-600M"
7
 
8
  try:
9
- tokenizer = NllbTokenizer.from_pretrained(model_name)
10
- model = NllbForConditionalGeneration.from_pretrained(model_name)
11
  except Exception as e:
12
  print(f"Error loading model or tokenizer: {e}")
13
  exit(1)
@@ -81,7 +81,7 @@ def transliterate_farsi_to_cyrillic(farsi_text):
81
  "سلام": "Салом",
82
  "خداحافظ": "Худоҳафиз",
83
  "شب بخیر": "Шаби хайр",
84
- "صبح بخیر": "Субҳи хайر",
85
  "ممنون": "Ташаккур",
86
  "خواهش میکنم": "Илтимос",
87
  "چطور هستی؟": "Чӣ хел ҳастӣ?",
@@ -92,7 +92,7 @@ def transliterate_farsi_to_cyrillic(farsi_text):
92
  "تشکر": "Ташаккур",
93
  "فارسی": "Форсī",
94
  "اسم من": "Номи ман",
95
- "لطفا": "Илтимос",
96
  "کمک": "Кумак",
97
  "هستی": "ҳастī",
98
  "هستید": "ҳастед",
@@ -158,9 +158,9 @@ def translate_to_cyrillic_farsi(text):
158
  return "Error: Please enter text in English (ASCII characters only).", ""
159
 
160
  # Try full sentence translation first
161
- tokenizer.src_lang = "eng_Latn" # English in Latin script
162
  encoded_text = tokenizer(text, return_tensors="pt", padding=True).to(device)
163
- translated = model.generate(**encoded_text, forced_bos_token_id=tokenizer.get_lang_id("fas_Arab")) # Farsi in Arabic script
164
  farsi_text = tokenizer.decode(translated[0], skip_special_tokens=True)
165
 
166
  # Check if the translation is valid Farsi
@@ -172,9 +172,9 @@ def translate_to_cyrillic_farsi(text):
172
  if phrase in common_phrases:
173
  farsi_translations.append(common_phrases[phrase])
174
  else:
175
- tokenizer.src_lang = "eng_Latn"
176
  encoded_text = tokenizer(phrase, return_tensors="pt", padding=True).to(device)
177
- translated = model.generate(**encoded_text, forced_bos_token_id=tokenizer.get_lang_id("fas_Arab"))
178
  translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
179
  if any(c in "ابتثجحخدذرزسشصضطظعغفقکگلمنوهیءأؤئء،؟" for c in translated_text.replace(" ", "")):
180
  farsi_translations.append(translated_text)
 
1
+ from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
2
  import gradio as gr
3
  import torch
4
 
5
  # Define the model
6
+ model_name = "facebook/m2m100_418M"
7
 
8
  try:
9
+ tokenizer = M2M100Tokenizer.from_pretrained(model_name)
10
+ model = M2M100ForConditionalGeneration.from_pretrained(model_name)
11
  except Exception as e:
12
  print(f"Error loading model or tokenizer: {e}")
13
  exit(1)
 
81
  "سلام": "Салом",
82
  "خداحافظ": "Худоҳафиз",
83
  "شب بخیر": "Шаби хайр",
84
+ "صبح بخیر": "Субҳи хайр",
85
  "ممنون": "Ташаккур",
86
  "خواهش میکنم": "Илтимос",
87
  "چطور هستی؟": "Чӣ хел ҳастӣ?",
 
92
  "تشکر": "Ташаккур",
93
  "فارسی": "Форсī",
94
  "اسم من": "Номи ман",
95
+ "لطفا": "Илтимос",
96
  "کمک": "Кумак",
97
  "هستی": "ҳастī",
98
  "هستید": "ҳастед",
 
158
  return "Error: Please enter text in English (ASCII characters only).", ""
159
 
160
  # Try full sentence translation first
161
+ tokenizer.src_lang = "en"
162
  encoded_text = tokenizer(text, return_tensors="pt", padding=True).to(device)
163
+ translated = model.generate(**encoded_text, forced_bos_token_id=tokenizer.get_lang_id("fa"))
164
  farsi_text = tokenizer.decode(translated[0], skip_special_tokens=True)
165
 
166
  # Check if the translation is valid Farsi
 
172
  if phrase in common_phrases:
173
  farsi_translations.append(common_phrases[phrase])
174
  else:
175
+ tokenizer.src_lang = "en"
176
  encoded_text = tokenizer(phrase, return_tensors="pt", padding=True).to(device)
177
+ translated = model.generate(**encoded_text, forced_bos_token_id=tokenizer.get_lang_id("fa"))
178
  translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
179
  if any(c in "ابتثجحخدذرزسشصضطظعغفقکگلمنوهیءأؤئء،؟" for c in translated_text.replace(" ", "")):
180
  farsi_translations.append(translated_text)