Update app.py
Browse files
app.py
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
-
from transformers import
|
2 |
import gradio as gr
|
3 |
import torch
|
4 |
|
5 |
# Define the model
|
6 |
-
model_name = "facebook/
|
7 |
|
8 |
try:
|
9 |
-
tokenizer =
|
10 |
-
model =
|
11 |
except Exception as e:
|
12 |
print(f"Error loading model or tokenizer: {e}")
|
13 |
exit(1)
|
@@ -81,7 +81,7 @@ def transliterate_farsi_to_cyrillic(farsi_text):
|
|
81 |
"سلام": "Салом",
|
82 |
"خداحافظ": "Худоҳафиз",
|
83 |
"شب بخیر": "Шаби хайр",
|
84 |
-
"صبح بخیر": "Субҳи
|
85 |
"ممنون": "Ташаккур",
|
86 |
"خواهش میکنم": "Илтимос",
|
87 |
"چطور هستی؟": "Чӣ хел ҳастӣ?",
|
@@ -92,7 +92,7 @@ def transliterate_farsi_to_cyrillic(farsi_text):
|
|
92 |
"تشکر": "Ташаккур",
|
93 |
"فارسی": "Форсӣ",
|
94 |
"اسم من": "Номи ман",
|
95 |
-
"لطفا": "
|
96 |
"کمک": "Кумак",
|
97 |
"هستی": "ҳастӣ",
|
98 |
"هستید": "ҳастед",
|
@@ -158,9 +158,9 @@ def translate_to_cyrillic_farsi(text):
|
|
158 |
return "Error: Please enter text in English (ASCII characters only).", ""
|
159 |
|
160 |
# Try full sentence translation first
|
161 |
-
tokenizer.src_lang = "
|
162 |
encoded_text = tokenizer(text, return_tensors="pt", padding=True).to(device)
|
163 |
-
translated = model.generate(**encoded_text, forced_bos_token_id=tokenizer.get_lang_id("
|
164 |
farsi_text = tokenizer.decode(translated[0], skip_special_tokens=True)
|
165 |
|
166 |
# Check if the translation is valid Farsi
|
@@ -172,9 +172,9 @@ def translate_to_cyrillic_farsi(text):
|
|
172 |
if phrase in common_phrases:
|
173 |
farsi_translations.append(common_phrases[phrase])
|
174 |
else:
|
175 |
-
tokenizer.src_lang = "
|
176 |
encoded_text = tokenizer(phrase, return_tensors="pt", padding=True).to(device)
|
177 |
-
translated = model.generate(**encoded_text, forced_bos_token_id=tokenizer.get_lang_id("
|
178 |
translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
|
179 |
if any(c in "ابتثجحخدذرزسشصضطظعغفقکگلمنوهیءأؤئء،؟" for c in translated_text.replace(" ", "")):
|
180 |
farsi_translations.append(translated_text)
|
|
|
1 |
+
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
|
2 |
import gradio as gr
|
3 |
import torch
|
4 |
|
5 |
# Define the model
|
6 |
+
model_name = "facebook/m2m100_418M"
|
7 |
|
8 |
try:
|
9 |
+
tokenizer = M2M100Tokenizer.from_pretrained(model_name)
|
10 |
+
model = M2M100ForConditionalGeneration.from_pretrained(model_name)
|
11 |
except Exception as e:
|
12 |
print(f"Error loading model or tokenizer: {e}")
|
13 |
exit(1)
|
|
|
81 |
"سلام": "Салом",
|
82 |
"خداحافظ": "Худоҳафиз",
|
83 |
"شب بخیر": "Шаби хайр",
|
84 |
+
"صبح بخیر": "Субҳи хайр",
|
85 |
"ممنون": "Ташаккур",
|
86 |
"خواهش میکنم": "Илтимос",
|
87 |
"چطور هستی؟": "Чӣ хел ҳастӣ?",
|
|
|
92 |
"تشکر": "Ташаккур",
|
93 |
"فارسی": "Форсӣ",
|
94 |
"اسم من": "Номи ман",
|
95 |
+
"لطفا": "Илтимос",
|
96 |
"کمک": "Кумак",
|
97 |
"هستی": "ҳастӣ",
|
98 |
"هستید": "ҳастед",
|
|
|
158 |
return "Error: Please enter text in English (ASCII characters only).", ""
|
159 |
|
160 |
# Try full sentence translation first
|
161 |
+
tokenizer.src_lang = "en"
|
162 |
encoded_text = tokenizer(text, return_tensors="pt", padding=True).to(device)
|
163 |
+
translated = model.generate(**encoded_text, forced_bos_token_id=tokenizer.get_lang_id("fa"))
|
164 |
farsi_text = tokenizer.decode(translated[0], skip_special_tokens=True)
|
165 |
|
166 |
# Check if the translation is valid Farsi
|
|
|
172 |
if phrase in common_phrases:
|
173 |
farsi_translations.append(common_phrases[phrase])
|
174 |
else:
|
175 |
+
tokenizer.src_lang = "en"
|
176 |
encoded_text = tokenizer(phrase, return_tensors="pt", padding=True).to(device)
|
177 |
+
translated = model.generate(**encoded_text, forced_bos_token_id=tokenizer.get_lang_id("fa"))
|
178 |
translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
|
179 |
if any(c in "ابتثجحخدذرزسشصضطظعغفقکگلمنوهیءأؤئء،؟" for c in translated_text.replace(" ", "")):
|
180 |
farsi_translations.append(translated_text)
|