"""Gradio app: translate English to Farsi and render the result in Cyrillic.

The translation is produced by the ``facebook/m2m100_418M`` model.  When the
model output contains no Farsi characters, a small table of hand-written
phrase translations is consulted and the input is re-translated phrase by
phrase.  The Farsi text is then converted to Cyrillic with a word table plus
a per-character fallback.
"""

import re
import sys

import gradio as gr
import torch
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

# Define the model
model_name = "facebook/m2m100_418M"
try:
    tokenizer = M2M100Tokenizer.from_pretrained(model_name)
    model = M2M100ForConditionalGeneration.from_pretrained(model_name)
except Exception as e:
    # Top-level boundary: without the model the app cannot do anything useful.
    print(f"Error loading model or tokenizer: {e}", file=sys.stderr)
    sys.exit(1)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Predefined common English-to-Farsi phrase mappings
common_phrases = {
    "Hello": "سلام",
    "Hi!": "سلام!",
    "Good morning": "صبح بخیر",
    "Good afternoon": "عصر بخیر",
    "Good evening": "شب بخیر",
    "Goodbye": "خداحافظ",
    "Good night": "شب خوش",
    "How are you?": "حالت چطوره؟",
    "I am fine, thank you. And you?": "خوبم، متشکرم. و شما؟",
    "Thank you (very much)": "متشکرم (خیلی ممنون)",
    "You're welcome": "خواهش میکنم",
    "Excuse me": "ببخشید",
    "Pardon me": "معذرت می‌خواهم",
    "I'm sorry": "متأسفم",
    "Congratulations": "تبریک می‌گویم",
    "Please sit down": "لطفاً بنشینید",
    "Good luck": "موفق باشید",
    "Have a good trip": "سفر خوبی داشته باشید",
    "What is your name?": "اسم شما چیست؟",
    "My name is Sara": "اسم من سارا است",
    "Where are you from?": "اهل کجا هستید؟",
    "I am from Iran": "من اهل ایران هستم",
    "Do you speak English?": "آیا انگلیسی صحبت می‌کنید؟",
    "I don't understand": "من متوجه نمی‌شوم",
    "Please speak slowly": "لطفاً آهسته صحبت کنید",
    "Do you have a Persian-English dictionary?": "آیا دیکشنری فارسی-انگلیسی دارید؟",
    "How do you say this in English?": "این را در انگلیسی چگونه می‌گویند؟",
    "How much is this?": "این چقدر قیمت دارد؟",
    "Where is the bathroom?": "دستشویی کجاست؟",
    "Help!": "کمک!",
    "I am lost": "من گم شده‌ام",
    "Can you help me?": "می‌توانید به من کمک کنید؟",
    "What time is it?": "ساعت چند است؟",
    "Where is the hospital?": "بیمارستان کجاست؟",
    "I love you": "دوستت دارم",
    "How can I get to the airport?": "چطور می‌توانم به فرودگاه بروم؟",
    "I need a doctor": "به یک پزشک نیاز دارم",
    "Where can I buy a ticket?": "از کجا می‌توانم بلیط بخرم؟",
    "I am hungry": "گرسنه‌ام",
    "Can I have some water?": "می‌توانم کمی آب بگیرم؟",
    "It’s very beautiful": "خیلی زیباست",
    "See you later": "بعداً می‌بینمت",
    "What is this?": "این چیست؟",
    "I am happy": "خوشحالم",
    "It is very chilly today": "امروز خیلی سرد است",
    "I hope we have better weather tomorrow": "امیدوارم فردا هوا بهتر شود",
}

# Curly quotes are folded to ASCII so pasted text passes the ASCII check and
# so that phrase keys written with a typographic apostrophe stay reachable.
_TYPOGRAPHIC_QUOTES = str.maketrans(
    {"\u2018": "'", "\u2019": "'", "\u201c": '"', "\u201d": '"'}
)


def _normalize_phrase(phrase):
    """Return a case- and punctuation-insensitive lookup key for *phrase*."""
    return phrase.translate(_TYPOGRAPHIC_QUOTES).strip(" ,.?!").casefold()


# split_into_phrases() removes ",.?!" from the pieces it returns, so an exact
# lookup in common_phrases could never hit keys such as "Hi!" or
# "How are you?".  Lookups therefore go through normalized keys.
_COMMON_PHRASE_LOOKUP = {
    _normalize_phrase(english): farsi for english, farsi in common_phrases.items()
}

# Characters whose presence marks model output as Farsi (letters plus the
# Persian comma and question mark).
_FARSI_MARKS = frozenset("ابتثجحخدذرزسشصضطظعغفقکگلمنوهیءأؤئآپچژيك،؟")

# Whole words/phrases with a fixed Cyrillic rendering.
_WORD_MAP = {
    "سلام": "Салом",
    "خداحافظ": "Худоҳафиз",
    "شب بخیر": "Шаб хайр",
    "صبح بخیر": "Субҳ хайр",
    "ممنون": "Ташаккур",
    "خواهش میکنم": "Илтимос",
    "چطور هستی؟": "Чӣ тур ҳастӣ?",
    "چطور هستید؟": "Шумо чӣ туред?",
    "بله": "Ҳа",
    "نه": "Не",
    "ایران": "Эрон",
    "تشکر": "Ташаккур",
    "فارسی": "Форсӣ",
    "اسم من": "Номи ман",
    "لطفا": "Илтимос",
    "کمک": "Кумак",
    "هستی": "ҳастӣ",
    "هستید": "ҳастед",
    "است": "аст",
    "امروز": "Имрӯз",
    "خیلی": "Хеле",
    "سرد": "Сард",
    "امیدوارم": "Умидворам",
    "فردا": "Фардо",
    "هوا": "Ҳаво",
    "بهتر": "Беҳтар",
    "شود": "Шавад",
}

# Per-character fallback.  Characters not listed here pass through unchanged.
_CHAR_MAP = {
    "ا": "а",
    "آ": "о",
    "أ": "а",
    "ب": "б",
    "پ": "п",
    "ت": "т",
    "ث": "с",
    # Tajik Cyrillic (the variant _WORD_MAP uses) writes jim as "ҷ";
    # "ж" belongs to zhe.
    "ج": "ҷ",
    "چ": "ч",
    "ح": "ҳ",
    "خ": "х",
    "د": "д",
    "ذ": "з",
    "ر": "р",
    "ز": "з",
    "ژ": "ж",
    "س": "с",
    "ش": "ш",
    "ص": "с",
    "ض": "з",
    "ط": "т",
    "ظ": "з",
    "ع": "ъ",
    "غ": "ғ",
    "ف": "ф",
    "ق": "қ",
    "ک": "к",
    "ك": "к",  # Arabic kaf, in case the model emits it instead of Persian kaf
    "گ": "г",
    "ل": "л",
    "م": "м",
    "ن": "н",
    "و": "в",
    "ه": "ҳ",
    "ی": "й",
    "ي": "й",  # Arabic yeh, in case the model emits it instead of Persian yeh
    "ء": "ъ",
    "ؤ": "ъ",
    "ئ": "ъ",
    "\u064b": "н",  # tanwin fathatan, as in the final "-an" of "لطفاً"
    "\u200c": "",  # zero-width non-joiner has no Cyrillic counterpart
    "؟": "?",
    "،": ",",
    "؛": ";",
    " ": " ",
}

# Punctuation that may cling to a word and would otherwise defeat the
# _WORD_MAP lookup (e.g. "سلام!" or "(خیلی").
_EDGE_PUNCTUATION = "؟،؛.!?,:;()«»\"'"


# Function to split text into smaller phrases
def split_into_phrases(text):
    """Split *text* on ``,``, ``.``, ``?`` and ``!``.

    Args:
        text: The string to split.

    Returns:
        The non-empty pieces, each stripped of surrounding whitespace, in
        their original order.  The separators themselves are dropped.
    """
    return [piece.strip() for piece in re.split(r"[,.?!]", text) if piece.strip()]


def _transliterate_chars(text):
    """Map every character of *text* through ``_CHAR_MAP``."""
    return "".join(_CHAR_MAP.get(char, char) for char in text)


def _transliterate_word(word):
    """Transliterate one whitespace-delimited token.

    Leading and trailing punctuation is split off so that the bare word can
    be looked up in ``_WORD_MAP``; the punctuation is converted separately
    and reattached.
    """
    if word in _WORD_MAP:
        return _WORD_MAP[word]

    without_lead = word.lstrip(_EDGE_PUNCTUATION)
    lead = word[: len(word) - len(without_lead)]
    core = without_lead.rstrip(_EDGE_PUNCTUATION)
    trail = without_lead[len(core):]

    rendered = _WORD_MAP.get(core)
    if rendered is None:
        rendered = _transliterate_chars(core)
    return _transliterate_chars(lead) + rendered + _transliterate_chars(trail)


# Improved transliteration function (Farsi to Cyrillic)
def transliterate_farsi_to_cyrillic(farsi_text):
    """Convert Farsi text in Perso-Arabic script to Cyrillic script.

    The whole text is first looked up in the word table (this is how the
    multi-word entries match).  Otherwise the text is split on whitespace
    and each token is converted by table lookup, falling back to
    character-by-character mapping.  Tokens of any length are converted, so
    short words no longer remain in Perso-Arabic script.

    Args:
        farsi_text: Text to convert.

    Returns:
        The converted text, tokens joined by single spaces.  Characters with
        no mapping (Latin letters, digits, brackets, ...) are kept as they
        are.
    """
    if farsi_text in _WORD_MAP:
        return _WORD_MAP[farsi_text]
    return " ".join(_transliterate_word(word) for word in farsi_text.split())


def _model_translate(text):
    """Translate English *text* to Farsi with the M2M100 model.

    Returns the decoded output with leading/trailing ``.!?,`` and spaces
    removed.
    """
    tokenizer.src_lang = "en"
    encoded_text = tokenizer(text, return_tensors="pt", padding=True).to(device)
    generated = model.generate(
        **encoded_text, forced_bos_token_id=tokenizer.get_lang_id("fa")
    )
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return decoded.strip(".!?, ")


def _looks_like_farsi(text):
    """Return True if *text* contains at least one Farsi letter or mark."""
    return any(char in _FARSI_MARKS for char in text)


def _translate_phrase(phrase):
    """Translate one phrase for the fallback path.

    The phrase table wins over the model; if neither yields Farsi the phrase
    is returned wrapped in an ``[UNTRANSLATED: ...]`` marker.
    """
    known = _COMMON_PHRASE_LOOKUP.get(_normalize_phrase(phrase))
    if known is not None:
        return known
    candidate = _model_translate(phrase)
    if _looks_like_farsi(candidate):
        return candidate
    return f"[UNTRANSLATED: {phrase}]"


# Translation function with input validation and cleaning
def translate_to_cyrillic_farsi(text):
    """Translate English *text* to Farsi and transliterate it to Cyrillic.

    Args:
        text: English input.  Typographic quotes are folded to ASCII; any
            other non-ASCII character is rejected.

    Returns:
        A ``(farsi_text, cyrillic_text)`` tuple.  On invalid input the first
        element is an error message and the second is an empty string.
    """
    if not text or not text.strip():
        return "Error: Please enter a valid English text.", ""

    text = text.translate(_TYPOGRAPHIC_QUOTES)
    if not text.isascii():
        return "Error: Please enter text in English (ASCII characters only).", ""

    # Try full sentence translation first
    farsi_text = _model_translate(text)

    # Fall back when the model produced nothing recognisable as Farsi
    if not _looks_like_farsi(farsi_text):
        # A multi-clause table entry can only match before the text is split.
        whole_sentence = _COMMON_PHRASE_LOOKUP.get(_normalize_phrase(text))
        if whole_sentence is not None:
            farsi_text = whole_sentence
        else:
            farsi_text = " ".join(
                _translate_phrase(phrase) for phrase in split_into_phrases(text)
            )

    cyrillic_text = transliterate_farsi_to_cyrillic(farsi_text)
    return farsi_text, cyrillic_text


# Gradio Interface
interface = gr.Interface(
    fn=translate_to_cyrillic_farsi,
    inputs=gr.Textbox(label="Enter Text in English"),
    outputs=[
        gr.Textbox(label="Farsi Translation (Native Script)"),
        gr.Textbox(label="Farsi Translation (Cyrillic Script)"),
    ],
    title="English to Cyrillic Farsi Translator",
    description="Enter an English word or sentence, and this tool will translate it to Farsi in both native and Cyrillic scripts.",
)

# Launch the app
if __name__ == "__main__":
    interface.launch()