from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
import gradio as gr
import torch

# Checkpoint name used for both the tokenizer and the translation model.
model_name = "facebook/m2m100_418M"

try:
    tokenizer = M2M100Tokenizer.from_pretrained(model_name)
    model = M2M100ForConditionalGeneration.from_pretrained(model_name)
except Exception as e:
    # The app cannot do anything useful without the model, so report the
    # failure and stop with a non-zero exit status.
    print(f"Error loading model or tokenizer: {e}")
    # `exit` is a helper injected by the `site` module for interactive use and
    # is not guaranteed to exist; raising SystemExit works in every runtime.
    raise SystemExit(1) from e

# Move model to GPU if available; inputs are sent to the same device later.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Predefined common English-to-Farsi phrase mappings.
#
# translate_to_cyrillic_farsi consults this table in its phrase-by-phrase
# fallback. That function rejects any non-ASCII input before it gets that far,
# so every key must be plain ASCII (straight apostrophes, no typographic
# quotes) or the entry can never be matched.
common_phrases = {
    "Hello": "سلام",
    "Hi!": "سلام!",
    "Good morning": "صبح بخیر",
    "Good afternoon": "عصر بخیر",
    "Good evening": "شب بخیر",
    "Goodbye": "خداحافظ",
    "Good night": "شب خوش",
    "How are you?": "حالت چطوره؟",
    "I am fine, thank you. And you?": "خوبم، متشکرم. و شما؟",
    "Thank you (very much)": "متشکرم (خیلی ممنون)",
    "You're welcome": "خواهش میکنم",
    "Excuse me": "ببخشید",
    "Pardon me": "معذرت می‌خواهم",
    "I'm sorry": "متأسفم",
    "Congratulations": "تبریک می‌گویم",
    "Please sit down": "لطفاً بنشینید",
    "Good luck": "موفق باشید",
    "Have a good trip": "سفر خوبی داشته باشید",
    "What is your name?": "اسم شما چیست؟",
    "My name is Sara": "اسم من سارا است",
    "Where are you from?": "اهل کجا هستید؟",
    "I am from Iran": "من اهل ایران هستم",
    "Do you speak English?": "آیا انگلیسی صحبت می‌کنید؟",
    "I don't understand": "من متوجه نمی‌شوم",
    "Please speak slowly": "لطفاً آهسته صحبت کنید",
    "Do you have a Persian-English dictionary?": "آیا دیکشنری فارسی-انگلیسی دارید؟",
    "How do you say this in English?": "این را در انگلیسی چگونه می‌گویند؟",
    "How much is this?": "این چقدر قیمت دارد؟",
    "Where is the bathroom?": "دستشویی کجاست؟",
    "Help!": "کمک!",
    "I am lost": "من گم شده‌ام",
    "Can you help me?": "می‌توانید به من کمک کنید؟",
    "What time is it?": "ساعت چند است؟",
    "Where is the hospital?": "بیمارستان کجاست؟",
    "I love you": "دوستت دارم",
    "How can I get to the airport?": "چطور می‌توانم به فرودگاه بروم؟",
    "I need a doctor": "به یک پزشک نیاز دارم",
    "Where can I buy a ticket?": "از کجا می‌توانم بلیط بخرم؟",
    "I am hungry": "گرسنه‌ام",
    "Can I have some water?": "می‌توانم کمی آب بگیرم؟",
    # Straight apostrophe on purpose: a typographic one is non-ASCII and
    # would make this entry unreachable.
    "It's very beautiful": "خیلی زیباست",
    "See you later": "بعداً می‌بینمت",
    "What is this?": "این چیست؟",
    "I am happy": "خوشحالم",
    "It is very chilly today": "امروز خیلی سرد است",
    "I hope we have better weather tomorrow": "امیدوارم فردا هوا بهتر شود",
}

def split_into_phrases(text):
    """Break *text* into trimmed, non-empty phrases.

    The text is cut at every comma, period, question mark and exclamation
    mark. The separators themselves are discarded, surrounding whitespace is
    removed from each piece, and pieces that end up empty are dropped.
    """
    pieces = [text]
    # Apply one delimiter at a time, flattening the result after each pass.
    for delimiter in (",", ".", "?", "!"):
        pieces = [part for piece in pieces for part in piece.split(delimiter)]

    trimmed = (piece.strip() for piece in pieces)
    return [piece for piece in trimmed if piece]

# Whole words and short phrases with a hand-picked Cyrillic rendering.
_CYRILLIC_WORD_MAP = {
    "سلام": "Салом",
    "خداحافظ": "Худоҳафиз",
    "شب بخیر": "Шаб хайр",
    "صبح بخیر": "Субҳ хайр",
    "ممنون": "Ташаккур",
    "خواهش میکنم": "Илтимос",
    "چطور هستی؟": "Чӣ тур ҳастӣ?",
    "چطور هستید؟": "Шумо чӣ туред?",
    "بله": "Ҳа",
    "نه": "Не",
    "ایران": "Эрон",
    "تشکر": "Ташаккур",
    "فارسی": "Форсӣ",
    "اسم من": "Номи ман",
    "لطفا": "Илтимос",
    "کمک": "Кумак",
    "هستی": "ҳастӣ",
    "هستید": "ҳастед",
    "است": "аст",
    "امروز": "Имрӯз",
    "خیلی": "Хеле",
    "سرد": "Сард",
    "امیدوارم": "Умидворам",
    "فردا": "Фардо",
    "هوا": "Ҳаво",
    "بهتر": "Беҳтар",
    "شود": "Шавад",
}

# Letter-by-letter fallback table. Short vowels are not written in Farsi
# script, so output produced from this table is consonant-heavy by nature.
_CYRILLIC_CHAR_MAP = {
    "ا": "а",
    "ب": "б",
    "پ": "п",
    "ت": "т",
    "ث": "с",
    "ج": "ҷ",
    "چ": "ч",
    "ح": "ҳ",
    "خ": "х",
    "د": "д",
    "ذ": "з",
    "ر": "р",
    "ز": "з",
    "ژ": "ж",
    "س": "с",
    "ش": "ш",
    "ص": "с",
    "ض": "з",
    "ط": "т",
    "ظ": "з",
    "ع": "ъ",
    "غ": "ғ",
    "ف": "ф",
    "ق": "қ",
    "ک": "к",
    "گ": "г",
    "ل": "л",
    "م": "м",
    "ن": "н",
    "و": "в",
    "ه": "ҳ",
    "ی": "й",
    "؟": "?",
    "،": ",",
    " ": " ",
}

# A token is only sent through the character table if it has one of these.
_FARSI_LETTERS = frozenset("ابتثجحخدذرزسشصضطظعغفقکگلمنوهیءأؤئء")

# Punctuation that may be glued to a word and must not defeat a map lookup.
_EDGE_PUNCTUATION = ".,!?:;؟،؛"

# Longest key in the word map, counted in whitespace-separated words.
_MAX_PHRASE_WORDS = max(len(key.split()) for key in _CYRILLIC_WORD_MAP)


def _map_chars(text):
    """Replace every character that has a table entry; keep the rest."""
    return "".join(_CYRILLIC_CHAR_MAP.get(ch, ch) for ch in text)


def _transliterate_token(token):
    """Letter-by-letter fallback for one token that is not in the word map."""
    # Tokens of one or two characters are deliberately passed through as-is,
    # matching the long-standing behaviour of this module.
    if len(token) > 2 and any(ch in _FARSI_LETTERS for ch in token):
        return _map_chars(token)
    return token


def _lookup_phrase(candidate):
    """Return the word-map rendering of *candidate*, or None if unknown.

    An exact match wins. Otherwise leading/trailing punctuation is set aside,
    the remaining core is looked up, and the punctuation is re-attached
    (converted through the character table, e.g. "؟" -> "?").
    """
    if candidate in _CYRILLIC_WORD_MAP:
        return _CYRILLIC_WORD_MAP[candidate]

    core = candidate.strip(_EDGE_PUNCTUATION)
    if not core or core not in _CYRILLIC_WORD_MAP:
        return None

    lead_len = len(candidate) - len(candidate.lstrip(_EDGE_PUNCTUATION))
    lead = candidate[:lead_len]
    trail = candidate[lead_len + len(core):]
    return _map_chars(lead) + _CYRILLIC_WORD_MAP[core] + _map_chars(trail)


def transliterate_farsi_to_cyrillic(farsi_text):
    """Render Farsi text in Cyrillic script.

    Known words and phrases come from a curated map; anything else is
    converted letter by letter. The text is scanned left to right and, at each
    position, the longest run of words that the map knows is preferred, so
    multi-word entries are recognised inside longer sentences as well.

    Args:
        farsi_text: Text in Farsi (Perso-Arabic) script. May be empty.

    Returns:
        The transliterated text with words joined by single spaces. Tokens
        without Farsi letters, and tokens of at most two characters that are
        not in the map, are returned unchanged.
    """
    if farsi_text in _CYRILLIC_WORD_MAP:
        return _CYRILLIC_WORD_MAP[farsi_text]

    tokens = farsi_text.split()
    rendered = []
    position = 0
    while position < len(tokens):
        longest = min(_MAX_PHRASE_WORDS, len(tokens) - position)
        for width in range(longest, 0, -1):
            candidate = " ".join(tokens[position:position + width])
            hit = _lookup_phrase(candidate)
            if hit is not None:
                rendered.append(hit)
                position += width
                break
        else:
            # No map entry starts here: fall back to the character table.
            rendered.append(_transliterate_token(tokens[position]))
            position += 1

    return " ".join(rendered)

# Characters whose presence marks a model output as Farsi script. Includes the
# Persian-specific letters (پ چ ژ آ) and the Arabic forms of kaf/yeh so that a
# valid output made up only of those letters is not mistaken for a failure.
_FARSI_MARKERS = frozenset("ابتثجحخدذرزسشصضطظعغفقکگلمنوهیءأؤئ،؟" "پچژآيك")


def _contains_farsi(text):
    """Return True if *text* has at least one Farsi letter or punctuation mark."""
    return any(ch in _FARSI_MARKERS for ch in text)


def _model_translate(english_text):
    """Translate *english_text* to Farsi with the model and trim edge punctuation."""
    tokenizer.src_lang = "en"
    encoded = tokenizer(english_text, return_tensors="pt", padding=True).to(device)
    generated = model.generate(**encoded, forced_bos_token_id=tokenizer.get_lang_id("fa"))
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return decoded.strip(".!?, ")


def _phrase_key(phrase):
    """Normalise a phrase for lookup: no edge punctuation, case-insensitive."""
    return phrase.strip().rstrip(".!?,").strip().casefold()


def _lookup_common_phrase(phrase):
    """Return the predefined Farsi rendering of *phrase*, or None if unknown.

    An exact key match wins. Otherwise keys are compared after normalisation,
    because split_into_phrases removes the punctuation that many keys carry
    (e.g. "How are you?" must still match the fragment "How are you").
    """
    if phrase in common_phrases:
        return common_phrases[phrase]
    wanted = _phrase_key(phrase)
    for english, farsi in common_phrases.items():
        if _phrase_key(english) == wanted:
            return farsi
    return None


def translate_to_cyrillic_farsi(text):
    """Translate English text to Farsi and transliterate the result to Cyrillic.

    The whole input is sent to the model first. If the output contains no
    Farsi characters, the function falls back to the predefined phrase table
    (whole input first, then fragment by fragment) and re-runs the model on
    fragments the table does not know. Fragments that still fail are kept as
    "[UNTRANSLATED: ...]" markers.

    Args:
        text: English input; must be non-blank and ASCII-only.

    Returns:
        A ``(farsi_text, cyrillic_text)`` tuple. On invalid input the first
        element is an error message and the second is an empty string.
    """
    if not text or not text.strip():
        return "Error: Please enter a valid English text.", ""
    if not all(ord(char) < 128 for char in text):
        return "Error: Please enter text in English (ASCII characters only).", ""

    # Try full sentence translation first
    farsi_text = _model_translate(text)

    if not _contains_farsi(farsi_text):
        whole_match = _lookup_common_phrase(text)
        if whole_match is not None:
            # Covers table entries that themselves contain separators.
            farsi_text = whole_match
        else:
            # Fall back to phrase-by-phrase translation
            farsi_translations = []
            for phrase in split_into_phrases(text):
                known = _lookup_common_phrase(phrase)
                if known is not None:
                    farsi_translations.append(known)
                    continue
                candidate = _model_translate(phrase)
                if _contains_farsi(candidate):
                    farsi_translations.append(candidate)
                else:
                    farsi_translations.append(f"[UNTRANSLATED: {phrase}]")
            farsi_text = " ".join(farsi_translations)

    cyrillic_text = transliterate_farsi_to_cyrillic(farsi_text)
    return farsi_text, cyrillic_text

# Gradio UI: one English text box in, two text boxes out (native-script Farsi
# and its Cyrillic transliteration), matching the tuple returned by
# translate_to_cyrillic_farsi.
interface = gr.Interface(
    title="English to Cyrillic Farsi Translator",
    description="Enter an English word or sentence, and this tool will translate it to Farsi in both native and Cyrillic scripts.",
    fn=translate_to_cyrillic_farsi,
    inputs=gr.Textbox(label="Enter Text in English"),
    outputs=[
        gr.Textbox(label="Farsi Translation (Native Script)"),
        gr.Textbox(label="Farsi Translation (Cyrillic Script)"),
    ],
)

# Start the web app only when this file is executed directly, not on import.
if __name__ == "__main__":
    interface.launch()