import gradio as gr import torch import os from transformers import AutoTokenizer, AutoModelForSeq2SeqLM import spaces # Load Hugging Face token from the environment variable HF_TOKEN = os.getenv("HF_TOKEN") if HF_TOKEN is None: raise ValueError("HF_TOKEN environment variable is not set. Please set it before running the script.") # Check for GPU support and configure appropriately device = "cuda" if torch.cuda.is_available() else "cpu" zero = torch.Tensor([0]).to(device) print(f"Device being used: {zero.device}") # Model configurations MSA_TO_SYRIAN_MODEL = "Omartificial-Intelligence-Space/Shami-MT" SYRIAN_TO_MSA_MODEL = "Omartificial-Intelligence-Space/SHAMI-MT-2MSA" # Load models and tokenizers print("Loading MSA to Syrian model...") msa_to_syrian_tokenizer = AutoTokenizer.from_pretrained(MSA_TO_SYRIAN_MODEL) msa_to_syrian_model = AutoModelForSeq2SeqLM.from_pretrained(MSA_TO_SYRIAN_MODEL).to(device) print("Loading Syrian to MSA model...") syrian_to_msa_tokenizer = AutoTokenizer.from_pretrained(SYRIAN_TO_MSA_MODEL) syrian_to_msa_model = AutoModelForSeq2SeqLM.from_pretrained(SYRIAN_TO_MSA_MODEL).to(device) print("Models loaded successfully!") @spaces.GPU(duration=120) def translate_msa_to_syrian(text): """Translate from Modern Standard Arabic to Syrian dialect""" if not text.strip(): return "" try: input_ids = msa_to_syrian_tokenizer(text, return_tensors="pt").input_ids.to(device) outputs = msa_to_syrian_model.generate(input_ids, max_length=128, num_beams=5, early_stopping=True) translated_text = msa_to_syrian_tokenizer.decode(outputs[0], skip_special_tokens=True) return translated_text except Exception as e: return f"Translation error: {str(e)}" @spaces.GPU(duration=120) def translate_syrian_to_msa(text): """Translate from Syrian dialect to Modern Standard Arabic""" if not text.strip(): return "" try: input_ids = syrian_to_msa_tokenizer(text, return_tensors="pt").input_ids.to(device) outputs = syrian_to_msa_model.generate(input_ids, max_length=128, num_beams=5, early_stopping=True) translated_text = syrian_to_msa_tokenizer.decode(outputs[0], skip_special_tokens=True) return translated_text except Exception as e: return f"Translation error: {str(e)}" def bidirectional_translate(text, direction): """Handle bidirectional translation based on user selection""" if direction == "MSA → Syrian": return translate_msa_to_syrian(text) elif direction == "Syrian → MSA": return translate_syrian_to_msa(text) else: return "Please select a translation direction" # Create Gradio interface with gr.Blocks(title="SHAMI-MT: Bidirectional Syria Arabic Dialect MT Framework") as demo: gr.HTML("""
Translate between Modern Standard Arabic (MSA) and Syrian Dialect
Built on AraT5v2-base-1024 architecture