# NLLB-200 translation demo (Hugging Face Space)
import logging
import os
import tempfile

import gradio as gr

# Module-level logger, configured the same way as the offline run script so
# Space logs and local logs look identical.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
logger = logging.getLogger(__name__)
try:
    # NLLB ships a dedicated tokenizer class; the model itself loads through
    # the generic seq2seq auto class.
    from transformers import NllbTokenizer, AutoModelForSeq2SeqLM
except ImportError:
    logger.error("transformers library not found. Ensure 'transformers' is in requirements.txt.")
    raise

# Publicly available fine-tuned NLLB-200-distilled-600M weights on the HF Hub.
MODEL_NAME = "zensalaria/my-nllb-distilled"

logger.info("Loading NLLB model from Hugging Face...")
try:
    tokenizer = NllbTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
    logger.info("Model and tokenizer loaded successfully.")
except Exception as e:
    # Log, then re-raise with a bare `raise` (not `raise e`) so the original
    # traceback is preserved and the Space fails fast instead of serving a
    # broken app.
    logger.error("Error loading model: %s", e)
    raise
def translate_text(input_text, target_lang="urd_Arab", max_length=512):
    """Translate ``input_text`` into ``target_lang`` entirely in memory.

    Replicates the run script's translation logic without any local file
    writes.

    Args:
        input_text: Source text to translate.
        target_lang: NLLB language code (e.g. ``"urd_Arab"``) forced as the
            decoder's first generated token.
        max_length: Token cap applied to both tokenization and generation.

    Returns:
        The translated string, or the literal ``"Error translating text"``
        if translation fails (the error is logged with its traceback).
    """
    logger.info("Translating text to %s...", target_lang)
    try:
        inputs = tokenizer(
            input_text,
            return_tensors="pt",
            max_length=max_length,
            truncation=True,
        )
        # NLLB steers the output language via forced_bos_token_id.  Newer
        # transformers releases dropped `lang_code_to_id`, so fall back to a
        # plain token lookup when the mapping attribute is absent.
        if hasattr(tokenizer, "lang_code_to_id"):
            forced_bos = tokenizer.lang_code_to_id[target_lang]
        else:
            forced_bos = tokenizer.convert_tokens_to_ids(target_lang)
        # Pass the id as an explicit generate() kwarg rather than smuggling a
        # plain int through the tokenizer's BatchEncoding dict.
        outputs = model.generate(
            **inputs,
            forced_bos_token_id=forced_bos,
            max_length=max_length,
        )
        translated = tokenizer.decode(outputs[0], skip_special_tokens=True)
        logger.info("Translation complete!")
        return translated
    except Exception as e:
        # logger.exception records the full traceback, not just the message.
        logger.exception("Error during translation: %s", e)
        return "Error translating text"
def process_translation_request(input_text, target_lang="urd_Arab"):
    """Validate the request, then delegate to ``translate_text``.

    Args:
        input_text: Raw text from the UI; may be ``None`` or blank.
        target_lang: NLLB language code to translate into.

    Returns:
        The translation, or an error string for missing/blank input
        (returned rather than raised so the UI always shows text).
    """
    # Guard against None as well as whitespace-only input; the original
    # `.strip()` call would raise AttributeError on None.
    if not input_text or not input_text.strip():
        return "Error: No text provided."
    return translate_text(input_text, target_lang)
def gradio_interface(text, lang):
    """Thin adapter binding the two Gradio inputs to the translation pipeline."""
    return process_translation_request(text, lang)
# (display label, NLLB language code) pairs offered in the UI; the dropdown
# stores only the code, which is what translate_text expects.
LANG_CHOICES = [
    ("English (Latin)", "eng_Latn"),
    ("Urdu (Arabic)", "urd_Arab"),
    ("Spanish (Latin)", "spa_Latn"),
]

demo = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Input Text", lines=3),
        gr.Dropdown(
            choices=[code for _label, code in LANG_CHOICES],
            label="Target Language",
            value="urd_Arab",
        ),
    ],
    outputs="text",
    title="NLLB-200 Translator",
    description="Translate text using your NLLB-200-distilled-600M model.",
)
if __name__ == "__main__":
    # Launch the Gradio server when run as a script (HF Spaces runs this too).
    demo.launch()