import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# 1) Load the base M2M100 tokenizer (avoids the “non-consecutive added token” error)
BASE_MODEL = "facebook/m2m100_418M"
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# 2) Load your fine-tuned French⇄Zarma model
FINETUNED_MODEL = "Mamadou2727/Feriji_model"
model = AutoModelForSeq2SeqLM.from_pretrained(FINETUNED_MODEL)
# 3) Ensure the model’s embedding matrix matches the tokenizer vocab size
model.resize_token_embeddings(len(tokenizer))
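# Sanity check (an illustrative addition, not in the original script): the
# fine-tuned checkpoint is expected to share M2M100's vocabulary; if it did
# not, the resize above would silently pad or truncate the embedding matrix.
assert model.get_input_embeddings().weight.shape[0] == len(tokenizer)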
# 4) Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
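# Optional (a sketch, not part of the original app): on a CUDA device the
# model can be cast to half precision to reduce memory use, at some risk of
# minor quality loss:
# if device.type == "cuda":
#     model.half()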
# 5) Language codes: Zarma's ISO 639-3 code is "dje", but M2M100's tokenizer
#    only knows its 100 training languages, so this checkpoint reuses "yo"
#    (Yoruba) as a stand-in code for Zarma
LANG_CODES = {
    "French": "fr",
    "Zarma": "yo"
}
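# Defensive check (illustrative addition): confirm the stand-in code is one
# the tokenizer actually knows, so generate() won't hit a KeyError later.
assert LANG_CODES["Zarma"] in tokenizer.lang_code_to_id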
def translate(text: str, num_seqs: int):
    """Translate French text to Zarma, returning num_seqs beam hypotheses."""
    # set source & target language codes
    tokenizer.src_lang = LANG_CODES["French"]
    tokenizer.tgt_lang = LANG_CODES["Zarma"]

    # Gradio sliders can deliver floats; generation expects an int
    num_seqs = int(num_seqs)

    # tokenize & move to device
    inputs = tokenizer(text, return_tensors="pt", padding=True).to(device)

    # generate translations
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            forced_bos_token_id=tokenizer.lang_code_to_id[LANG_CODES["Zarma"]],
            num_beams=num_seqs,
            num_return_sequences=num_seqs,
            length_penalty=1.0,
            early_stopping=True
        )

    # decode & join multiple hypotheses
    translations = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return "\n\n".join(translations)
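# Quick local smoke test (illustrative; the sentence is an arbitrary example):
# print(translate("Bonjour, comment allez-vous ?", num_seqs=1))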
# 6) Build the Gradio app (the UI exposes only the French → Zarma direction)
with gr.Blocks() as app:
    gr.Markdown(
        """
# FERIJI Translator: French → Zarma
*Beta version – academic & research use only.*
"""
    )
    with gr.Row():
        inp = gr.Textbox(lines=7, label="Français / French")
        beams = gr.Slider(
            label="Nombre de séquences retournées / Number of returned sequences",
            minimum=1, maximum=5, value=1, step=1
        )
    out = gr.Textbox(lines=7, label="Zarma")
    btn = gr.Button("Traduire / Translate")
    btn.click(fn=translate, inputs=[inp, beams], outputs=out, api_name="predict")
# On HF Spaces you don’t need share=True
app.launch()
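# For heavier traffic, Gradio's request queue can be enabled before launching
# (a suggestion, not part of the original Space):
# app.queue().launch()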