Try this model
Click to expand example usage
from transformers import MarianTokenizer, MarianMTModel
from uroman import Uroman
import pandas as pd
import torch
# Load input
# Read the sentences to translate from test.csv; the romanization step below
# reads the "coptic_text" column of this frame.
df = pd.read_csv("test.csv")
# Romanizer instance; its romanize_string() method is applied to each row below.
uroman = Uroman()
# Romanize Coptic text
def preserve_brackets(text):
    """Return *text* as a string with every ``[]`` replaced by ``<MISSING>``.

    The value is coerced with ``str()`` first, so non-string cells (numbers,
    missing values) are accepted and converted to their string form.

    The caller swaps ``<MISSING>`` back to ``[]`` after romanization;
    presumably the placeholder keeps the romanizer from altering the
    bracket pair (TODO confirm against uroman's behavior).
    """
    return str(text).replace("[]", "<MISSING>")
# Romanize each row: mask the "[]" markers, run uroman on the masked text,
# then put the "[]" markers back in the romanized result.
romanized_rows = []
for raw_text in df["coptic_text"].tolist():
    masked = preserve_brackets(raw_text)
    romanized = uroman.romanize_string(masked)
    romanized_rows.append(romanized.replace("<MISSING>", "[]"))
df["coptic_text_romanized"] = romanized_rows
# Load model
# Tokenizer and weights both come from the same checkpoint name.
model_name = "chaouin/coptic-french-translation-helsinki"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)
# Keep the model on the CPU, matching the device the inputs are moved to later.
model = model.to("cpu")
# Translate
# Generate one translation per romanized sentence and collect the decoded
# strings in order.
translations = []
for text in df["coptic_text_romanized"]:
    # Every input is prefixed with ">>fra<<"; presumably this is the
    # target-language tag the checkpoint expects (confirm on the model card).
    input_text = ">>fra<< " + text
    inputs = tokenizer(input_text, return_tensors="pt", max_length=128, truncation=True)
    # Move every input tensor to the same device as the model.
    inputs = {k: v.to("cpu") for k, v in inputs.items()}
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_length=128,
            num_beams=6,
            repetition_penalty=1.5,
            length_penalty=2.5,
        )
    # generate() is called with a single sentence; decode its first sequence.
    translations.append(tokenizer.decode(output[0], skip_special_tokens=True))
print(translations)
➡️ For a complete script to generate translations, see generate_translation_helsinki.py
🔬 For full training and evaluation scripts, visit the project repository
- Downloads last month
- 7
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋
Ask for provider support