# Multilingual M2M fine-tuned model

## Languages
- Tsonga
- Portuguese
- Emakhuwa
- Sena
- Nyanja
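
The model reuses existing M2M100 language codes for the newly added languages, and each input carries an explicit target tag (`<2ts>`, `<2vmw>`, `<2ny>`, `<2seh>`). The dict below is a small reference inferred from the comments in the usage example that follows; `LANG_MAP` is an illustrative name, not part of the model's API.

```python
# Language-code remapping inferred from the usage example below;
# LANG_MAP is an illustrative name, not part of the model's API.
LANG_MAP = {
    "ts": "zu",    # Tsonga reuses the Zulu code
    "vmw": "sw",   # Emakhuwa reuses the Swahili code
    "ny": "lg",    # Nyanja reuses the Lingala code
    "seh": "xh",   # Sena reuses the Xhosa code
}
```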
The snippet below loads the model and translates Portuguese input into each of the supported languages:

```python
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
import torch

# Build (source language, target language, text) examples. Each input
# starts with a tag such as <2ts> naming the actual target language.
data = []

src_lang = "pt"
tgt_lang = "zu"  # Tsonga was mapped to Zulu
text = "<2ts> ‘Ele foi muito feliz’!"
data.append((src_lang, tgt_lang, text))

src_lang = "pt"
tgt_lang = "sw"  # Emakhuwa was mapped to Swahili
text = "<2vmw> O presidente da república de moçambique"
data.append((src_lang, tgt_lang, text))

src_lang = "pt"
tgt_lang = "lg"  # Nyanja was mapped to Lingala
text = "<2ny> lutaram tanto que conquistaram a independencia"
data.append((src_lang, tgt_lang, text))

src_lang = "pt"
tgt_lang = "xh"  # Sena was mapped to Xhosa
text = "<2seh> 1. A Administração Pública serve o interesse público e na sua actuação respeita os direitos e liberdades fundamentais dos cidadãos."
data.append((src_lang, tgt_lang, text))

device = "cuda:0" if torch.cuda.is_available() else "cpu"

model_name = "felerminoali/m2m_bible_multilingual_en-pt-vmw-ny-seh-ts"
model = M2M100ForConditionalGeneration.from_pretrained(model_name).to(device)
tokenizer = M2M100Tokenizer.from_pretrained(model_name)

for src_lang, tgt_lang, text in data:
    print("\n---", src_lang, tgt_lang, text)

    # Translate into the target language.
    tokenizer.src_lang = src_lang
    encoded = tokenizer(text, return_tensors="pt").to(model.device)
    generated_tokens = model.generate(
        **encoded,
        forced_bos_token_id=tokenizer.get_lang_id(tgt_lang),
    )
    trans = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    print("Translation:")
    print(trans[0])
```
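
For multiple sentences sharing one target language, the inputs can be tokenized as a padded batch and decoded in a single `generate` call. This is a minimal sketch, not from the model card, reusing the `model` and `tokenizer` loaded above; the `sentences` list is illustrative.

```python
# Illustrative batch translation into Nyanja (mapped to the "lg" code);
# reuses the model and tokenizer loaded in the snippet above.
sentences = [
    "<2ny> lutaram tanto que conquistaram a independencia",
    "<2ny> O presidente da república de moçambique",
]
tokenizer.src_lang = "pt"
batch = tokenizer(sentences, return_tensors="pt", padding=True).to(model.device)
outputs = model.generate(**batch, forced_bos_token_id=tokenizer.get_lang_id("lg"))
for translation in tokenizer.batch_decode(outputs, skip_special_tokens=True):
    print(translation)
```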