finetuned-s2t-1000-step / tokenizer_config.json
Marialab's picture
End of training
474abf6 verified
{
"added_tokens_decoder": {
"0": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"4": {
"content": "<lang:pt>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"5": {
"content": "<lang:fr>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"6": {
"content": "<lang:ru>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"7": {
"content": "<lang:nl>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"8": {
"content": "<lang:ro>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"9": {
"content": "<lang:it>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"10": {
"content": "<lang:es>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"11": {
"content": "<lang:de>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"10000": {
"content": "الكبريت",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10001": {
"content": "خلاص",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10002": {
"content": "صيني",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10003": {
"content": "امس",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10004": {
"content": "ترقص",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10005": {
"content": "احذر",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10006": {
"content": "أكرهك",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10007": {
"content": "امشِ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10008": {
"content": "ننتظر",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10009": {
"content": "أنا",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10010": {
"content": "بخير",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10011": {
"content": "إنهض",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10012": {
"content": "ماذا",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10013": {
"content": "هناك",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10014": {
"content": "إذا",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10015": {
"content": "رجع",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10016": {
"content": "هـل",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10017": {
"content": "هيا",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10018": {
"content": "نمسكه",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10019": {
"content": "الناس",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10020": {
"content": "كذا",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10021": {
"content": "لقد",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10022": {
"content": "فهمت",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10023": {
"content": "مساء",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10024": {
"content": "الخير",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10025": {
"content": "آريد",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10026": {
"content": "كن",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10027": {
"content": "هادئ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10028": {
"content": "مستحيل",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10029": {
"content": "هذا",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10030": {
"content": "الشئ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10031": {
"content": "كاعية",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10032": {
"content": "الأزقة",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10033": {
"content": "أو",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10034": {
"content": "الشوارع",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10035": {
"content": "الضيقة",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10036": {
"content": "انا",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10037": {
"content": "احبك",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10038": {
"content": "ملعب",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10039": {
"content": "الكرة",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10040": {
"content": "اختفيتي",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10041": {
"content": "رايح",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10042": {
"content": "أغضبتني",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10043": {
"content": "كيف",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10044": {
"content": "حالك",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10045": {
"content": "اكرهك",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10046": {
"content": "ابتهج",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10047": {
"content": "أراك",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10048": {
"content": "المرة",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10049": {
"content": "القادمة",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10050": {
"content": "كلام",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10051": {
"content": "الحارس",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10052": {
"content": "اجلس",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10053": {
"content": "دائم",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10054": {
"content": "متى",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10055": {
"content": "لايوجد",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10056": {
"content": "أحد",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10057": {
"content": "ليس",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10058": {
"content": "شيء",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10059": {
"content": "أنت",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10060": {
"content": "تسير",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10061": {
"content": "بسرعة",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10062": {
"content": "كبيرة",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10063": {
"content": "حبوب",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10064": {
"content": "سعيد",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10065": {
"content": "بلقائك",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10066": {
"content": "الأمر",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10067": {
"content": "متروك",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10068": {
"content": "لك",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10069": {
"content": "يمسكوك",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10070": {
"content": "يقبضون",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10071": {
"content": "عليك",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10072": {
"content": "لا",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10073": {
"content": "تقلق",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10074": {
"content": "تتحرك",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10075": {
"content": "جميل",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10076": {
"content": "يمكنك",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10077": {
"content": "الرجاء",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10078": {
"content": "تكرار",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10079": {
"content": "ذلك؟",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10080": {
"content": "قبيح",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10081": {
"content": "نام",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10082": {
"content": "اتصل",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10083": {
"content": "بي",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10084": {
"content": "تعال",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10085": {
"content": "ماهذا",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10086": {
"content": "مصاب",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10087": {
"content": "بالخرف",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10088": {
"content": "يشتغل",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10089": {
"content": "[UNK]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<lang:pt>",
"<lang:fr>",
"<lang:ru>",
"<lang:nl>",
"<lang:ro>",
"<lang:it>",
"<lang:es>",
"<lang:de>",
"[UNK]"
],
"bos_token": "<s>",
"clean_up_tokenization_spaces": false,
"do_lower_case": false,
"do_upper_case": false,
"eos_token": "</s>",
"extra_special_tokens": {},
"lang_codes": "mustc",
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<pad>",
"sp_model_kwargs": {},
"tgt_lang": "pt",
"tokenizer_class": "Speech2TextTokenizer",
"unk_token": "<unk>"
}