ilyes25 committed on
Commit
fbe01dd
·
verified ·
1 Parent(s): 6577f7c

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer_config.json +1 -1
  2. vocab.json +58 -0
tokenizer_config.json CHANGED
@@ -41,7 +41,7 @@
41
  "model_max_length": 1000000000000000019884624838656,
42
  "pad_token": "[PAD]",
43
  "replace_word_delimiter_char": " ",
44
- "target_lang": "ardz",
45
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
46
  "unk_token": "[UNK]",
47
  "word_delimiter_token": "|"
 
41
  "model_max_length": 1000000000000000019884624838656,
42
  "pad_token": "[PAD]",
43
  "replace_word_delimiter_char": " ",
44
+ "target_lang": "ardzoff",
45
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
46
  "unk_token": "[UNK]",
47
  "word_delimiter_token": "|"
vocab.json CHANGED
@@ -57,6 +57,64 @@
57
  "ّ": 52,
58
  "ْ": 53
59
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  "kab": {
61
  "$": 1,
62
  "0": 2,
 
57
  "ّ": 52,
58
  "ْ": 53
59
  },
60
+ "ardzoff": {
61
+ "[PAD]": 55,
62
+ "[UNK]": 54,
63
+ "e": 1,
64
+ "h": 2,
65
+ "i": 3,
66
+ "k": 4,
67
+ "n": 5,
68
+ "o": 6,
69
+ "s": 7,
70
+ "u": 8,
71
+ "|": 0,
72
+ "ء": 9,
73
+ "آ": 10,
74
+ "أ": 11,
75
+ "ؤ": 12,
76
+ "إ": 13,
77
+ "ئ": 14,
78
+ "ا": 15,
79
+ "ب": 16,
80
+ "ة": 17,
81
+ "ت": 18,
82
+ "ث": 19,
83
+ "ج": 20,
84
+ "ح": 21,
85
+ "خ": 22,
86
+ "د": 23,
87
+ "ذ": 24,
88
+ "ر": 25,
89
+ "ز": 26,
90
+ "س": 27,
91
+ "ش": 28,
92
+ "ص": 29,
93
+ "ض": 30,
94
+ "ط": 31,
95
+ "ظ": 32,
96
+ "ع": 33,
97
+ "غ": 34,
98
+ "ـ": 35,
99
+ "ف": 36,
100
+ "ق": 37,
101
+ "ك": 38,
102
+ "ل": 39,
103
+ "م": 40,
104
+ "ن": 41,
105
+ "ه": 42,
106
+ "و": 43,
107
+ "ى": 44,
108
+ "ي": 45,
109
+ "ً": 46,
110
+ "ٌ": 47,
111
+ "ٍ": 48,
112
+ "َ": 49,
113
+ "ُ": 50,
114
+ "ِ": 51,
115
+ "ّ": 52,
116
+ "ْ": 53
117
+ },
118
  "kab": {
119
  "$": 1,
120
  "0": 2,