princepride committed on
Commit bf6524d · verified · 1 Parent(s): ffcf3bd

Upload 2 files

Files changed (2)
  1. model.py +278 -21
  2. pinyin.txt +408 -0
model.py CHANGED
@@ -1,11 +1,234 @@
- from transformers import MBartForConditionalGeneration, MBart50TokenizerFast, AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, pipeline
- from abc import ABC, abstractmethod
- from typing import Type
  import torch
- import torch.nn.functional as F
  from modules.file import ExcelFileWriter
  import os
  script_dir = os.path.dirname(os.path.abspath(__file__))
  parent_dir = os.path.dirname(os.path.dirname(os.path.dirname(script_dir)))
@@ -17,6 +240,7 @@ class Model():
  Args:
      gpu_info (list): list of GPU names
      target_gpu_name (str): name of the target GPU
  Returns:
      int: index of the target GPU, or -1 if not found
  """
@@ -37,6 +261,8 @@ class Model():
  # self.translator = pipeline('translation', model=self.original_model, tokenizer=self.tokenizer, src_lang=original_language, tgt_lang=target_language, device=device)

  def generate(self, inputs, original_language, target_languages, max_batch_size):
      def language_mapping(original_language):
          d = {
              "Achinese (Arabic script)": "ace_Arab",
@@ -139,7 +365,8 @@ class Model():
              "Ukrainian": "ukr_Cyrl",
              "Urdu": "urd_Arab",
              "Vietnamese": "vie_Latn",
-             "Thai": "tha_Thai"
          }
          return d[original_language]
      def process_gpu_translate_result(temp_outputs):
@@ -199,22 +426,43 @@ class Model():
      processed_num = 0
      for index, batch in enumerate(batches):
          # Tokenize input
-         input_ids = self.tokenizer(batch, return_tensors="pt", padding=True).to(self.device_name)
          temp = []
-         for target_language in target_languages:
-             target_lang_code = self.tokenizer.lang_code_to_id[language_mapping(target_language)]
-             generated_tokens = self.model.generate(
-                 **input_ids,
-                 forced_bos_token_id=target_lang_code,
-             )
-             generated_translation = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
-             # Append result to output
-             temp.append({
-                 "target_language": target_language,
-                 "generated_translation": generated_translation,
-             })
-         input_ids.to('cpu')
-         del input_ids
          temp_outputs.append(temp)
          processed_num += len(batch)
          if (index + 1) * max_batch_size // 1000 - index * max_batch_size // 1000 == 1:
@@ -231,4 +479,13 @@ class Model():
                  "generated_translation": trans['generated_translation'][i],
              })
          outputs.append(temp)
-     return outputs
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
  import torch
  from modules.file import ExcelFileWriter
  import os
+ from abc import ABC, abstractmethod
+ from typing import List
+ import re
+
+ class FilterPipeline():
+     def __init__(self, filter_list):
+         self._filter_list: List[Filter] = filter_list
+
+     def append(self, filter):
+         self._filter_list.append(filter)
+
+     def batch_encoder(self, inputs):
+         for filter in self._filter_list:
+             inputs = filter.encoder(inputs)
+         return inputs
+
+     def batch_decoder(self, inputs):
+         for filter in reversed(self._filter_list):
+             inputs = filter.decoder(inputs)
+         return inputs
+
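For orientation, a minimal round-trip sketch of how this pipeline is meant to be used (illustrative values; the filter classes are the ones defined below):

```python
# Filters encode in list order and decode in reverse order,
# so the last transformation applied is the first one undone.
pipeline = FilterPipeline([SpecialTokenFilter(), SperSignFilter()])

batch = ["---", "Hello %s!"]
encoded = pipeline.batch_encoder(batch)        # ["Hello *!"] -- "---" is held back
translated = encoded                           # stand-in for the model call
restored = pipeline.batch_decoder(translated)  # ["---", "Hello %s!"]
```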
+ class Filter(ABC):
+     def __init__(self):
+         self.name = 'filter'
+         self.code = []
+
+     @abstractmethod
+     def encoder(self, inputs):
+         pass
+
+     @abstractmethod
+     def decoder(self, inputs):
+         pass
+
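Each Filter records in self.code whatever it needs to undo its own encoding; given the Filter base class above, a minimal hypothetical subclass (not part of this commit) shows the contract:

```python
class EllipsisFilter(Filter):
    # Hypothetical example: strip a trailing '...' before translation
    # and re-attach it afterwards.
    def __init__(self):
        self.name = 'ellipsis filter'
        self.code = []

    def encoder(self, inputs):
        self.code = []
        outputs = []
        for i, s in enumerate(inputs):
            if s.endswith('...'):
                self.code.append(i)  # remember which rows had the suffix
                s = s[:-3]
            outputs.append(s)
        return outputs

    def decoder(self, inputs):
        outputs = inputs.copy()
        for i in self.code:
            outputs[i] = outputs[i] + '...'  # re-attach the suffix
        return outputs
```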
+ class SpecialTokenFilter(Filter):
+     def __init__(self):
+         self.name = 'special token filter'
+         self.code = []
+         self.special_tokens = ['!', '!', '-']
+
+     def encoder(self, inputs):
+         filtered_inputs = []
+         self.code = []
+         for i, input_str in enumerate(inputs):
+             if not all(char in self.special_tokens for char in input_str):
+                 filtered_inputs.append(input_str)
+             else:
+                 self.code.append([i, input_str])
+         return filtered_inputs
+
+     def decoder(self, inputs):
+         original_inputs = inputs.copy()
+         for removed_indice in self.code:
+             original_inputs.insert(removed_indice[0], removed_indice[1])
+         return original_inputs
+
+ class SperSignFilter(Filter):
+     def __init__(self):
+         self.name = 's percentage sign filter'
+         self.code = []
+
+     def encoder(self, inputs):
+         encoded_inputs = []
+         self.code = []  # reset self.code
+         for i, input_str in enumerate(inputs):
+             if '%s' in input_str:
+                 encoded_str = input_str.replace('%s', '*')
+                 self.code.append(i)  # record the index of every string that contains '%s'
+             else:
+                 encoded_str = input_str
+             encoded_inputs.append(encoded_str)
+         return encoded_inputs
+
+     def decoder(self, inputs):
+         decoded_inputs = inputs.copy()
+         for i in self.code:
+             decoded_inputs[i] = decoded_inputs[i].replace('*', '%s')  # restore the original string at each recorded index
+         return decoded_inputs
+
+ class ParenSParenFilter(Filter):
+     def __init__(self):
+         self.name = 'Paren s paren filter'
+         self.code = []
+
+     def encoder(self, inputs):
+         encoded_inputs = []
+         self.code = []  # reset self.code
+         for i, input_str in enumerate(inputs):
+             if '(s)' in input_str:
+                 encoded_str = input_str.replace('(s)', '$')
+                 self.code.append(i)  # record the index of every string that contains '(s)'
+             else:
+                 encoded_str = input_str
+             encoded_inputs.append(encoded_str)
+         return encoded_inputs
+
+     def decoder(self, inputs):
+         decoded_inputs = inputs.copy()
+         for i in self.code:
+             decoded_inputs[i] = decoded_inputs[i].replace('$', '(s)')  # restore the original string at each recorded index
+         return decoded_inputs
+
+ class ChevronsFilter(Filter):
+     def __init__(self):
+         self.name = 'chevrons filter'
+         self.code = []
+
+     def encoder(self, inputs):
+         encoded_inputs = []
+         self.code = []  # reset self.code
+         pattern = re.compile(r'<.*?>')
+         for i, input_str in enumerate(inputs):
+             if pattern.search(input_str):
+                 matches = pattern.findall(input_str)
+                 encoded_str = pattern.sub('#', input_str)
+                 self.code.append((i, matches))  # record the index and the match list for every string that contains the pattern
+             else:
+                 encoded_str = input_str
+             encoded_inputs.append(encoded_str)
+         return encoded_inputs
+
+     def decoder(self, inputs):
+         decoded_inputs = inputs.copy()
+         for i, matches in self.code:
+             for match in matches:
+                 decoded_inputs[i] = decoded_inputs[i].replace('#', match, 1)  # restore the matches one by one, in order
+         return decoded_inputs
+
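A quick round trip for ChevronsFilter (illustrative; note that a literal '#' already present in a string would collide with the placeholder):

```python
f = ChevronsFilter()
masked = f.encoder(["Press <OK> then <Cancel>"])  # ["Press # then #"]
print(f.decoder(masked))  # ["Press <OK> then <Cancel>"]
```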
133
+ class SimilarFilter(Filter):
134
+ def __init__(self):
135
+ self.name = 'similar filter'
136
+ self.code = []
137
+
138
+ def is_similar(self, str1, str2):
139
+ # 判断两个字符串是否相似(只有数字上有区别)
140
+ pattern = re.compile(r'\d+')
141
+ return pattern.sub('', str1) == pattern.sub('', str2)
142
+
143
+ def encoder(self, inputs):
144
+ encoded_inputs = []
145
+ self.code = [] # 清空 self.code
146
+ i = 0
147
+ while i < len(inputs):
148
+ encoded_inputs.append(inputs[i])
149
+ similar_strs = [inputs[i]]
150
+ j = i + 1
151
+ while j < len(inputs) and self.is_similar(inputs[i], inputs[j]):
152
+ similar_strs.append(inputs[j])
153
+ j += 1
154
+ if len(similar_strs) > 1:
155
+ self.code.append((i, similar_strs)) # 将相似字符串的起始索引和实际字符串列表存储到 self.code 中
156
+ i = j
157
+ return encoded_inputs
158
+
159
+ def decoder(self, inputs:List):
160
+ decoded_inputs = inputs
161
+ for i, similar_strs in self.code:
162
+ pattern = re.compile(r'\d+')
163
+ for j in range(len(similar_strs)):
164
+ if pattern.search(similar_strs[j]):
165
+ number = re.findall(r'\d+', similar_strs[j])[0] # 获取相似字符串的数字部分
166
+ new_str = pattern.sub(number, inputs[i]) # 将新字符串的数字部分替换为相似字符串的数字部分
167
+ else:
168
+ new_str = inputs[i] # 如果相似字符串不含数字,直接使用新字符串
169
+ if j > 0:
170
+ decoded_inputs.insert(i+j, new_str)
171
+ return decoded_inputs
172
+
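SimilarFilter collapses consecutive strings that differ only in their digits to a single representative before translation, then re-expands the run afterwards; an illustrative round trip:

```python
f = SimilarFilter()
collapsed = f.encoder(["Level 1", "Level 2", "Level 3"])  # ["Level 1"]
# Translate only the representative, then rebuild the run by
# substituting each member's own digits into the translated string:
print(f.decoder(["Niveau 1"]))  # ["Niveau 1", "Niveau 2", "Niveau 3"]
```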
+ class ChineseFilter:
+     def __init__(self, pinyin_lib_file='pinyin.txt'):
+         self.name = 'chinese filter'
+         self.code = []
+         self.pinyin_lib = self.load_pinyin_lib(pinyin_lib_file)
+
+     def load_pinyin_lib(self, file_path):
+         with open(os.path.join(script_dir, file_path), 'r', encoding='utf-8') as f:
+             return set(line.strip().lower() for line in f)
+
+     def is_valid_chinese(self, word):
+         # check whether the word qualifies: a single token whose first letter is uppercase
+         if len(word.split()) == 1 and word[0].isupper():
+             # use is_pinyin to decide whether it is valid pinyin
+             return self.is_pinyin(word.lower())
+         return False
+
+     def encoder(self, inputs):
+         encoded_inputs = []
+         self.code = []  # reset self.code
+         for i, word in enumerate(inputs):
+             if self.is_valid_chinese(word):
+                 self.code.append((i, word))  # record the index and the pinyin word that is filtered out
+             else:
+                 encoded_inputs.append(word)
+         return encoded_inputs
+
+     def decoder(self, inputs):
+         decoded_inputs = inputs.copy()
+         for i, word in self.code:
+             decoded_inputs.insert(i, word)  # re-insert each filtered word at its original index
+         return decoded_inputs
+
+     def is_pinyin(self, string):
+         '''
+         Judge whether a string is pinyin or an English word.
+         The pinyin library comes from a txt file.
+         '''
+         string = string.lower()
+         stringlen = len(string)
+         max_len = 6
+         result = []
+         n = 0
+         while n < stringlen:
+             matched = 0
+             temp_result = []
+             for i in range(max_len, 0, -1):
+                 s = string[0:i]
+                 if s in self.pinyin_lib:
+                     temp_result.append(string[:i])
+                     matched = i
+                     break
+                 if i == 1 and len(temp_result) == 0:
+                     return False
+             result.extend(temp_result)
+             string = string[matched:]
+             n += matched
+         return True
+
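is_pinyin is a greedy longest-prefix segmentation over the syllable set loaded from pinyin.txt; a small sketch of its behaviour (assuming the file sits next to the module, as load_pinyin_lib expects):

```python
cf = ChineseFilter()             # loads pinyin.txt from script_dir
print(cf.is_pinyin("zhangsan"))  # True: segments as "zhang" + "san"
print(cf.is_pinyin("hello"))     # False: "he" matches, but "llo" has no pinyin prefix
```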
  script_dir = os.path.dirname(os.path.abspath(__file__))
  parent_dir = os.path.dirname(os.path.dirname(os.path.dirname(script_dir)))

  Args:
      gpu_info (list): list of GPU names
      target_gpu_name (str): name of the target GPU
+
  Returns:
      int: index of the target GPU, or -1 if not found
  """

  # self.translator = pipeline('translation', model=self.original_model, tokenizer=self.tokenizer, src_lang=original_language, tgt_lang=target_language, device=device)

  def generate(self, inputs, original_language, target_languages, max_batch_size):
+     filter_list = [SpecialTokenFilter(), SperSignFilter(), ParenSParenFilter(), ChevronsFilter(), SimilarFilter(), ChineseFilter()]
+     filter_pipeline = FilterPipeline(filter_list)
      def language_mapping(original_language):
          d = {
              "Achinese (Arabic script)": "ace_Arab",

              "Ukrainian": "ukr_Cyrl",
              "Urdu": "urd_Arab",
              "Vietnamese": "vie_Latn",
+             "Thai": "tha_Thai",
+             "Khmer": "khm_Khmr"
          }
          return d[original_language]
      def process_gpu_translate_result(temp_outputs):

      processed_num = 0
      for index, batch in enumerate(batches):
          # Tokenize input
+         print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
+         print(len(batch))
+         print(batch)
+         batch = filter_pipeline.batch_encoder(batch)
+         print(batch)
          temp = []
+         if len(batch) > 0:
+             input_ids = self.tokenizer(batch, return_tensors="pt", padding=True).to(self.device_name)
+             for target_language in target_languages:
+                 target_lang_code = self.tokenizer.lang_code_to_id[language_mapping(target_language)]
+                 generated_tokens = self.model.generate(
+                     **input_ids,
+                     forced_bos_token_id=target_lang_code,
+                 )
+                 generated_translation = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
+
+                 print(generated_translation)
+                 generated_translation = filter_pipeline.batch_decoder(generated_translation)
+                 print(generated_translation)
+                 print(len(generated_translation))
+                 # Append result to output
+                 temp.append({
+                     "target_language": target_language,
+                     "generated_translation": generated_translation,
+                 })
+             input_ids.to('cpu')
+             del input_ids
+         else:
+             for target_language in target_languages:
+                 generated_translation = filter_pipeline.batch_decoder(batch)
+                 print(generated_translation)
+                 print(len(generated_translation))
+                 # Append result to output
+                 temp.append({
+                     "target_language": target_language,
+                     "generated_translation": generated_translation,
+                 })
          temp_outputs.append(temp)
          processed_num += len(batch)
          if (index + 1) * max_batch_size // 1000 - index * max_batch_size // 1000 == 1:

                  "generated_translation": trans['generated_translation'][i],
              })
          outputs.append(temp)
+     return outputs
+     for filter in self._filter_list:
+         inputs = filter.encoder(inputs)
+     return inputs
+
+     def batch_decoder(self, inputs):
+         for filter in reversed(self._filter_list):
+             inputs = filter.decoder(inputs)
+         return inputs
+
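Putting it together, a hedged sketch of a call into the updated generate (assuming a constructed Model instance m; the inputs and language names are illustrative, not from the commit):

```python
outputs = m.generate(
    inputs=["Hello %s!", "---", "Press <OK>"],
    original_language="English",
    target_languages=["Thai", "Khmer"],
    max_batch_size=100,
)
# Placeholders ('%s', '<OK>') and filtered rows ('---') are restored
# by filter_pipeline.batch_decoder before the results are returned.
```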
pinyin.txt ADDED
@@ -0,0 +1,408 @@
+ a
+ ai
+ an
+ ang
+ ao
+ ba
+ bai
+ ban
+ bang
+ bao
+ bei
+ ben
+ beng
+ bi
+ bian
+ biao
+ bie
+ bin
+ bing
+ bo
+ bu
+ ca
+ cai
+ can
+ cang
+ cao
+ ce
+ cen
+ ceng
+ cha
+ chai
+ chan
+ chang
+ chao
+ che
+ chen
+ cheng
+ chi
+ chong
+ chou
+ chu
+ chua
+ chuai
+ chuan
+ chuang
+ chui
+ chun
+ chuo
+ ci
+ cong
+ cou
+ cu
+ cuan
+ cui
+ cun
+ cuo
+ da
+ dai
+ dan
+ dang
+ dao
+ de
+ dei
+ den
+ deng
+ di
+ dia
+ dian
+ diao
+ die
+ ding
+ diu
+ dong
+ dou
+ du
+ duan
+ dui
+ dun
+ duo
+ e
+ ei
+ en
+ eng
+ er
+ fa
+ fan
+ fang
+ fei
+ fen
+ feng
+ fo
+ fou
+ fu
+ ga
+ gai
+ gan
+ gang
+ gao
+ ge
+ gei
+ gen
+ geng
+ gong
+ gou
+ gu
+ gua
+ guai
+ guan
+ guang
+ gui
+ gun
+ guo
+ ha
+ hai
+ han
+ hang
+ hao
+ he
+ hei
+ hen
+ heng
+ hong
+ hou
+ hu
+ hua
+ huai
+ huan
+ huang
+ hui
+ hun
+ huo
+ ji
+ jia
+ jian
+ jiang
+ jiao
+ jie
+ jin
+ jing
+ jiong
+ jiu
+ ju
+ juan
+ jue
+ jun
+ ka
+ kai
+ kan
+ kang
+ kao
+ ke
+ ken
+ keng
+ kong
+ kou
+ ku
+ kua
+ kuai
+ kuan
+ kuang
+ kui
+ kun
+ kuo
+ la
+ lai
+ lan
+ lang
+ lao
+ le
+ lei
+ leng
+ li
+ lia
+ lian
+ liang
+ liao
+ lie
+ lin
+ ling
+ liu
+ long
+ lou
+ lu
+ luan
+ lü
+ lüe
+ lun
+ luo
+ ma
+ mai
+ man
+ mang
+ mao
+ me
+ mei
+ men
+ meng
+ mi
+ mian
+ miao
+ mie
+ min
+ ming
+ miu
+ mo
+ mou
+ mu
+ na
+ nai
+ nan
+ nang
+ nao
+ ne
+ nei
+ nen
+ neng
+ ni
+ nian
+ niang
+ niao
+ nie
+ nin
+ ning
+ niu
+ nong
+ nou
+ nu
+ nü
+ nuan
+ nüe
+ nuo
+ nun
+ o
+ ou
+ pa
+ pai
+ pan
+ pang
+ pao
+ pei
+ pen
+ peng
+ pi
+ pian
+ piao
+ pie
+ pin
+ ping
+ po
+ pou
+ pu
+ qi
+ qia
+ qian
+ qiang
+ qiao
+ qie
+ qin
+ qing
+ qiong
+ qiu
+ qu
+ quan
+ que
+ qun
+ ran
+ rang
+ rao
+ re
+ ren
+ reng
+ ri
+ rong
+ rou
+ ru
+ ruan
+ rui
+ run
+ ruo
+ sa
+ sai
+ san
+ sang
+ sao
+ se
+ sen
+ seng
+ sha
+ shai
+ shan
+ shang
+ shao
+ she
+ shei
+ shen
+ sheng
+ shi
+ shou
+ shu
+ shua
+ shuai
+ shuan
+ shuang
+ shui
+ shun
+ shuo
+ si
+ song
+ sou
+ su
+ suan
+ sui
+ sun
+ suo
+ ta
+ tai
+ tan
+ tang
+ tao
+ te
+ teng
+ ti
+ tian
+ tiao
+ tie
+ ting
+ tong
+ tou
+ tu
+ tuan
+ tui
+ tun
+ tuo
+ wa
+ wai
+ wan
+ wang
+ wei
+ wen
+ weng
+ wo
+ wu
+ xi
+ xia
+ xian
+ xiang
+ xiao
+ xie
+ xin
+ xing
+ xiong
+ xiu
+ xu
+ xuan
+ xue
+ xun
+ ya
+ yan
+ yang
+ yao
+ ye
+ yi
+ yin
+ ying
+ yo
+ yong
+ you
+ yu
+ yuan
+ yue
+ yun
+ za
+ zai
+ zan
+ zang
+ zao
+ ze
+ zei
+ zen
+ zeng
+ zha
+ zhai
+ zhan
+ zhang
+ zhao
+ zhe
+ zhei
+ zhen
+ zheng
+ zhi
+ zhong
+ zhou
+ zhu
+ zhua
+ zhuai
+ zhuan
+ zhuang
+ zhui
+ zhun
+ zhuo
+ zi
+ zong
+ zou
+ zu
+ zuan
+ zui
+ zun
+ zuo
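For reference, ChineseFilter.load_pinyin_lib reads this file into a lowercase set, one syllable per line; a minimal check (illustrative):

```python
with open('pinyin.txt', encoding='utf-8') as f:
    pinyin_lib = set(line.strip().lower() for line in f)

print('zhuang' in pinyin_lib)  # True: a valid syllable from the list
print('blog' in pinyin_lib)    # False: not a pinyin syllable
```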