csukuangfj committed on
Commit f38f2ec · 1 Parent(s): b19bad6
Files changed (2)
  1. tokens.txt +0 -2
  2. vits-coqui.py +12 -4
tokens.txt CHANGED
@@ -19,7 +19,6 @@ G 10
 h 11
 H 11
 i 12
-I 12
 j 13
 J 13
 k 14
@@ -128,7 +127,6 @@ Z 29
 ī 67
 Ī 67
 ı 68
-I 68
 ķ 69
 Ķ 69
 ļ 70
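Both removed lines were uppercase duplicates: "i" (U+0069) and dotless "ı" (U+0131) share the same uppercase form "I", so the generated tokens.txt ended up with two conflicting "I" entries (ids 12 and 68). A minimal plain-Python check illustrating the collision this commit removes:

import collections

# "i" and dotless "ı" both uppercase to "I", which is why the old script
# emitted two conflicting "I" lines (I 12 and I 68).
tokens = ["i", "ı"]
print(collections.Counter(t.upper() for t in tokens))  # Counter({'I': 2})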
 
vits-coqui.py CHANGED
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 
+import collections
 import os
 from typing import Any, Dict
 
@@ -105,6 +106,15 @@ def main():
     add_meta_data(filename="model.onnx", meta_data=meta_data)
 
     # Now generate tokens.txt
+    all_upper_tokens = [i.upper() for i in vits.tokenizer.characters._char_to_id.keys()]
+    duplicate = set(
+        [
+            item
+            for item, count in collections.Counter(all_upper_tokens).items()
+            if count > 1
+        ]
+    )
+
     with open("tokens.txt", "w", encoding="utf-8") as f:
         for token, idx in vits.tokenizer.characters._char_to_id.items():
             f.write(f"{token} {idx}\n")
@@ -114,11 +124,9 @@ def main():
                 token not in ("<PAD>", "<EOS>", "BOS", "<BLNK>")
                 and token.lower() != token.upper()
                 and len(token.upper()) == 1
+                and token.upper() not in duplicate
             ):
-                if token == "i" and "I" in vits.tokenizer.characters._char_to_id:
-                    pass
-                else:
-                    f.write(f"{token.upper()} {idx}\n")
+                f.write(f"{token.upper()} {idx}\n")
 
 
 if __name__ == "__main__":
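For reference, a standalone sketch of the de-duplication logic added above. Here char_to_id is a made-up stand-in for vits.tokenizer.characters._char_to_id, print() stands in for the tokens.txt writes, and the special-token filter (<PAD>, <EOS>, ...) is omitted since the toy mapping contains none:

import collections

# Hypothetical mapping used only for illustration; the real one comes from
# vits.tokenizer.characters._char_to_id in the Coqui VITS tokenizer.
char_to_id = {"i": 12, "ı": 68, "a": 5}

# Uppercase forms that occur more than once (here: "I", from both "i" and "ı").
all_upper_tokens = [t.upper() for t in char_to_id]
duplicate = {t for t, n in collections.Counter(all_upper_tokens).items() if n > 1}

for token, idx in char_to_id.items():
    print(f"{token} {idx}")
    if (
        token.lower() != token.upper()      # token actually has two cases
        and len(token.upper()) == 1         # uppercasing keeps a single character
        and token.upper() not in duplicate  # skip ambiguous entries such as "I"
    ):
        print(f"{token.upper()} {idx}")

# Prints: i 12, ı 68, a 5, A 5 — no conflicting "I" line is produced.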