mlinmg committed on
Commit
604db92
·
verified ·
1 Parent(s): aebdb64

Update tokenizer.py

Browse files
Files changed (1) hide show
  1. tokenizer.py +1 -1
tokenizer.py CHANGED
@@ -773,7 +773,7 @@ class XTTSTokenizerFast(PreTrainedTokenizerFast):
773
  char_limit = self.char_limits.get(base_lang, 250)
774
 
775
  # Clean and preprocess
776
- text = self.preprocess_text(text, text_lang)
777
 
778
  # Split text into sentences/chunks based on language
779
  chunk_list = split_sentence(text, base_lang, text_split_length=char_limit)
 
773
  char_limit = self.char_limits.get(base_lang, 250)
774
 
775
  # Clean and preprocess
776
+ #text = self.preprocess_text(text, text_lang) we do this in the hidden function
777
 
778
  # Split text into sentences/chunks based on language
779
  chunk_list = split_sentence(text, base_lang, text_split_length=char_limit)