wuzhiying2023 committed on
Commit
e3e1498
1 Parent(s): e580bc8

[Fix] compatibility with higher transformers version

Browse files
Files changed (1) hide show
  1. tokenization_baichuan.py +5 -5
tokenization_baichuan.py CHANGED
@@ -52,6 +52,11 @@ class BaichuanTokenizer(PreTrainedTokenizer):
52
  eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
53
  unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
54
  pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
 
 
 
 
 
55
  super().__init__(
56
  bos_token=bos_token,
57
  eos_token=eos_token,
@@ -63,11 +68,6 @@ class BaichuanTokenizer(PreTrainedTokenizer):
63
  clean_up_tokenization_spaces=clean_up_tokenization_spaces,
64
  **kwargs,
65
  )
66
- self.vocab_file = vocab_file
67
- self.add_bos_token = add_bos_token
68
- self.add_eos_token = add_eos_token
69
- self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
70
- self.sp_model.Load(vocab_file)
71
 
72
  def __getstate__(self):
73
  state = self.__dict__.copy()
 
52
  eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
53
  unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
54
  pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
55
+ self.vocab_file = vocab_file
56
+ self.add_bos_token = add_bos_token
57
+ self.add_eos_token = add_eos_token
58
+ self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
59
+ self.sp_model.Load(vocab_file)
60
  super().__init__(
61
  bos_token=bos_token,
62
  eos_token=eos_token,
 
68
  clean_up_tokenization_spaces=clean_up_tokenization_spaces,
69
  **kwargs,
70
  )
 
 
 
 
 
71
 
72
  def __getstate__(self):
73
  state = self.__dict__.copy()