pranjalchitale
committed on
Commit
•
77b8bab
1
Parent(s):
4198e8c
Update tokenization_indictrans.py
Browse files
tokenization_indictrans.py
CHANGED
@@ -87,10 +87,10 @@ class IndicTransTokenizer(PreTrainedTokenizer):
|
|
87 |
self.src_spm_fp = src_spm_fp
|
88 |
self.tgt_spm_fp = tgt_spm_fp
|
89 |
|
90 |
-
self.unk_token = unk_token
|
91 |
-
self.pad_token = pad_token
|
92 |
-
self.eos_token = eos_token
|
93 |
-
self.bos_token = bos_token
|
94 |
|
95 |
self.encoder = self._load_json(self.src_vocab_fp)
|
96 |
if self.unk_token not in self.encoder:
|
|
|
87 |
self.src_spm_fp = src_spm_fp
|
88 |
self.tgt_spm_fp = tgt_spm_fp
|
89 |
|
90 |
+
self.unk_token = unk_token.content
|
91 |
+
self.pad_token = pad_token.content
|
92 |
+
self.eos_token = eos_token.content
|
93 |
+
self.bos_token = bos_token.content
|
94 |
|
95 |
self.encoder = self._load_json(self.src_vocab_fp)
|
96 |
if self.unk_token not in self.encoder:
|