p1atdev committed on
Commit
010caf6
·
verified ·
1 Parent(s): 964d3b3

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenization_dart.py +8 -1
tokenization_dart.py CHANGED
@@ -15,6 +15,12 @@ VOCAB_FILES_NAMES = {
15
  "tag_category": "tag_category.json",
16
  }
17
 
 
 
 
 
 
 
18
 
19
  @dataclass
20
  class Category:
@@ -63,6 +69,7 @@ class DartTokenizer(PreTrainedTokenizerFast):
63
  """Dart tokenizer"""
64
 
65
  vocab_files_names = VOCAB_FILES_NAMES
 
66
 
67
  def __init__(self, tag_category, **kwargs):
68
  super().__init__(**kwargs)
@@ -137,7 +144,7 @@ class DartTokenizer(PreTrainedTokenizerFast):
137
  input_ids: List[int],
138
  category_mask: Optional[Dict[str, np.ndarray]] = None,
139
  ) -> Tuple[np.ndarray, Dict[str, np.ndarray]]:
140
- """Get the next token's vocab mask to be used"""
141
 
142
  if category_mask == None:
143
  category_mask = self.category_mask
 
15
  "tag_category": "tag_category.json",
16
  }
17
 
18
+ PRETRAINED_VOCAB_FILES_MAP = {
19
+ "tag_category": {
20
+ "p1atdev/tokenizer_test_1": "https://huggingface.co/p1atdev/tokenizer_test_1/resolve/main/tag_category.json"
21
+ }
22
+ }
23
+
24
 
25
  @dataclass
26
  class Category:
 
69
  """Dart tokenizer"""
70
 
71
  vocab_files_names = VOCAB_FILES_NAMES
72
+ pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
73
 
74
  def __init__(self, tag_category, **kwargs):
75
  super().__init__(**kwargs)
 
144
  input_ids: List[int],
145
  category_mask: Optional[Dict[str, np.ndarray]] = None,
146
  ) -> Tuple[np.ndarray, Dict[str, np.ndarray]]:
147
+ """Get the next token's vocab mask and a category mask"""
148
 
149
  if category_mask == None:
150
  category_mask = self.category_mask