jwengr committed on
Commit
87a6fe0
·
verified ·
1 Parent(s): 315c544

Upload folder using huggingface_hub

Browse files
modeling_sentence_tokenizer.py CHANGED
@@ -1,6 +1,7 @@
1
  import torch
2
  import re
3
 
 
4
  from transformers import PretrainedConfig, PreTrainedModel
5
 
6
  class SentenceTokenizerConfig(PretrainedConfig):
@@ -164,4 +165,4 @@ class SentenceTokenizer(PreTrainedModel):
164
  if roll==True:
165
  res = res[len(chunks[0][2]):-len(chunks[-1][2])]
166
 
167
- return res
 
1
  import torch
2
  import re
3
 
4
+ from collections import Counter
5
  from transformers import PretrainedConfig, PreTrainedModel
6
 
7
  class SentenceTokenizerConfig(PretrainedConfig):
 
165
  if roll==True:
166
  res = res[len(chunks[0][2]):-len(chunks[-1][2])]
167
 
168
+ return res
sentence_tokenizer/modeling_sentence_tokenizer.py CHANGED
@@ -1,6 +1,7 @@
1
  import torch
2
  import re
3
 
 
4
  from transformers import PretrainedConfig, PreTrainedModel
5
 
6
  class SentenceTokenizerConfig(PretrainedConfig):
@@ -164,4 +165,4 @@ class SentenceTokenizer(PreTrainedModel):
164
  if roll==True:
165
  res = res[len(chunks[0][2]):-len(chunks[-1][2])]
166
 
167
- return res
 
1
  import torch
2
  import re
3
 
4
+ from collections import Counter
5
  from transformers import PretrainedConfig, PreTrainedModel
6
 
7
  class SentenceTokenizerConfig(PretrainedConfig):
 
165
  if roll==True:
166
  res = res[len(chunks[0][2]):-len(chunks[-1][2])]
167
 
168
+ return res