Upload folder using huggingface_hub
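The commit message above is the default one produced by huggingface_hub's folder-upload helper. A minimal sketch of how such a commit is typically created (the repo id and local path below are hypothetical, not taken from this page):

from huggingface_hub import upload_folder

# Hypothetical repo id and local folder; the actual values are not shown on this page.
upload_folder(
    repo_id="your-username/sentence-tokenizer",
    folder_path="./sentence_tokenizer",
    repo_type="model",
    # Omitting commit_message yields the default "Upload folder using huggingface_hub".
)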
modeling_sentence_tokenizer.py CHANGED

@@ -1,6 +1,7 @@
 import torch
 import re
 
+from collections import Counter
 from transformers import PretrainedConfig, PreTrainedModel
 
 class SentenceTokenizerConfig(PretrainedConfig):
@@ -164,4 +165,4 @@ class SentenceTokenizer(PreTrainedModel):
         if roll==True:
             res = res[len(chunks[0][2]):-len(chunks[-1][2])]
 
-        return res
+        return res
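Since SentenceTokenizer subclasses PreTrainedModel with its own PretrainedConfig, the module is presumably intended to be loaded as remote code from the Hub. A minimal loading sketch, assuming the repo's config.json registers these classes via auto_map (the repo id below is hypothetical, not part of this diff):

from transformers import AutoConfig, AutoModel

# Hypothetical repo id; the actual repository name is not shown in this diff.
repo_id = "your-username/sentence-tokenizer"

# trust_remote_code=True lets transformers fetch modeling_sentence_tokenizer.py
# from the Hub repo and instantiate SentenceTokenizerConfig / SentenceTokenizer.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)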
sentence_tokenizer/modeling_sentence_tokenizer.py CHANGED

@@ -1,6 +1,7 @@
 import torch
 import re
 
+from collections import Counter
 from transformers import PretrainedConfig, PreTrainedModel
 
 class SentenceTokenizerConfig(PretrainedConfig):
@@ -164,4 +165,4 @@ class SentenceTokenizer(PreTrainedModel):
         if roll==True:
             res = res[len(chunks[0][2]):-len(chunks[-1][2])]
 
-        return res
+        return res