File size: 391 Bytes
487613f
30d0611
 
487613f
 
 
 
30d0611
 
 
487613f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
from spacy.util import registry
from spacy.tokenizer import Tokenizer
import pathlib

@registry.tokenizers("customize_tokenizer")
def make_customize_tokenizer():
    """Factory registered under ``customize_tokenizer`` in spaCy's registry.

    Returns a callable that, given an ``nlp`` pipeline, builds a blank
    ``Tokenizer`` on ``nlp.vocab`` and restores its serialized settings
    from the ``tokenizer`` directory located next to this script.
    """

    def customize_tokenizer(nlp):
        # Anchor the lookup to this file's directory so loading works no
        # matter what the process's current working directory is.
        here = pathlib.Path(__file__).parent.resolve()
        blank_tokenizer = Tokenizer(nlp.vocab)
        return blank_tokenizer.from_disk(here / "tokenizer")

    return customize_tokenizer