File size: 391 Bytes
487613f 30d0611 487613f 30d0611 487613f |
1 2 3 4 5 6 7 8 9 10 11 12 13 |
from spacy.util import registry
from spacy.tokenizer import Tokenizer
import pathlib
@registry.tokenizers("customize_tokenizer")
def make_customize_tokenizer():
    """Registered factory producing a tokenizer-customization callback.

    The returned callable replaces a pipeline's tokenizer with one whose
    settings are deserialized from the "tokenizer" directory located next
    to this script.
    """

    def customize_tokenizer(nlp):
        # Start from a blank tokenizer bound to the pipeline's shared vocab,
        # then restore the serialized tokenizer data from disk.
        blank_tokenizer = Tokenizer(nlp.vocab)
        here = pathlib.Path(__file__).parent.resolve()
        return blank_tokenizer.from_disk(here / "tokenizer")

    return customize_tokenizer
|