Update spaCy pipeline

- README.md +2 -2
- __pycache__/use_custom_tokenizer.cpython-312.pyc +0 -0
- base_transformer/model +1 -1
- config.cfg +1 -4
- fr_trf_nrp-any-py3-none-any.whl +2 -2
- meta.json +0 -2
- ner_transformer/model +1 -1
- use_custom_tokenizer.py +5 -6
README.md
CHANGED
@@ -26,8 +26,8 @@ model-index:
 | **Name** | `fr_trf_nrp` |
 | **Version** | `0.0.0` |
 | **spaCy** | `>=3.8.3,<3.9.0` |
-| **Default Pipeline** | `ner_transformer`, `ner`, `merge_entities`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
-| **Components** | `ner_transformer`, `ner`, `merge_entities`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
+| **Default Pipeline** | `ner_transformer`, `ner`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
+| **Components** | `ner_transformer`, `ner`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
 | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
 | **Sources** | n/a |
 | **License** | n/a |
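A quick sanity check that the installed package matches the table above (a minimal sketch; assumes the rebuilt wheel from this commit is installed):

```python
import spacy

# Load the packaged pipeline by name; requires the fr_trf_nrp wheel to be installed.
nlp = spacy.load("fr_trf_nrp")

# Component order should match the "Default Pipeline" row, with merge_entities gone.
expected = [
    "ner_transformer", "ner", "base_transformer",
    "morphologizer", "tagger", "parser", "trainable_lemmatizer",
]
assert nlp.pipe_names == expected, nlp.pipe_names
```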
__pycache__/use_custom_tokenizer.cpython-312.pyc
ADDED
Binary file (1.04 kB)
base_transformer/model
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:bee41def552f5762e57377fc1a6e91f6bc085801a1d67d74a51387d947181fbd
 size 443537828
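Both model files in this commit are Git LFS pointers, where the `oid` is the SHA-256 digest of the actual blob. A minimal sketch for verifying a downloaded blob against its pointer (the local path is an assumption):

```python
import hashlib

def lfs_sha256(path, chunk_size=1 << 20):
    """SHA-256 of a file's contents, as recorded in a Git LFS pointer's oid field."""
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Hypothetical local path to the fetched weights.
assert lfs_sha256("base_transformer/model") == (
    "bee41def552f5762e57377fc1a6e91f6bc085801a1d67d74a51387d947181fbd"
)
```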
config.cfg
CHANGED
@@ -10,7 +10,7 @@ seed = 17
 
 [nlp]
 lang = "fr"
-pipeline = ["ner_transformer","ner","merge_entities","base_transformer","morphologizer","tagger","parser","trainable_lemmatizer"]
+pipeline = ["ner_transformer","ner","base_transformer","morphologizer","tagger","parser","trainable_lemmatizer"]
 batch_size = 512
 disabled = []
 before_creation = null
@@ -43,9 +43,6 @@ use_fast = true
 
 [components.base_transformer.model.transformer_config]
 
-[components.merge_entities]
-factory = "merge_entities"
-
 [components.morphologizer]
 factory = "morphologizer"
 extend = false
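The edited pipeline list can be verified straight from the config, without loading any weights (a sketch using spaCy's config loader; assumes config.cfg is in the working directory):

```python
from spacy.util import load_config

cfg = load_config("config.cfg")

# merge_entities was removed from both the [nlp] pipeline and [components].
assert "merge_entities" not in cfg["nlp"]["pipeline"]
assert "merge_entities" not in cfg["components"]
print(cfg["nlp"]["pipeline"])
```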
fr_trf_nrp-any-py3-none-any.whl
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:39f5ef22d177266bdc2c9d17caa66688d4f5158d39761527a0e54ca3430ede3e
+size 822748847
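The rebuilt wheel bundles the whole pipeline, so once installed the model loads by name (a sketch; the example sentence is arbitrary):

```python
# First, in a shell: pip install fr_trf_nrp-any-py3-none-any.whl
import spacy

nlp = spacy.load("fr_trf_nrp")
doc = nlp("Le Parlement européen siège à Strasbourg.")
print([(ent.text, ent.label_) for ent in doc.ents])
```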
meta.json
CHANGED
@@ -567,7 +567,6 @@
 "pipeline":[
 "ner_transformer",
 "ner",
-"merge_entities",
 "base_transformer",
 "morphologizer",
 "tagger",
@@ -577,7 +576,6 @@
 "components":[
 "ner_transformer",
 "ner",
-"merge_entities",
 "base_transformer",
 "morphologizer",
 "tagger",
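Since meta.json duplicates the component list from config.cfg, the two files have to stay in sync; a small consistency check (a sketch, assuming both files sit side by side):

```python
import json
from spacy.util import load_config

with open("meta.json", encoding="utf8") as fh:
    meta = json.load(fh)
cfg = load_config("config.cfg")

# After this commit, both lists are identical and match the config.
assert meta["pipeline"] == meta["components"] == list(cfg["nlp"]["pipeline"])
```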
ner_transformer/model
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:160182b4b82beb281481ab70990e790227fc111d80961ae73b375f48b1e924a5
 size 440759145
use_custom_tokenizer.py
CHANGED
@@ -1,13 +1,12 @@
 from spacy.util import registry
-
-
-    custom_tokenizer,
-)
-
+from spacy.tokenizer import Tokenizer
+import pathlib
 
 @registry.tokenizers("customize_tokenizer")
 def make_customize_tokenizer():
     def customize_tokenizer(nlp):
-
+        tokenizer = Tokenizer(nlp.vocab)
+        script_dir = pathlib.Path(__file__).parent.resolve()
+        return tokenizer.from_disk(script_dir / "tokenizer")
 
     return customize_tokenizer
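This registered factory is what a spaCy config points at via `@tokenizers = "customize_tokenizer"` under `[nlp.tokenizer]`. A minimal sketch of using it manually (assumes a serialized `tokenizer` directory next to the script, which is what the code above loads):

```python
import spacy
from use_custom_tokenizer import make_customize_tokenizer

nlp = spacy.blank("fr")

# The registered factory returns a callable that takes the nlp object and
# yields the tokenizer deserialized from the "tokenizer" directory on disk.
nlp.tokenizer = make_customize_tokenizer()(nlp)

print([t.text for t in nlp("Bonjour le monde !")])
```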