Update spaCy pipeline

Files changed:
- README.md +2 -2
- __pycache__/use_custom_tokenizer.cpython-312.pyc +0 -0
- base_transformer/model +1 -1
- config.cfg +1 -4
- it_trf_nrp-any-py3-none-any.whl +2 -2
- meta.json +0 -2
- ner_transformer/model +1 -1
- use_custom_tokenizer.py +5 -6
README.md
CHANGED
@@ -26,8 +26,8 @@ model-index:
 | **Name** | `it_trf_nrp` |
 | **Version** | `0.0.0` |
 | **spaCy** | `>=3.8.3,<3.9.0` |
-| **Default Pipeline** | `ner_transformer`, `ner`, `merge_entities`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
-| **Components** | `ner_transformer`, `ner`, `merge_entities`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
+| **Default Pipeline** | `ner_transformer`, `ner`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
+| **Components** | `ner_transformer`, `ner`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
 | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
 | **Sources** | n/a |
 | **License** | n/a |
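Since the wheel in this repo packages the whole pipeline, a minimal smoke test after installing it could look like the sketch below. The package name `it_trf_nrp` comes from the README table; the sample sentence is an arbitrary illustration, not from this repo.

```python
# Install the packaged pipeline first, e.g.:
#   pip install it_trf_nrp-any-py3-none-any.whl
import spacy

# Load the installed package by the name given in README/meta.json.
nlp = spacy.load("it_trf_nrp")

# Run a short Italian sentence through the full pipeline.
doc = nlp("Mario Rossi lavora a Roma.")
for ent in doc.ents:
    print(ent.text, ent.label_)
```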
__pycache__/use_custom_tokenizer.cpython-312.pyc
ADDED
Binary file (1.04 kB)
base_transformer/model
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:382b251bd81e6c65fe4033674bc76b90eee93409d6c567aafbfc11d94c2e22cc
 size 443821706
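The model weights are stored as Git LFS pointer files, so the `oid sha256:` line is the checksum of the real blob, not of the pointer. A quick way to confirm that a fetched file matches its pointer is to hash it locally, as in this sketch (the relative path assumes you run it from the repo root):

```python
import hashlib
from pathlib import Path

def lfs_sha256(path: Path) -> str:
    """Stream the file through SHA-256 so large model blobs fit in memory."""
    h = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected value from the LFS pointer committed here.
expected = "382b251bd81e6c65fe4033674bc76b90eee93409d6c567aafbfc11d94c2e22cc"
assert lfs_sha256(Path("base_transformer/model")) == expected
```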
config.cfg
CHANGED
@@ -10,7 +10,7 @@ seed = 17
 
 [nlp]
 lang = "it"
-pipeline = ["ner_transformer","ner","merge_entities","base_transformer","morphologizer","tagger","parser","trainable_lemmatizer"]
+pipeline = ["ner_transformer","ner","base_transformer","morphologizer","tagger","parser","trainable_lemmatizer"]
 batch_size = 512
 disabled = []
 before_creation = null
@@ -43,9 +43,6 @@ use_fast = true
 
 [components.base_transformer.model.transformer_config]
 
-[components.merge_entities]
-factory = "merge_entities"
-
 [components.morphologizer]
 factory = "morphologizer"
 extend = false
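With `merge_entities` dropped from `[nlp] pipeline` and its `[components.merge_entities]` block removed, the loaded pipeline should now report exactly the seven components listed above. A quick check, assuming the package from this repo is installed:

```python
import spacy

nlp = spacy.load("it_trf_nrp")
print(nlp.pipe_names)
# Expected after this change:
# ['ner_transformer', 'ner', 'base_transformer', 'morphologizer',
#  'tagger', 'parser', 'trainable_lemmatizer']
```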
it_trf_nrp-any-py3-none-any.whl
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:e1c331f6965c59e36deb2bb546991791a0055e2abd1c39a00bda81d5590bdacd
+size 825600865
meta.json
CHANGED
@@ -827,7 +827,6 @@
 "pipeline":[
 "ner_transformer",
 "ner",
-"merge_entities",
 "base_transformer",
 "morphologizer",
 "tagger",
@@ -837,7 +836,6 @@
 "components":[
 "ner_transformer",
 "ner",
-"merge_entities",
 "base_transformer",
 "morphologizer",
 "tagger",
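`merge_entities` is a built-in spaCy factory that retokenizes each entity span into a single token, so removing it here only changes the packaged defaults. If downstream code still wants merged entities, it can be re-added at load time, roughly like this:

```python
import spacy

nlp = spacy.load("it_trf_nrp")
# merge_entities is a stock spaCy factory, so no custom code is needed.
nlp.add_pipe("merge_entities", after="ner")

doc = nlp("Mario Rossi lavora a Roma.")
print([t.text for t in doc])  # "Mario Rossi" should now be a single token
```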
ner_transformer/model
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:82e6707a6d7df1d48c0ade9bf95d437a23daf1a125a1f5457d957a29a007be3a
 size 440759145
use_custom_tokenizer.py
CHANGED
@@ -1,13 +1,12 @@
 from spacy.util import registry
-
-
-    custom_tokenizer,
-)
-
+from spacy.tokenizer import Tokenizer
+import pathlib
 
 @registry.tokenizers("customize_tokenizer")
 def make_customize_tokenizer():
     def customize_tokenizer(nlp):
-
+        tokenizer = Tokenizer(nlp.vocab)
+        script_dir = pathlib.Path(__file__).parent.resolve()
+        return tokenizer.from_disk(script_dir / "tokenizer")
 
     return customize_tokenizer
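For this registered function to take effect, the config has to point `[nlp.tokenizer]` at it, and the module must be imported before loading so the registry entry exists. A sketch of both sides follows; the config excerpt is the standard spaCy custom-tokenizer pattern, not copied from this repo's config.cfg, and the pipeline path is hypothetical:

```python
# config.cfg references the registered tokenizer like this:
#   [nlp.tokenizer]
#   @tokenizers = "customize_tokenizer"
#
# When loading from a local pipeline directory, import the module first
# so the "customize_tokenizer" registry entry is available:
import use_custom_tokenizer  # noqa: F401  (registers the tokenizer)
import spacy

nlp = spacy.load("path/to/pipeline")  # hypothetical local path
```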