Lazy-Val committed
Commit 30d0611 · verified · 1 Parent(s): 487613f

Update spaCy pipeline

README.md CHANGED
@@ -26,8 +26,8 @@ model-index:
 | **Name** | `it_trf_nrp` |
 | **Version** | `0.0.0` |
 | **spaCy** | `>=3.8.3,<3.9.0` |
-| **Default Pipeline** | `ner_transformer`, `ner`, `merge_entities`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
-| **Components** | `ner_transformer`, `ner`, `merge_entities`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
+| **Default Pipeline** | `ner_transformer`, `ner`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
+| **Components** | `ner_transformer`, `ner`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
 | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
 | **Sources** | n/a |
 | **License** | n/a |
__pycache__/use_custom_tokenizer.cpython-312.pyc ADDED
Binary file (1.04 kB).
 
base_transformer/model CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc821393399de287ef8756d0ee3ea86ced3e509d0f5196a8d9d8ca32a54507b5
+oid sha256:382b251bd81e6c65fe4033674bc76b90eee93409d6c567aafbfc11d94c2e22cc
 size 443821706
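
This is a Git LFS pointer file: the repository tracks only the blob's sha256 digest and byte size, so the retrained weights change the `oid` while the size stays identical. A minimal sketch for checking a downloaded blob against its pointer (the local path is a stand-in; the digest and size are copied from the new pointer above):

import hashlib
from pathlib import Path

def matches_lfs_pointer(blob: Path, oid: str, size: int) -> bool:
    """Stream-hash a downloaded file and compare it to an LFS pointer's oid/size."""
    if blob.stat().st_size != size:
        return False
    digest = hashlib.sha256()
    with blob.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == oid

# Values taken from the updated pointer; the path is hypothetical.
print(matches_lfs_pointer(
    Path("base_transformer/model"),
    "382b251bd81e6c65fe4033674bc76b90eee93409d6c567aafbfc11d94c2e22cc",
    443821706,
))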
config.cfg CHANGED
@@ -10,7 +10,7 @@ seed = 17
 
 [nlp]
 lang = "it"
-pipeline = ["ner_transformer","ner","merge_entities","base_transformer","morphologizer","tagger","parser","trainable_lemmatizer"]
+pipeline = ["ner_transformer","ner","base_transformer","morphologizer","tagger","parser","trainable_lemmatizer"]
 batch_size = 512
 disabled = []
 before_creation = null
@@ -43,9 +43,6 @@ use_fast = true
 
 [components.base_transformer.model.transformer_config]
 
-[components.merge_entities]
-factory = "merge_entities"
-
 [components.morphologizer]
 factory = "morphologizer"
 extend = false
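
The substantive change in this commit: the built-in `merge_entities` component is removed from the pipeline, so recognized entity spans are no longer collapsed into single tokens before the morphologizer, tagger, parser, and lemmatizer run. A quick sanity check after installing the updated wheel (assuming the package installs under the name given in `meta.json`):

import spacy

nlp = spacy.load("it_trf_nrp")  # package name assumed from meta.json
print(nlp.pipe_names)
# Expected, per the new [nlp] pipeline:
# ['ner_transformer', 'ner', 'base_transformer', 'morphologizer',
#  'tagger', 'parser', 'trainable_lemmatizer']
assert "merge_entities" not in nlp.pipe_names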
it_trf_nrp-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e36485c33f32539c299614cba011ff61388adb7ce4464f9adb8e268f4c36824d
-size 825599590
+oid sha256:e1c331f6965c59e36deb2bb546991791a0055e2abd1c39a00bda81d5590bdacd
+size 825600865
meta.json CHANGED
@@ -827,7 +827,6 @@
 "pipeline":[
 "ner_transformer",
 "ner",
-"merge_entities",
 "base_transformer",
 "morphologizer",
 "tagger",
@@ -837,7 +836,6 @@
 "components":[
 "ner_transformer",
 "ner",
-"merge_entities",
 "base_transformer",
 "morphologizer",
 "tagger",
ner_transformer/model CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e8241c52299b144357c0ee8000b1246d52fad6788100ba00a24392e0a2819f39
+oid sha256:82e6707a6d7df1d48c0ade9bf95d437a23daf1a125a1f5457d957a29a007be3a
 size 440759145
use_custom_tokenizer.py CHANGED
@@ -1,13 +1,12 @@
 from spacy.util import registry
-
-from commercial_registry_ner.spacy.custom_tokenizer.custom_tokenizer import (
-    custom_tokenizer,
-)
-
 
 @registry.tokenizers("customize_tokenizer")
 def make_customize_tokenizer():
     def customize_tokenizer(nlp):
-        return custom_tokenizer(nlp)
+from spacy.tokenizer import Tokenizer
+import pathlib
+        tokenizer = Tokenizer(nlp.vocab)
+        script_dir = pathlib.Path(__file__).parent.resolve()
+        return tokenizer.from_disk(script_dir / "tokenizer")
 
     return customize_tokenizer
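
The rewritten module drops the dependency on the private `commercial_registry_ner` package: it now builds a bare `spacy.tokenizer.Tokenizer` and restores its rules with `from_disk` from the serialized `tokenizer` file shipped next to the script, which also explains the new `__pycache__` entry above. The packaged config references this entry point via `[nlp.tokenizer] @tokenizers = "customize_tokenizer"`; a hedged sketch of resolving it by hand, assuming the serialized `tokenizer` file is present on disk:

import spacy
from spacy.util import registry

import use_custom_tokenizer  # noqa: F401 — running the decorator registers "customize_tokenizer"

# The registry entry returns a creation callback; spaCy invokes it with the
# nlp object when the pipeline is constructed from config.
nlp = spacy.blank("it")
create_tokenizer = registry.tokenizers.get("customize_tokenizer")()
nlp.tokenizer = create_tokenizer(nlp)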