Lazy-Val committed (verified)
Commit ae7950f · Parent: a800d1a

Update spaCy pipeline
README.md CHANGED
@@ -26,8 +26,8 @@ model-index:
 | **Name** | `fr_trf_nrp` |
 | **Version** | `0.0.0` |
 | **spaCy** | `>=3.8.3,<3.9.0` |
-| **Default Pipeline** | `ner_transformer`, `ner`, `merge_entities`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
-| **Components** | `ner_transformer`, `ner`, `merge_entities`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
+| **Default Pipeline** | `ner_transformer`, `ner`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
+| **Components** | `ner_transformer`, `ner`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
 | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
 | **Sources** | n/a |
 | **License** | n/a |
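This change drops `merge_entities` from both the default pipeline and the component list; the table now matches `config.cfg` and `meta.json` below. A quick sanity check, assuming the updated wheel is installed and importable as `fr_trf_nrp`:

```python
import spacy

# Load the packaged pipeline and confirm merge_entities is gone.
nlp = spacy.load("fr_trf_nrp")
print(nlp.pipe_names)
# expected: ['ner_transformer', 'ner', 'base_transformer',
#            'morphologizer', 'tagger', 'parser', 'trainable_lemmatizer']
```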
__pycache__/use_custom_tokenizer.cpython-312.pyc ADDED
Binary file (1.04 kB)
base_transformer/model CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae82900483b15cbd017ba63a5a7e8833ffa9d295fe764b168d33281a2bcd746c
+oid sha256:bee41def552f5762e57377fc1a6e91f6bc085801a1d67d74a51387d947181fbd
 size 443537828
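The model files in this repo are Git LFS pointers rather than the weights themselves: `oid` is the SHA-256 of the real blob and `size` its byte count (unchanged here, since the retrained weights happen to have the same length). A minimal verification sketch, assuming the pointer file and the downloaded blob are both available locally (paths are illustrative):

```python
import hashlib
import pathlib

def verify_lfs_blob(pointer_path: str, blob_path: str) -> bool:
    """Check a downloaded blob against its Git LFS pointer file."""
    fields = dict(
        line.split(" ", 1)
        for line in pathlib.Path(pointer_path).read_text().splitlines()
        if " " in line
    )
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    blob = pathlib.Path(blob_path)
    if blob.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with blob.open("rb") as f:
        # Hash in 1 MiB chunks; the model blobs are ~440 MB each.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid
```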
config.cfg CHANGED
@@ -10,7 +10,7 @@ seed = 17
 
 [nlp]
 lang = "fr"
-pipeline = ["ner_transformer","ner","merge_entities","base_transformer","morphologizer","tagger","parser","trainable_lemmatizer"]
+pipeline = ["ner_transformer","ner","base_transformer","morphologizer","tagger","parser","trainable_lemmatizer"]
 batch_size = 512
 disabled = []
 before_creation = null
@@ -43,9 +43,6 @@ use_fast = true
 
 [components.base_transformer.model.transformer_config]
 
-[components.merge_entities]
-factory = "merge_entities"
-
 [components.morphologizer]
 factory = "morphologizer"
 extend = false
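With the `[components.merge_entities]` block removed, the component is no longer created when the pipeline loads. `merge_entities` is still a stock spaCy factory, though, so downstream code that depended on entity spans being merged into single tokens can restore it at load time. A sketch, assuming the model loads as `fr_trf_nrp`:

```python
import spacy

nlp = spacy.load("fr_trf_nrp")
# merge_entities retokenizes each entity span into one token;
# re-insert it directly after the NER component if needed.
nlp.add_pipe("merge_entities", after="ner")
print(nlp.pipe_names)
```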
fr_trf_nrp-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21aa8991ffd5c4f6327d8c41b4deef58c781fb3c5575f8ae7169e89f6a477854
-size 822747526
+oid sha256:39f5ef22d177266bdc2c9d17caa66688d4f5158d39761527a0e54ca3430ede3e
+size 822748847
meta.json CHANGED
@@ -567,7 +567,6 @@
 "pipeline":[
 "ner_transformer",
 "ner",
-"merge_entities",
 "base_transformer",
 "morphologizer",
 "tagger",
@@ -577,7 +576,6 @@
 "components":[
 "ner_transformer",
 "ner",
-"merge_entities",
 "base_transformer",
 "morphologizer",
 "tagger",
ner_transformer/model CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec954b90f0e7d6ddb6e3473315aa58d75ce25f4afb0340776ec337a7817ea614
+oid sha256:160182b4b82beb281481ab70990e790227fc111d80961ae73b375f48b1e924a5
 size 440759145
use_custom_tokenizer.py CHANGED
@@ -1,13 +1,12 @@
 from spacy.util import registry
-
-from commercial_registry_ner.spacy.custom_tokenizer.custom_tokenizer import (
-    custom_tokenizer,
-)
-
+from spacy.tokenizer import Tokenizer
+import pathlib
 
 @registry.tokenizers("customize_tokenizer")
 def make_customize_tokenizer():
     def customize_tokenizer(nlp):
-        return custom_tokenizer(nlp)
+        tokenizer = Tokenizer(nlp.vocab)
+        script_dir = pathlib.Path(__file__).parent.resolve()
+        return tokenizer.from_disk(script_dir / "tokenizer")
 
     return customize_tokenizer
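The rewritten module no longer imports from the private `commercial_registry_ner` package; instead it rebuilds a plain `spacy.tokenizer.Tokenizer` and restores its state from a `tokenizer` file shipped next to the module. For context, the producing side of that file could look like the following sketch (illustrative, not part of this commit):

```python
import pathlib
import spacy
from spacy.tokenizer import Tokenizer

# Serialize a (customized) tokenizer to the "tokenizer" file that
# customize_tokenizer() later reloads with Tokenizer.from_disk().
nlp = spacy.blank("fr")
tokenizer = Tokenizer(nlp.vocab)  # custom rules/prefixes would go here

# The file must end up next to use_custom_tokenizer.py in the package.
tokenizer.to_disk(pathlib.Path("tokenizer"))
```

In `config.cfg`, the registered function is presumably referenced from the `[nlp.tokenizer]` block via `@tokenizers = "customize_tokenizer"`, which is how spaCy resolves the `@registry.tokenizers` entry at load time.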