Jacobo committed on
Commit
dc17a5f
·
1 Parent(s): 0c312b7

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -13,57 +13,57 @@ model-index:
13
  metrics:
14
  - name: TAG (XPOS) Accuracy
15
  type: accuracy
16
- value: 0.976560211
17
  - task:
18
  name: POS
19
  type: token-classification
20
  metrics:
21
  - name: POS (UPOS) Accuracy
22
  type: accuracy
23
- value: 0.9737034867
24
  - task:
25
  name: MORPH
26
  type: token-classification
27
  metrics:
28
  - name: Morph (UFeats) Accuracy
29
  type: accuracy
30
- value: 0.9067535892
31
  - task:
32
  name: LEMMA
33
  type: token-classification
34
  metrics:
35
  - name: Lemma Accuracy
36
  type: accuracy
37
- value: 0.9690155289
38
  - task:
39
  name: UNLABELED_DEPENDENCIES
40
  type: token-classification
41
  metrics:
42
  - name: Unlabeled Attachment Score (UAS)
43
  type: f_score
44
- value: 0.8079402285
45
  - task:
46
  name: LABELED_DEPENDENCIES
47
  type: token-classification
48
  metrics:
49
  - name: Labeled Attachment Score (LAS)
50
  type: f_score
51
- value: 0.7629651333
52
  - task:
53
  name: SENTS
54
  type: token-classification
55
  metrics:
56
  - name: Sentences F-Score
57
  type: f_score
58
- value: 0.6096618357
59
  ---
60
  | Feature | Description |
61
  | --- | --- |
62
  | **Name** | `grc_proiel_lg` |
63
- | **Version** | `3.5.2` |
64
- | **spaCy** | `>=3.5.2,<3.6.0` |
65
- | **Default Pipeline** | `tok2vec`, `morphologizer`, `tagger`, `parser`, `senter`, `lemmatizer`, `attribute_ruler` |
66
- | **Components** | `tok2vec`, `morphologizer`, `tagger`, `parser`, `senter`, `lemmatizer`, `attribute_ruler` |
67
  | **Vectors** | -1 keys, 200000 unique vectors (300 dimensions) |
68
  | **Sources** | n/a |
69
  | **License** | n/a |
@@ -87,16 +87,16 @@ model-index:
87
 
88
  | Type | Score |
89
  | --- | --- |
90
- | `POS_ACC` | 97.37 |
91
- | `MORPH_ACC` | 90.68 |
92
- | `TAG_ACC` | 97.66 |
93
- | `DEP_UAS` | 80.79 |
94
- | `DEP_LAS` | 76.30 |
95
- | `SENTS_P` | 60.04 |
96
- | `SENTS_R` | 61.92 |
97
- | `SENTS_F` | 60.97 |
98
- | `LEMMA_ACC` | 96.90 |
99
- | `TOK2VEC_LOSS` | 7368709.89 |
100
- | `MORPHOLOGIZER_LOSS` | 69168.26 |
101
- | `TAGGER_LOSS` | 19978.23 |
102
- | `PARSER_LOSS` | 1707276.97 |
 
13
  metrics:
14
  - name: TAG (XPOS) Accuracy
15
  type: accuracy
16
+ value: 0.9750219748
17
  - task:
18
  name: POS
19
  type: token-classification
20
  metrics:
21
  - name: POS (UPOS) Accuracy
22
  type: accuracy
23
+ value: 0.9726047466
24
  - task:
25
  name: MORPH
26
  type: token-classification
27
  metrics:
28
  - name: Morph (UFeats) Accuracy
29
  type: accuracy
30
+ value: 0.9088045708
31
  - task:
32
  name: LEMMA
33
  type: token-classification
34
  metrics:
35
  - name: Lemma Accuracy
36
  type: accuracy
37
+ value: 0.9732639906
38
  - task:
39
  name: UNLABELED_DEPENDENCIES
40
  type: token-classification
41
  metrics:
42
  - name: Unlabeled Attachment Score (UAS)
43
  type: f_score
44
+ value: 0.8111631995
45
  - task:
46
  name: LABELED_DEPENDENCIES
47
  type: token-classification
48
  metrics:
49
  - name: Labeled Attachment Score (LAS)
50
  type: f_score
51
+ value: 0.7622326399
52
  - task:
53
  name: SENTS
54
  type: token-classification
55
  metrics:
56
  - name: Sentences F-Score
57
  type: f_score
58
+ value: 0.6111922141
59
  ---
60
  | Feature | Description |
61
  | --- | --- |
62
  | **Name** | `grc_proiel_lg` |
63
+ | **Version** | `3.5.3` |
64
+ | **spaCy** | `>=3.5.3,<3.6.0` |
65
+ | **Default Pipeline** | `tok2vec`, `morphologizer`, `tagger`, `parser`, `lemmatizer`, `attribute_ruler` |
66
+ | **Components** | `tok2vec`, `morphologizer`, `tagger`, `parser`, `lemmatizer`, `attribute_ruler` |
67
  | **Vectors** | -1 keys, 200000 unique vectors (300 dimensions) |
68
  | **Sources** | n/a |
69
  | **License** | n/a |
 
87
 
88
  | Type | Score |
89
  | --- | --- |
90
+ | `POS_ACC` | 97.30 |
91
+ | `MORPH_ACC` | 90.60 |
92
+ | `TAG_ACC` | 97.70 |
93
+ | `DEP_UAS` | 80.68 |
94
+ | `DEP_LAS` | 76.07 |
95
+ | `SENTS_P` | 56.43 |
96
+ | `SENTS_R` | 62.02 |
97
+ | `SENTS_F` | 59.09 |
98
+ | `LEMMA_ACC` | 97.33 |
99
+ | `TOK2VEC_LOSS` | 7384506.01 |
100
+ | `MORPHOLOGIZER_LOSS` | 80044.78 |
101
+ | `TAGGER_LOSS` | 25253.04 |
102
+ | `PARSER_LOSS` | 1795439.09 |
accuracy.json CHANGED
@@ -3,71 +3,71 @@
3
  "token_p":1.0,
4
  "token_r":1.0,
5
  "token_f":1.0,
6
- "pos_acc":0.9701817636,
7
- "morph_acc":0.9025086375,
8
- "morph_micro_p":0.9633002644,
9
- "morph_micro_r":0.9624954841,
10
- "morph_micro_f":0.9628977061,
11
  "morph_per_feat":{
12
  "Case":{
13
- "p":0.9719293622,
14
- "r":0.9700118374,
15
- "f":0.9709696531
16
  },
17
  "Gender":{
18
- "p":0.9147584381,
19
- "r":0.9164567034,
20
- "f":0.9156067833
21
  },
22
  "Number":{
23
- "p":0.9901160292,
24
- "r":0.9888412017,
25
- "f":0.9894782049
26
  },
27
  "Aspect":{
28
- "p":0.9491643454,
29
- "r":0.9551506657,
30
- "f":0.9521480964
31
  },
32
  "Mood":{
33
- "p":0.9622093023,
34
- "r":0.9638905067,
35
- "f":0.9630491708
36
  },
37
  "Person":{
38
- "p":0.971788029,
39
- "r":0.9692015209,
40
- "f":0.9704930516
41
  },
42
  "Tense":{
43
- "p":0.956043956,
44
- "r":0.9525547445,
45
- "f":0.9542961609
46
  },
47
  "VerbForm":{
48
- "p":0.9882783883,
49
- "r":0.9843122948,
50
- "f":0.9862913544
51
  },
52
  "Voice":{
53
- "p":0.9494505495,
54
- "r":0.9459854015,
55
- "f":0.947714808
56
  },
57
  "PronType":{
58
- "p":0.9815436242,
59
- "r":0.9782608696,
60
- "f":0.9798994975
61
  },
62
  "Degree":{
63
- "p":0.8735632184,
64
- "r":0.8952879581,
65
- "f":0.8842921784
66
  },
67
  "Definite":{
68
- "p":0.9899888765,
69
  "r":0.991643454,
70
- "f":0.9908154745
71
  },
72
  "Reflex":{
73
  "p":1.0,
@@ -75,97 +75,97 @@
75
  "f":0.989010989
76
  },
77
  "Polarity":{
78
- "p":1.0,
79
  "r":0.9819004525,
80
- "f":0.9908675799
81
  },
82
  "Poss":{
83
- "p":0.875,
84
- "r":0.5384615385,
85
- "f":0.6666666667
86
  }
87
  },
88
  "tag_acc":0.9742376446,
89
- "sents_p":0.5938086304,
90
- "sents_r":0.6045845272,
91
- "sents_f":0.5991481306,
92
- "dep_uas":0.7957788794,
93
- "dep_las":0.7485353763,
94
  "dep_las_per_type":{
95
  "iobj":{
96
- "p":0.7371007371,
97
- "r":0.6864988558,
98
- "f":0.7109004739
99
  },
100
  "root":{
101
- "p":0.7711069418,
102
- "r":0.7851002865,
103
- "f":0.7780407004
104
  },
105
  "nsubj":{
106
- "p":0.7373271889,
107
  "r":0.7174887892,
108
- "f":0.7272727273
109
  },
110
  "advmod":{
111
- "p":0.6891117479,
112
- "r":0.6680555556,
113
- "f":0.6784203103
114
  },
115
  "advcl":{
116
- "p":0.644200627,
117
- "r":0.6815920398,
118
- "f":0.6623690572
119
  },
120
  "ccomp":{
121
- "p":0.5797101449,
122
- "r":0.4938271605,
123
- "f":0.5333333333
124
  },
125
  "discourse":{
126
- "p":0.80945758,
127
- "r":0.8005502063,
128
- "f":0.8049792531
129
  },
130
  "obj":{
131
- "p":0.763130793,
132
- "r":0.8169790518,
133
- "f":0.7891373802
134
  },
135
  "det":{
136
- "p":0.9290123457,
137
- "r":0.9245283019,
138
- "f":0.9267648999
139
  },
140
  "nmod":{
141
- "p":0.7,
142
  "r":0.7162162162,
143
- "f":0.7080152672
144
  },
145
  "cop":{
146
- "p":0.7061403509,
147
- "r":0.7594339623,
148
- "f":0.7318181818
149
  },
150
  "appos":{
151
- "p":0.4588235294,
152
- "r":0.237804878,
153
- "f":0.313253012
154
  },
155
  "case":{
156
- "p":0.9635949943,
157
- "r":0.9635949943,
158
- "f":0.9635949943
159
  },
160
  "acl":{
161
- "p":0.5285714286,
162
- "r":0.4512195122,
163
- "f":0.4868421053
164
  },
165
  "mark":{
166
- "p":0.8785046729,
167
- "r":0.8703703704,
168
- "f":0.8744186047
169
  },
170
  "obl":{
171
  "p":0.6829558999,
@@ -173,79 +173,74 @@
173
  "f":0.7009174312
174
  },
175
  "nsubj:pass":{
176
- "p":0.6777777778,
177
- "r":0.6421052632,
178
- "f":0.6594594595
179
  },
180
  "xcomp":{
181
- "p":0.5462962963,
182
- "r":0.4916666667,
183
- "f":0.5175438596
184
  },
185
  "cc":{
186
- "p":0.634194831,
187
- "r":0.6316831683,
188
- "f":0.6329365079
189
  },
190
  "conj":{
191
- "p":0.5798742138,
192
- "r":0.5987012987,
193
- "f":0.5891373802
194
  },
195
  "dislocated":{
196
- "p":0.3,
197
- "r":0.1071428571,
198
- "f":0.1578947368
199
  },
200
  "amod":{
201
- "p":0.7142857143,
202
- "r":0.6944444444,
203
- "f":0.7042253521
204
  },
205
  "parataxis":{
206
  "p":0.0,
207
  "r":0.0,
208
  "f":0.0
209
  },
210
- "obl:agent":{
211
- "p":0.7777777778,
212
- "r":0.3181818182,
213
- "f":0.4516129032
214
- },
215
  "dep":{
216
  "p":0.0,
217
  "r":0.0,
218
  "f":0.0
219
  },
220
  "nummod":{
221
- "p":0.8571428571,
222
- "r":0.8571428571,
223
- "f":0.8571428571
224
  },
225
  "fixed":{
226
- "p":0.6363636364,
227
  "r":0.875,
228
- "f":0.7368421053
229
  },
230
  "csubj:pass":{
231
- "p":0.0,
232
- "r":0.0,
233
- "f":0.0
 
 
 
 
 
234
  },
235
  "orphan":{
236
- "p":0.2142857143,
237
- "r":0.0857142857,
238
- "f":0.1224489796
239
  },
240
  "vocative":{
241
- "p":0.6428571429,
242
- "r":0.5901639344,
243
- "f":0.6153846154
244
- },
245
- "flat:name":{
246
- "p":0.8333333333,
247
- "r":0.7142857143,
248
- "f":0.7692307692
249
  },
250
  "aux":{
251
  "p":0.0,
@@ -256,8 +251,13 @@
256
  "p":0.0,
257
  "r":0.0,
258
  "f":0.0
 
 
 
 
 
259
  }
260
  },
261
- "lemma_acc":0.9693555656,
262
- "speed":5916.9433597887
263
  }
 
3
  "token_p":1.0,
4
  "token_r":1.0,
5
  "token_f":1.0,
6
+ "pos_acc":0.9705573081,
7
+ "morph_acc":0.9017575484,
8
+ "morph_micro_p":0.9639012033,
9
+ "morph_micro_r":0.9622471098,
10
+ "morph_micro_f":0.9630734463,
11
  "morph_per_feat":{
12
  "Case":{
13
+ "p":0.9706231063,
14
+ "r":0.9690911482,
15
+ "f":0.9698565223
16
  },
17
  "Gender":{
18
+ "p":0.9175940647,
19
+ "r":0.9184458295,
20
+ "f":0.9180197495
21
  },
22
  "Number":{
23
+ "p":0.9906521973,
24
+ "r":0.9892703863,
25
+ "f":0.9899608096
26
  },
27
  "Aspect":{
28
+ "p":0.9545454545,
29
+ "r":0.9565522074,
30
+ "f":0.9555477774
31
  },
32
  "Mood":{
33
+ "p":0.9621212121,
34
+ "r":0.961560862,
35
+ "f":0.9618409554
36
  },
37
  "Person":{
38
+ "p":0.9724980901,
39
+ "r":0.9680608365,
40
+ "f":0.9702743902
41
  },
42
  "Tense":{
43
+ "p":0.9582570487,
44
+ "r":0.9551094891,
45
+ "f":0.9566806799
46
  },
47
  "VerbForm":{
48
+ "p":0.9871841816,
49
+ "r":0.9835826341,
50
+ "f":0.985380117
51
  },
52
  "Voice":{
53
+ "p":0.9469058953,
54
+ "r":0.9437956204,
55
+ "f":0.9453481996
56
  },
57
  "PronType":{
58
+ "p":0.9835183317,
59
+ "r":0.9779264214,
60
+ "f":0.9807144055
61
  },
62
  "Degree":{
63
+ "p":0.8667529107,
64
+ "r":0.8769633508,
65
+ "f":0.8718282368
66
  },
67
  "Definite":{
68
+ "p":0.9921962096,
69
  "r":0.991643454,
70
+ "f":0.9919197548
71
  },
72
  "Reflex":{
73
  "p":1.0,
 
75
  "f":0.989010989
76
  },
77
  "Polarity":{
78
+ "p":0.995412844,
79
  "r":0.9819004525,
80
+ "f":0.9886104784
81
  },
82
  "Poss":{
83
+ "p":0.8333333333,
84
+ "r":0.3846153846,
85
+ "f":0.5263157895
86
  }
87
  },
88
  "tag_acc":0.9742376446,
89
+ "sents_p":0.5994180407,
90
+ "sents_r":0.5902578797,
91
+ "sents_f":0.5948026949,
92
+ "dep_uas":0.7958539883,
93
+ "dep_las":0.7480847228,
94
  "dep_las_per_type":{
95
  "iobj":{
96
+ "p":0.7191780822,
97
+ "r":0.7208237986,
98
+ "f":0.72
99
  },
100
  "root":{
101
+ "p":0.786614937,
102
+ "r":0.7745940783,
103
+ "f":0.7805582291
104
  },
105
  "nsubj":{
106
+ "p":0.701754386,
107
  "r":0.7174887892,
108
+ "f":0.7095343681
109
  },
110
  "advmod":{
111
+ "p":0.7222222222,
112
+ "r":0.6861111111,
113
+ "f":0.7037037037
114
  },
115
  "advcl":{
116
+ "p":0.6400651466,
117
+ "r":0.6517412935,
118
+ "f":0.6458504519
119
  },
120
  "ccomp":{
121
+ "p":0.5155279503,
122
+ "r":0.512345679,
123
+ "f":0.5139318885
124
  },
125
  "discourse":{
126
+ "p":0.8063186813,
127
+ "r":0.8074277854,
128
+ "f":0.8068728522
129
  },
130
  "obj":{
131
+ "p":0.7548717949,
132
+ "r":0.8114663727,
133
+ "f":0.7821466525
134
  },
135
  "det":{
136
+ "p":0.9367201426,
137
+ "r":0.9223343572,
138
+ "f":0.9294715897
139
  },
140
  "nmod":{
141
+ "p":0.7134615385,
142
  "r":0.7162162162,
143
+ "f":0.7148362235
144
  },
145
  "cop":{
146
+ "p":0.72,
147
+ "r":0.7641509434,
148
+ "f":0.7414187643
149
  },
150
  "appos":{
151
+ "p":0.3916666667,
152
+ "r":0.2865853659,
153
+ "f":0.3309859155
154
  },
155
  "case":{
156
+ "p":0.9578587699,
157
+ "r":0.9567690557,
158
+ "f":0.9573136027
159
  },
160
  "acl":{
161
+ "p":0.4932432432,
162
+ "r":0.4451219512,
163
+ "f":0.4679487179
164
  },
165
  "mark":{
166
+ "p":0.8930232558,
167
+ "r":0.8888888889,
168
+ "f":0.8909512761
169
  },
170
  "obl":{
171
  "p":0.6829558999,
 
173
  "f":0.7009174312
174
  },
175
  "nsubj:pass":{
176
+ "p":0.776119403,
177
+ "r":0.5473684211,
178
+ "f":0.6419753086
179
  },
180
  "xcomp":{
181
+ "p":0.6,
182
+ "r":0.475,
183
+ "f":0.5302325581
184
  },
185
  "cc":{
186
+ "p":0.6378968254,
187
+ "r":0.6366336634,
188
+ "f":0.6372646184
189
  },
190
  "conj":{
191
+ "p":0.5728643216,
192
+ "r":0.5922077922,
193
+ "f":0.5823754789
194
  },
195
  "dislocated":{
196
+ "p":0.0,
197
+ "r":0.0,
198
+ "f":0.0
199
  },
200
  "amod":{
201
+ "p":0.6968325792,
202
+ "r":0.712962963,
203
+ "f":0.704805492
204
  },
205
  "parataxis":{
206
  "p":0.0,
207
  "r":0.0,
208
  "f":0.0
209
  },
 
 
 
 
 
210
  "dep":{
211
  "p":0.0,
212
  "r":0.0,
213
  "f":0.0
214
  },
215
  "nummod":{
216
+ "p":0.8805970149,
217
+ "r":0.8428571429,
218
+ "f":0.8613138686
219
  },
220
  "fixed":{
221
+ "p":0.7777777778,
222
  "r":0.875,
223
+ "f":0.8235294118
224
  },
225
  "csubj:pass":{
226
+ "p":0.5,
227
+ "r":0.1428571429,
228
+ "f":0.2222222222
229
+ },
230
+ "obl:agent":{
231
+ "p":0.8571428571,
232
+ "r":0.2727272727,
233
+ "f":0.4137931034
234
  },
235
  "orphan":{
236
+ "p":0.1481481481,
237
+ "r":0.1142857143,
238
+ "f":0.1290322581
239
  },
240
  "vocative":{
241
+ "p":0.7755102041,
242
+ "r":0.6229508197,
243
+ "f":0.6909090909
 
 
 
 
 
244
  },
245
  "aux":{
246
  "p":0.0,
 
251
  "p":0.0,
252
  "r":0.0,
253
  "f":0.0
254
+ },
255
+ "flat:name":{
256
+ "p":0.8571428571,
257
+ "r":0.8571428571,
258
+ "f":0.8571428571
259
  }
260
  },
261
+ "lemma_acc":0.9724350308,
262
+ "speed":3885.4826243627
263
  }
attribute_ruler/patterns CHANGED
Binary files a/attribute_ruler/patterns and b/attribute_ruler/patterns differ
 
config.cfg CHANGED
@@ -11,7 +11,7 @@ seed = 0
11
 
12
  [nlp]
13
  lang = "grc"
14
- pipeline = ["tok2vec","morphologizer","tagger","parser","senter","lemmatizer","attribute_ruler"]
15
  batch_size = 128
16
  disabled = []
17
  before_creation = null
@@ -86,26 +86,6 @@ nO = null
86
  width = ${components.tok2vec.model.encode.width}
87
  upstream = "tok2vec"
88
 
89
- [components.senter]
90
- factory = "senter"
91
- overwrite = false
92
- scorer = {"@scorers":"spacy.senter_scorer.v1"}
93
-
94
- [components.senter.model]
95
- @architectures = "spacy.Tagger.v2"
96
- nO = null
97
- normalize = false
98
-
99
- [components.senter.model.tok2vec]
100
- @architectures = "spacy.HashEmbedCNN.v2"
101
- pretrained_vectors = true
102
- width = 12
103
- depth = 1
104
- embed_size = 2000
105
- window_size = 1
106
- maxout_pieces = 2
107
- subword_features = true
108
-
109
  [components.tagger]
110
  factory = "tagger"
111
  neg_prefix = "!"
@@ -177,7 +157,7 @@ patience = 5000
177
  max_epochs = 0
178
  max_steps = 20000
179
  eval_frequency = 200
180
- frozen_components = ["lemmatizer","senter"]
181
  annotating_components = []
182
  before_to_disk = null
183
  before_update = null
@@ -197,7 +177,7 @@ t = 0.0
197
 
198
  [training.logger]
199
  @loggers = "spacy.WandbLogger.v3"
200
- project_name = "proiel"
201
  remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
202
  log_dataset_dir = "./corpus"
203
  model_log_interval = 1000
 
11
 
12
  [nlp]
13
  lang = "grc"
14
+ pipeline = ["tok2vec","morphologizer","tagger","parser","lemmatizer","attribute_ruler"]
15
  batch_size = 128
16
  disabled = []
17
  before_creation = null
 
86
  width = ${components.tok2vec.model.encode.width}
87
  upstream = "tok2vec"
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  [components.tagger]
90
  factory = "tagger"
91
  neg_prefix = "!"
 
157
  max_epochs = 0
158
  max_steps = 20000
159
  eval_frequency = 200
160
+ frozen_components = ["lemmatizer"]
161
  annotating_components = []
162
  before_to_disk = null
163
  before_update = null
 
177
 
178
  [training.logger]
179
  @loggers = "spacy.WandbLogger.v3"
180
+ project_name = "greCy"
181
  remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
182
  log_dataset_dir = "./corpus"
183
  model_log_interval = 1000
grc_proiel_lg-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b42e5ef499b2d62a26171c3185b49068788e95372d8bc5e5017df6f74546bf4
3
- size 275792393
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f6d52ca36e56051ee6dedbf75068347011d027c2593ea9108d76aeb69033bf9
3
+ size 279450617
lemmatizer/cfg CHANGED
The diff for this file is too large to render. See raw diff
 
lemmatizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf615bba8c5f7d45fe4067080123c26a556e77975a56a34327b6b884147e0efb
3
- size 20965398
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39e4bd7cc76a93e28df338b870437f588562b373304ed017c70e86372846d794
3
+ size 24489602
lemmatizer/trees CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1955a3f6c2d5cf3de0861432f755a153c2781b03abe897c1db6c972ddd3f260
3
- size 4089585
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0b4cacd6282ecfa887b731fe8aed793709e236050f81662e72cd06fe73a6458
3
+ size 5318689
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"grc",
3
  "name":"proiel_lg",
4
- "version":"3.5.2",
5
  "description":"",
6
  "author":"",
7
  "email":"",
8
  "url":"",
9
  "license":"",
10
- "spacy_version":">=3.5.2,<3.6.0",
11
- "spacy_git_version":"Unknown",
12
  "vectors":{
13
  "width":300,
14
  "vectors":200000,
@@ -1117,7 +1117,6 @@
1117
  "morphologizer",
1118
  "tagger",
1119
  "parser",
1120
- "senter",
1121
  "lemmatizer",
1122
  "attribute_ruler"
1123
  ],
@@ -1126,7 +1125,6 @@
1126
  "morphologizer",
1127
  "tagger",
1128
  "parser",
1129
- "senter",
1130
  "lemmatizer",
1131
  "attribute_ruler"
1132
  ],
@@ -1134,73 +1132,73 @@
1134
 
1135
  ],
1136
  "performance":{
1137
- "pos_acc":0.9737034867,
1138
- "morph_acc":0.9067535892,
1139
  "morph_per_feat":{
1140
  "Case":{
1141
- "p":0.9723310171,
1142
- "r":0.9712266532,
1143
- "f":0.9717785214
1144
  },
1145
  "Gender":{
1146
- "p":0.9182302231,
1147
- "r":0.924205691,
1148
- "f":0.9212082671
1149
  },
1150
  "Number":{
1151
- "p":0.9897462455,
1152
- "r":0.9881087788,
1153
- "f":0.9889268343
1154
  },
1155
  "Person":{
1156
- "p":0.9769858946,
1157
- "r":0.9715762274,
1158
- "f":0.9742735517
1159
  },
1160
  "PronType":{
1161
- "p":0.9849673203,
1162
- "r":0.9785714286,
1163
- "f":0.9817589577
1164
  },
1165
  "Polarity":{
1166
  "p":1.0,
1167
- "r":0.9740932642,
1168
- "f":0.9868766404
1169
  },
1170
  "Aspect":{
1171
- "p":0.9714950553,
1172
- "r":0.9477866061,
1173
- "f":0.9594943982
1174
  },
1175
  "Mood":{
1176
- "p":0.974137931,
1177
- "r":0.970234688,
1178
- "f":0.9721823917
1179
  },
1180
  "Tense":{
1181
- "p":0.9599170698,
1182
- "r":0.9543112333,
1183
- "f":0.9571059432
1184
  },
1185
  "VerbForm":{
1186
- "p":0.9927436075,
1187
- "r":0.9862684518,
1188
- "f":0.9894954365
1189
  },
1190
  "Voice":{
1191
- "p":0.9568071873,
1192
- "r":0.9512195122,
1193
- "f":0.954005168
1194
  },
1195
  "Degree":{
1196
- "p":0.8930987821,
1197
- "r":0.8870967742,
1198
- "f":0.8900876601
1199
  },
1200
  "Definite":{
1201
- "p":0.9914024718,
1202
- "r":0.9967585089,
1203
- "f":0.9940732759
1204
  },
1205
  "Reflex":{
1206
  "p":1.0,
@@ -1209,138 +1207,138 @@
1209
  },
1210
  "Poss":{
1211
  "p":1.0,
1212
- "r":0.6842105263,
1213
- "f":0.8125
1214
  }
1215
  },
1216
- "tag_acc":0.976560211,
1217
- "dep_uas":0.8079402285,
1218
- "dep_las":0.7629651333,
1219
  "dep_las_per_type":{
1220
  "nsubj":{
1221
- "p":0.7510775862,
1222
- "r":0.7551462622,
1223
- "f":0.753106429
1224
  },
1225
  "discourse":{
1226
- "p":0.8236111111,
1227
- "r":0.8145604396,
1228
- "f":0.8190607735
1229
  },
1230
  "mark":{
1231
- "p":0.8644067797,
1232
- "r":0.8225806452,
1233
- "f":0.8429752066
1234
  },
1235
  "advmod":{
1236
- "p":0.7159763314,
1237
- "r":0.7191679049,
1238
- "f":0.7175685693
1239
  },
1240
  "advcl":{
1241
- "p":0.6649616368,
1242
- "r":0.7103825137,
1243
- "f":0.6869220608
1244
  },
1245
  "xcomp":{
1246
- "p":0.6435185185,
1247
- "r":0.556,
1248
- "f":0.5965665236
1249
  },
1250
  "cop":{
1251
- "p":0.7710280374,
1252
  "r":0.7710280374,
1253
- "f":0.7710280374
1254
  },
1255
  "root":{
1256
- "p":0.7906755471,
1257
- "r":0.8155053974,
1258
- "f":0.8028985507
1259
  },
1260
  "det":{
1261
- "p":0.9321444106,
1262
- "r":0.9293148309,
1263
- "f":0.9307274701
1264
  },
1265
  "nmod":{
1266
- "p":0.7436823105,
1267
- "r":0.7253521127,
1268
- "f":0.734402852
1269
  },
1270
  "obj":{
1271
- "p":0.7935103245,
1272
- "r":0.8380062305,
1273
- "f":0.8151515152
1274
  },
1275
  "case":{
1276
- "p":0.9585605234,
1277
  "r":0.9638157895,
1278
- "f":0.9611809732
1279
  },
1280
  "obl":{
1281
- "p":0.7083811711,
1282
- "r":0.7533577534,
1283
- "f":0.7301775148
1284
  },
1285
  "cc":{
1286
- "p":0.6361767729,
1287
- "r":0.6368312757,
1288
- "f":0.636503856
1289
  },
1290
  "conj":{
1291
- "p":0.5797101449,
1292
- "r":0.5774278215,
1293
- "f":0.5785667324
1294
  },
1295
  "obl:agent":{
1296
- "p":0.8333333333,
1297
- "r":0.4054054054,
1298
- "f":0.5454545455
1299
  },
1300
  "ccomp":{
1301
- "p":0.5142857143,
1302
- "r":0.447761194,
1303
- "f":0.4787234043
1304
  },
1305
  "nsubj:pass":{
1306
- "p":0.6421052632,
1307
- "r":0.5700934579,
1308
- "f":0.603960396
1309
  },
1310
  "amod":{
1311
- "p":0.7142857143,
1312
- "r":0.7276995305,
1313
- "f":0.7209302326
1314
  },
1315
  "acl":{
1316
- "p":0.512195122,
1317
- "r":0.3818181818,
1318
- "f":0.4375
1319
  },
1320
  "iobj":{
1321
- "p":0.7417061611,
1322
  "r":0.7228637413,
1323
- "f":0.7321637427
1324
  },
1325
  "appos":{
1326
- "p":0.4756097561,
1327
- "r":0.2765957447,
1328
- "f":0.3497757848
1329
  },
1330
  "nummod":{
1331
- "p":0.9016393443,
1332
- "r":0.8088235294,
1333
- "f":0.8527131783
1334
  },
1335
  "vocative":{
1336
- "p":0.7246376812,
1337
- "r":0.7246376812,
1338
- "f":0.7246376812
1339
  },
1340
  "orphan":{
1341
- "p":0.1111111111,
1342
- "r":0.0465116279,
1343
- "f":0.0655737705
1344
  },
1345
  "dep":{
1346
  "p":0.0,
@@ -1348,19 +1346,19 @@
1348
  "f":0.0
1349
  },
1350
  "parataxis":{
1351
- "p":0.25,
1352
  "r":0.1,
1353
- "f":0.1428571429
1354
  },
1355
  "dislocated":{
1356
- "p":0.4285714286,
1357
- "r":0.1153846154,
1358
- "f":0.1818181818
1359
  },
1360
  "csubj:pass":{
1361
- "p":0.0,
1362
- "r":0.0,
1363
- "f":0.0
1364
  },
1365
  "flat:name":{
1366
  "p":0.9285714286,
@@ -1373,9 +1371,9 @@
1373
  "f":0.0
1374
  },
1375
  "fixed":{
1376
- "p":1.0,
1377
  "r":0.5,
1378
- "f":0.6666666667
1379
  },
1380
  "aux":{
1381
  "p":0.0,
@@ -1383,14 +1381,14 @@
1383
  "f":0.0
1384
  }
1385
  },
1386
- "sents_p":0.6003805899,
1387
- "sents_r":0.6192345437,
1388
- "sents_f":0.6096618357,
1389
- "lemma_acc":0.9690155289,
1390
- "tok2vec_loss":73687.0988674555,
1391
- "morphologizer_loss":691.6825536788,
1392
- "tagger_loss":199.7822767168,
1393
- "parser_loss":17072.7697484601
1394
  },
1395
  "requirements":[
1396
 
 
1
  {
2
  "lang":"grc",
3
  "name":"proiel_lg",
4
+ "version":"3.5.3",
5
  "description":"",
6
  "author":"",
7
  "email":"",
8
  "url":"",
9
  "license":"",
10
+ "spacy_version":">=3.5.3,<3.6.0",
11
+ "spacy_git_version":"512241e12",
12
  "vectors":{
13
  "width":300,
14
  "vectors":200000,
 
1117
  "morphologizer",
1118
  "tagger",
1119
  "parser",
 
1120
  "lemmatizer",
1121
  "attribute_ruler"
1122
  ],
 
1125
  "morphologizer",
1126
  "tagger",
1127
  "parser",
 
1128
  "lemmatizer",
1129
  "attribute_ruler"
1130
  ],
 
1132
 
1133
  ],
1134
  "performance":{
1135
+ "pos_acc":0.9726047466,
1136
+ "morph_acc":0.9088045708,
1137
  "morph_per_feat":{
1138
  "Case":{
1139
+ "p":0.9753507774,
1140
+ "r":0.973750631,
1141
+ "f":0.9745500474
1142
  },
1143
  "Gender":{
1144
+ "p":0.9225486456,
1145
+ "r":0.9256092893,
1146
+ "f":0.9240764331
1147
  },
1148
  "Number":{
1149
+ "p":0.9908836631,
1150
+ "r":0.9890393961,
1151
+ "f":0.9899606707
1152
  },
1153
  "Person":{
1154
+ "p":0.9730029586,
1155
+ "r":0.9712070875,
1156
+ "f":0.9721041936
1157
  },
1158
  "PronType":{
1159
+ "p":0.9859293194,
1160
+ "r":0.9782467532,
1161
+ "f":0.9820730117
1162
  },
1163
  "Polarity":{
1164
  "p":1.0,
1165
+ "r":0.9844559585,
1166
+ "f":0.9921671018
1167
  },
1168
  "Aspect":{
1169
+ "p":0.9670710572,
1170
+ "r":0.9500567537,
1171
+ "f":0.9584884054
1172
  },
1173
  "Mood":{
1174
+ "p":0.9747416762,
1175
+ "r":0.9719519176,
1176
+ "f":0.9733447979
1177
  },
1178
  "Tense":{
1179
+ "p":0.9620034542,
1180
+ "r":0.9567159052,
1181
+ "f":0.9593523941
1182
  },
1183
  "VerbForm":{
1184
+ "p":0.9906735751,
1185
+ "r":0.9845520082,
1186
+ "f":0.9876033058
1187
  },
1188
  "Voice":{
1189
+ "p":0.9537132988,
1190
+ "r":0.9484713157,
1191
+ "f":0.9510850844
1192
  },
1193
  "Degree":{
1194
+ "p":0.9129834254,
1195
+ "r":0.8884408602,
1196
+ "f":0.9005449591
1197
  },
1198
  "Definite":{
1199
+ "p":0.9919441461,
1200
+ "r":0.9978390059,
1201
+ "f":0.9948828441
1202
  },
1203
  "Reflex":{
1204
  "p":1.0,
 
1207
  },
1208
  "Poss":{
1209
  "p":1.0,
1210
+ "r":0.7368421053,
1211
+ "f":0.8484848485
1212
  }
1213
  },
1214
+ "tag_acc":0.9750219748,
1215
+ "dep_uas":0.8111631995,
1216
+ "dep_las":0.7622326399,
1217
  "dep_las_per_type":{
1218
  "nsubj":{
1219
+ "p":0.7627659574,
1220
+ "r":0.7768147346,
1221
+ "f":0.769726248
1222
  },
1223
  "discourse":{
1224
+ "p":0.8019125683,
1225
+ "r":0.8063186813,
1226
+ "f":0.804109589
1227
  },
1228
  "mark":{
1229
+ "p":0.8448979592,
1230
+ "r":0.8346774194,
1231
+ "f":0.8397565923
1232
  },
1233
  "advmod":{
1234
+ "p":0.7207207207,
1235
+ "r":0.7132243685,
1236
+ "f":0.71695295
1237
  },
1238
  "advcl":{
1239
+ "p":0.6761133603,
1240
+ "r":0.6844262295,
1241
+ "f":0.6802443992
1242
  },
1243
  "xcomp":{
1244
+ "p":0.6180904523,
1245
+ "r":0.492,
1246
+ "f":0.5478841871
1247
  },
1248
  "cop":{
1249
+ "p":0.7466063348,
1250
  "r":0.7710280374,
1251
+ "f":0.7586206897
1252
  },
1253
  "root":{
1254
+ "p":0.8030888031,
1255
+ "r":0.8164867517,
1256
+ "f":0.8097323601
1257
  },
1258
  "det":{
1259
+ "p":0.9293501962,
1260
+ "r":0.9241110147,
1261
+ "f":0.9267232007
1262
  },
1263
  "nmod":{
1264
+ "p":0.731316726,
1265
+ "r":0.7235915493,
1266
+ "f":0.7274336283
1267
  },
1268
  "obj":{
1269
+ "p":0.7871720117,
1270
+ "r":0.8411214953,
1271
+ "f":0.813253012
1272
  },
1273
  "case":{
1274
+ "p":0.9564744287,
1275
  "r":0.9638157895,
1276
+ "f":0.9601310759
1277
  },
1278
  "obl":{
1279
+ "p":0.6971098266,
1280
+ "r":0.7362637363,
1281
+ "f":0.716152019
1282
  },
1283
  "cc":{
1284
+ "p":0.6632337796,
1285
+ "r":0.6625514403,
1286
+ "f":0.6628924344
1287
  },
1288
  "conj":{
1289
+ "p":0.6045576408,
1290
+ "r":0.5918635171,
1291
+ "f":0.5981432361
1292
  },
1293
  "obl:agent":{
1294
+ "p":0.9166666667,
1295
+ "r":0.2972972973,
1296
+ "f":0.4489795918
1297
  },
1298
  "ccomp":{
1299
+ "p":0.5187165775,
1300
+ "r":0.4825870647,
1301
+ "f":0.5
1302
  },
1303
  "nsubj:pass":{
1304
+ "p":0.6904761905,
1305
+ "r":0.5420560748,
1306
+ "f":0.6073298429
1307
  },
1308
  "amod":{
1309
+ "p":0.6901408451,
1310
+ "r":0.6901408451,
1311
+ "f":0.6901408451
1312
  },
1313
  "acl":{
1314
+ "p":0.4444444444,
1315
+ "r":0.4363636364,
1316
+ "f":0.4403669725
1317
  },
1318
  "iobj":{
1319
+ "p":0.7162471396,
1320
  "r":0.7228637413,
1321
+ "f":0.7195402299
1322
  },
1323
  "appos":{
1324
+ "p":0.4074074074,
1325
+ "r":0.3120567376,
1326
+ "f":0.3534136546
1327
  },
1328
  "nummod":{
1329
+ "p":0.8644067797,
1330
+ "r":0.75,
1331
+ "f":0.8031496063
1332
  },
1333
  "vocative":{
1334
+ "p":0.7118644068,
1335
+ "r":0.6086956522,
1336
+ "f":0.65625
1337
  },
1338
  "orphan":{
1339
+ "p":0.1428571429,
1340
+ "r":0.0930232558,
1341
+ "f":0.1126760563
1342
  },
1343
  "dep":{
1344
  "p":0.0,
 
1346
  "f":0.0
1347
  },
1348
  "parataxis":{
1349
+ "p":0.1666666667,
1350
  "r":0.1,
1351
+ "f":0.125
1352
  },
1353
  "dislocated":{
1354
+ "p":0.5555555556,
1355
+ "r":0.1923076923,
1356
+ "f":0.2857142857
1357
  },
1358
  "csubj:pass":{
1359
+ "p":0.25,
1360
+ "r":0.2,
1361
+ "f":0.2222222222
1362
  },
1363
  "flat:name":{
1364
  "p":0.9285714286,
 
1371
  "f":0.0
1372
  },
1373
  "fixed":{
1374
+ "p":0.8333333333,
1375
  "r":0.5,
1376
+ "f":0.625
1377
  },
1378
  "aux":{
1379
  "p":0.0,
 
1381
  "f":0.0
1382
  }
1383
  },
1384
+ "sents_p":0.6061776062,
1385
+ "sents_r":0.6162904809,
1386
+ "sents_f":0.6111922141,
1387
+ "lemma_acc":0.9732639906,
1388
+ "tok2vec_loss":73002.5595581873,
1389
+ "morphologizer_loss":779.3010618389,
1390
+ "tagger_loss":218.2356933637,
1391
+ "parser_loss":17925.5136122362
1392
  },
1393
  "requirements":[
1394
 
morphologizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b1c3a61faa9b14f58459078510d7627055309b97490b8c50cebd9102b7b6b9b
3
  size 1058262
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e8d56f2fd0e91993c9950c7db2020a989445e6e3fe6696b22066aed68711527
3
  size 1058262
parser/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e32ba9d0a7065e66166a0561901cdf9ffefb2670fba6cdfb3a6c8c2b1cd7309b
3
  size 1782009
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de71e831cfc42e7cf212530b32489a38fa6a56e791c0102fcfd7dd086b535279
3
  size 1782009
tagger/model CHANGED
Binary files a/tagger/model and b/tagger/model differ
 
tok2vec/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b326a74ae339e633ed84b4de13ab888ed8fbf46e6c982fcca02efad920c8dc1
3
  size 35970008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6ebd7dcc4d53847a71bf3c39cb7ae9bb8fd984132dbe83cd0510aec777b735e
3
  size 35970008
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14cf2b2886c99f239c793ff56226859855214b5a9fcaa2df46c93dd68f3a3ec8
3
- size 11888249
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e11087e9bbbfcc4871885e5930e003630da432f3a604f83a687b9f53efe5869a
3
+ size 17727257