KurtMica committed on
Commit
f28817d
1 Parent(s): 13c30e2

Model files.

Browse files
.gitattributes CHANGED
@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ *.th filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - allennlp
4
+ ---
5
+
6
+ # TODO: Fill this model card
best.th ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:547d784aabec69b259f6f40ad469a2ccdf25a83673587a54cb01ee57da0c9ce5
3
+ size 504059851
config.json ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_reader": {
3
+ "type": "multitask",
4
+ "readers": {
5
+ "ner": {
6
+ "type": "wikiann",
7
+ "token_indexers": {
8
+ "transformer": {
9
+ "type": "pretrained_transformer_mismatched",
10
+ "max_length": 512,
11
+ "model_name": "MLRS/BERTu"
12
+ }
13
+ }
14
+ }
15
+ }
16
+ },
17
+ "model": {
18
+ "type": "multitask",
19
+ "arg_name_mapping": {
20
+ "backbone": {
21
+ "tokens": "text",
22
+ "words": "text"
23
+ }
24
+ },
25
+ "backbone": {
26
+ "type": "embedder_and_mask",
27
+ "text_field_embedder": {
28
+ "token_embedders": {
29
+ "transformer": {
30
+ "type": "pretrained_transformer_mismatched_with_dropout",
31
+ "last_layer_only": false,
32
+ "layer_dropout": 0.1,
33
+ "max_length": 512,
34
+ "model_name": "MLRS/BERTu",
35
+ "tokenizer_kwargs": {},
36
+ "train_parameters": true
37
+ }
38
+ }
39
+ }
40
+ },
41
+ "heads": {
42
+ "ner": {
43
+ "type": "crf_tagger",
44
+ "calculate_span_f1": true,
45
+ "constrain_crf_decoding": true,
46
+ "dropout": 0.2,
47
+ "encoder": {
48
+ "type": "pass_through",
49
+ "input_dim": 768
50
+ },
51
+ "include_start_end_transitions": false,
52
+ "label_encoding": "BIO"
53
+ }
54
+ }
55
+ },
56
+ "train_data_path": {
57
+ "ner": "panx_dataset/mt/train"
58
+ },
59
+ "validation_data_path": {
60
+ "ner": "panx_dataset/mt/dev"
61
+ },
62
+ "trainer": {
63
+ "callbacks": [
64
+ {
65
+ "tensorboard_writer": {
66
+ "should_log_learning_rate": true,
67
+ "should_log_parameter_statistics": true
68
+ },
69
+ "type": "tensorboard"
70
+ }
71
+ ],
72
+ "cuda_device": 0,
73
+ "grad_norm": 5,
74
+ "learning_rate_scheduler": {
75
+ "type": "ulmfit_sqrt",
76
+ "affected_group_count": 2,
77
+ "decay_factor": 0.05,
78
+ "discriminative_fine_tuning": true,
79
+ "factor": 5,
80
+ "gradual_unfreezing": true,
81
+ "model_size": 1,
82
+ "start_step": 2,
83
+ "warmup_steps": 2
84
+ },
85
+ "num_epochs": 200,
86
+ "optimizer": {
87
+ "type": "huggingface_adamw",
88
+ "betas": [
89
+ 0.9,
90
+ 0.999
91
+ ],
92
+ "correct_bias": false,
93
+ "lr": 0.0005,
94
+ "parameter_groups": [
95
+ [
96
+ [
97
+ "text_field_embedder.*transformer_model.embeddings.*_embeddings.*",
98
+ "text_field_embedder.*transformer_model.encoder.*.(key|query|value|dense).weight"
99
+ ],
100
+ {}
101
+ ],
102
+ [
103
+ [
104
+ "text_field_embedder.*transformer_model.embeddings.LayerNorm.*",
105
+ "text_field_embedder.*transformer_model.encoder.*.output.LayerNorm.*",
106
+ "text_field_embedder.*transformer_model.encoder.*.(key|query|value|dense).bias",
107
+ "text_field_embedder.*transformer_model.pooler.dense.bias"
108
+ ],
109
+ {
110
+ "weight_decay": 0
111
+ }
112
+ ],
113
+ [
114
+ [
115
+ "text_field_embedder.*._scalar_mix.*",
116
+ "text_field_embedder.*transformer_model.pooler.dense.weight",
117
+ "_head_sentinel",
118
+ "head_arc_feedforward._linear_layers.*.weight",
119
+ "child_arc_feedforward._linear_layers.*.weight",
120
+ "head_tag_feedforward._linear_layers.*.weight",
121
+ "child_tag_feedforward._linear_layers.*.weight",
122
+ "arc_attention._weight_matrix",
123
+ "tag_bilinear.weight",
124
+ "tag_projection_layer._module.weight",
125
+ "crf",
126
+ "linear.weight",
127
+ "tagger_linear.weight"
128
+ ],
129
+ {}
130
+ ],
131
+ [
132
+ [
133
+ "head_arc_feedforward._linear_layers.*.bias",
134
+ "child_arc_feedforward._linear_layers.*.bias",
135
+ "head_tag_feedforward._linear_layers.*.bias",
136
+ "child_tag_feedforward._linear_layers.*.bias",
137
+ "arc_attention._bias",
138
+ "tag_bilinear.bias",
139
+ "tag_projection_layer._module.bias",
140
+ "linear.bias",
141
+ "tagger_linear.bias"
142
+ ],
143
+ {
144
+ "weight_decay": 0
145
+ }
146
+ ]
147
+ ],
148
+ "weight_decay": 0.01
149
+ },
150
+ "patience": 20,
151
+ "validation_metric": [
152
+ "+ner_f1-measure-overall"
153
+ ]
154
+ },
155
+ "data_loader": {
156
+ "type": "multitask",
157
+ "scheduler": {
158
+ "type": "unbalanced_homogeneous_roundrobin",
159
+ "batch_size": 64,
160
+ "dataset_sizes": {
161
+ "ner": 100
162
+ }
163
+ },
164
+ "shuffle": true
165
+ },
166
+ "numpy_seed": 1337,
167
+ "pytorch_seed": 133,
168
+ "random_seed": 13370,
169
+ "validation_data_loader": {
170
+ "type": "multitask",
171
+ "scheduler": {
172
+ "type": "homogeneous_roundrobin",
173
+ "batch_size": 64
174
+ },
175
+ "shuffle": true
176
+ }
177
+ }
log/train/events.out.tfevents ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74d9f10dae08413337cb5f4642d4a33d011931ebc1d467fc252d1c5a962379ee
3
+ size 651225
log/validation/events.out.tfevents ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c75764f815d7056d7e8b0983fae22d57a1811b2ae2643db7f6fd72a70b1db68
3
+ size 16894
metrics.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_epoch": 33,
3
+ "peak_worker_0_memory_MB": 4608.48828125,
4
+ "peak_gpu_0_memory_MB": 4295.302734375,
5
+ "training_duration": "0:02:50.854943",
6
+ "training_start_epoch": 0,
7
+ "training_epochs": 52,
8
+ "epoch": 52,
9
+ "training_ner_accuracy": 1.0,
10
+ "training_ner_accuracy3": 1.0,
11
+ "training_ner_precision-overall": 0.9999999999999991,
12
+ "training_ner_recall-overall": 0.9999999999999991,
13
+ "training_ner_f1-measure-overall": 0.9999999999999492,
14
+ "training_loss": 7.62939453125e-06,
15
+ "training_worker_0_memory_MB": 4608.48828125,
16
+ "training_gpu_0_memory_MB": 4295.302734375,
17
+ "validation_ner_accuracy": 0.9371293001186239,
18
+ "validation_ner_accuracy3": 0.9572953736654805,
19
+ "validation_ner_precision-overall": 0.7948717948717942,
20
+ "validation_ner_recall-overall": 0.7815126050420161,
21
+ "validation_ner_f1-measure-overall": 0.7881355932202884,
22
+ "validation_loss": 234.9528350830078,
23
+ "best_validation_ner_accuracy": 0.9442467378410438,
24
+ "best_validation_ner_accuracy3": 0.9584816132858838,
25
+ "best_validation_ner_precision-overall": 0.822033898305084,
26
+ "best_validation_ner_recall-overall": 0.8151260504201674,
27
+ "best_validation_ner_f1-measure-overall": 0.8185654008438312,
28
+ "best_validation_loss": 242.68551635742188
29
+ }
vocabulary/.lock ADDED
File without changes
vocabulary/labels.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ O
2
+ I-LOC
3
+ I-ORG
4
+ I-PER
5
+ B-ORG
6
+ B-LOC
7
+ B-PER
vocabulary/non_padded_namespaces.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *tags
2
+ *labels