{
"dataset_reader": {
"type": "multitask",
"readers": {
"ner": {
"type": "wikiann",
"token_indexers": {
"transformer": {
"type": "pretrained_transformer_mismatched",
"max_length": 512,
"model_name": "MLRS/BERTu"
}
}
}
}
},
"model": {
"type": "multitask",
"arg_name_mapping": {
"backbone": {
"tokens": "text",
"words": "text"
}
},
"backbone": {
"type": "embedder_and_mask",
"text_field_embedder": {
"token_embedders": {
"transformer": {
"type": "pretrained_transformer_mismatched_with_dropout",
"last_layer_only": false,
"layer_dropout": 0.1,
"max_length": 512,
"model_name": "MLRS/BERTu",
"tokenizer_kwargs": {},
"train_parameters": true
}
}
}
},
"heads": {
"ner": {
"type": "crf_tagger",
"calculate_span_f1": true,
"constrain_crf_decoding": true,
"dropout": 0.2,
"encoder": {
"type": "pass_through",
"input_dim": 768
},
"include_start_end_transitions": false,
"label_encoding": "BIO"
}
}
},
"train_data_path": {
"ner": "panx_dataset/mt/train"
},
"validation_data_path": {
"ner": "panx_dataset/mt/dev"
},
"trainer": {
"callbacks": [
{
"tensorboard_writer": {
"should_log_learning_rate": true,
"should_log_parameter_statistics": true
},
"type": "tensorboard"
}
],
"cuda_device": 0,
"grad_norm": 5,
"learning_rate_scheduler": {
"type": "ulmfit_sqrt",
"affected_group_count": 2,
"decay_factor": 0.05,
"discriminative_fine_tuning": true,
"factor": 5,
"gradual_unfreezing": true,
"model_size": 1,
"start_step": 2,
"warmup_steps": 2
},
"num_epochs": 200,
"optimizer": {
"type": "huggingface_adamw",
"betas": [
0.9,
0.999
],
"correct_bias": false,
"lr": 0.0005,
"parameter_groups": [
[
[
"text_field_embedder.*transformer_model.embeddings.*_embeddings.*",
"text_field_embedder.*transformer_model.encoder.*.(key|query|value|dense).weight"
],
{}
],
[
[
"text_field_embedder.*transformer_model.embeddings.LayerNorm.*",
"text_field_embedder.*transformer_model.encoder.*.output.LayerNorm.*",
"text_field_embedder.*transformer_model.encoder.*.(key|query|value|dense).bias",
"text_field_embedder.*transformer_model.pooler.dense.bias"
],
{
"weight_decay": 0
}
],
[
[
"text_field_embedder.*._scalar_mix.*",
"text_field_embedder.*transformer_model.pooler.dense.weight",
"_head_sentinel",
"head_arc_feedforward._linear_layers.*.weight",
"child_arc_feedforward._linear_layers.*.weight",
"head_tag_feedforward._linear_layers.*.weight",
"child_tag_feedforward._linear_layers.*.weight",
"arc_attention._weight_matrix",
"tag_bilinear.weight",
"tag_projection_layer._module.weight",
"crf",
"linear.weight",
"tagger_linear.weight"
],
{}
],
[
[
"head_arc_feedforward._linear_layers.*.bias",
"child_arc_feedforward._linear_layers.*.bias",
"head_tag_feedforward._linear_layers.*.bias",
"child_tag_feedforward._linear_layers.*.bias",
"arc_attention._bias",
"tag_bilinear.bias",
"tag_projection_layer._module.bias",
"linear.bias",
"tagger_linear.bias"
],
{
"weight_decay": 0
}
]
],
"weight_decay": 0.01
},
"patience": 20,
"validation_metric": [
"+ner_f1-measure-overall"
]
},
"data_loader": {
"type": "multitask",
"scheduler": {
"type": "unbalanced_homogeneous_roundrobin",
"batch_size": 64,
"dataset_sizes": {
"ner": 100
}
},
"shuffle": true
},
"numpy_seed": 1337,
"pytorch_seed": 133,
"random_seed": 13370,
"validation_data_loader": {
"type": "multitask",
"scheduler": {
"type": "homogeneous_roundrobin",
"batch_size": 64
},
"shuffle": true
}
}