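# Jsonnet training configuration for a span-based event extraction model on ACE.
# Values wrapped in env.str / env.json are read from environment variables
# (DATA_PATH, ENCODER, SMOOTHING, GRAD_ACC, LR, CUDA_DEVICES), falling back to
# the second argument as the default. Assuming an AllenNLP-style setup, a
# hypothetical invocation could look like:
#   LR=5e-4 CUDA_DEVICES="[0,1]" allennlp train <this config> -s <output dir>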
local env = import "../env.jsonnet";
local dataset_path = env.str("DATA_PATH", "data/ace/events");
local ontology_path = "data/ace/ontology.tsv";
local debug = false;
# embedding
local label_dim = 64;
local pretrained_model = env.str("ENCODER", "roberta-large");
# module
local dropout = 0.2;
local bio_dim = 512;
local bio_layers = 2;
local span_typing_dims = [256, 256];
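# label smoothing for event and argument types; both read the same SMOOTHING variable ("0.0" disables it)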
local event_smoothing_factor = env.json("SMOOTHING", "0.0");
local arg_smoothing_factor = env.json("SMOOTHING", "0.0");
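# likely the number of lower encoder layers to keep frozen (0 = fine-tune all)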
local layer_fix = 0;
# training
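# relative weight of the span-typing loss against the span-finding loss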
local typing_loss_factor = 8.0;
local grad_acc = env.json("GRAD_ACC", "1");
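# token budget per batch; inference batches can be larger since no gradients are stored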
local max_training_tokens = 512;
local max_inference_tokens = 1024;
local lr = env.json("LR", "1e-3");
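# parsed as a JSON list, e.g. CUDA_DEVICES="[0,1]" selects two GPUs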
local cuda_devices = env.json("CUDA_DEVICES", "[0]");
{
dataset_reader: {
type: "concrete",
debug: debug,
pretrained_model: pretrained_model,
ignore_label: false,
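# conditional field: cap the reader at 128 instances in debug mode, omit otherwise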
[ if debug then "max_instances" ]: 128,
event_smoothing_factor: event_smoothing_factor,
arg_smoothing_factor: arg_smoothing_factor,
},
train_data_path: dataset_path + "/train.tar.gz",
validation_data_path: dataset_path + "/dev.tar.gz",
test_data_path: dataset_path + "/test.tar.gz",
datasets_for_vocab_creation: ["train"],
data_loader: {
batch_sampler: {
type: "max_tokens_sampler",
max_tokens: max_training_tokens,
sorting_keys: ["tokens"]
}
},
validation_data_loader: {
batch_sampler: {
type: "max_tokens_sampler",
max_tokens: max_inference_tokens,
sorting_keys: ["tokens"]
}
},
model: {
type: "span",
word_embedding: {
token_embedders: {
"pieces": {
type: "pretrained_transformer",
model_name: pretrained_model,
}
},
},
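# span representations combine self-attentive pooling with bidirectional endpoint features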
span_extractor: {
type: "combo",
sub_extractors: [
{
type: "self_attentive",
},
{
type: "bidirectional_endpoint",
}
]
},
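# candidate spans are proposed via BIO tagging over a bidirectional LSTM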
span_finder: {
type: "bio",
bio_encoder: {
type: "lstm",
hidden_size: bio_dim,
num_layers: bio_layers,
bidirectional: true,
dropout: dropout,
},
no_label: false,
},
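# each proposed span is then classified into an ontology label by an MLP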
span_typing: {
type: "mlp",
hidden_dims: span_typing_dims,
},
metrics: [{type: "srl"}],
ontology_path: ontology_path,
typing_loss_factor: typing_loss_factor,
label_dim: label_dim,
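# decoding caps: at most 128 spans per instance, nested at most 2 levels deep (likely trigger, then arguments)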
max_decoding_spans: 128,
max_recursion_depth: 2,
debug: debug,
},
trainer: {
num_epochs: 128,
patience: null,
[if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
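# checkpoint selection on argument classification F1 ("+" = higher is better)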
validation_metric: "+arg-c_f",
num_gradient_accumulation_steps: grad_acc,
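# discriminative learning rates: embeddings frozen (lr 0), encoder and pooler at 1e-5, the rest at the base lr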
optimizer: {
type: "transformer",
base: {
type: "adam",
lr: lr,
},
embeddings_lr: 0.0,
encoder_lr: 1e-5,
pooler_lr: 1e-5,
layer_fix: layer_fix,
}
},
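# hidden field ("::"): usable by other Jsonnet files but not rendered into the final config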
cuda_devices:: cuda_devices,
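# more than one device turns on distributed training; exactly one also evaluates on the test set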
[if std.length(cuda_devices) > 1 then "distributed"]: {
"cuda_devices": cuda_devices
},
[if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true,
}