{
  "dataset_reader": {
    "type": "multitask",
    "readers": {
      "ner": {
        "type": "wikiann",
        "token_indexers": {
          "transformer": {
            "type": "pretrained_transformer_mismatched",
            "max_length": 512,
            "model_name": "MLRS/BERTu"
          }
        }
      }
    }
  },
  "model": {
    "type": "multitask",
    "arg_name_mapping": {
      "backbone": {
        "tokens": "text",
        "words": "text"
      }
    },
    "backbone": {
      "type": "embedder_and_mask",
      "text_field_embedder": {
        "token_embedders": {
          "transformer": {
            "type": "pretrained_transformer_mismatched_with_dropout",
            "last_layer_only": false,
            "layer_dropout": 0.1,
            "max_length": 512,
            "model_name": "MLRS/BERTu",
            "tokenizer_kwargs": {},
            "train_parameters": true
          }
        }
      }
    },
    "heads": {
      "ner": {
        "type": "crf_tagger",
        "calculate_span_f1": true,
        "constrain_crf_decoding": true,
        "dropout": 0.2,
        "encoder": {
          "type": "pass_through",
          "input_dim": 768
        },
        "include_start_end_transitions": false,
        "label_encoding": "BIO"
      }
    }
  },
  "train_data_path": {
    "ner": "panx_dataset/mt/train"
  },
  "validation_data_path": {
    "ner": "panx_dataset/mt/dev"
  },
  "trainer": {
    "callbacks": [
      {
        "tensorboard_writer": {
          "should_log_learning_rate": true,
          "should_log_parameter_statistics": true
        },
        "type": "tensorboard"
      }
    ],
    "cuda_device": 0,
    "grad_norm": 5,
    "learning_rate_scheduler": {
      "type": "ulmfit_sqrt",
      "affected_group_count": 2,
      "decay_factor": 0.05,
      "discriminative_fine_tuning": true,
      "factor": 5,
      "gradual_unfreezing": true,
      "model_size": 1,
      "start_step": 2,
      "warmup_steps": 2
    },
    "num_epochs": 200,
    "optimizer": {
      "type": "huggingface_adamw",
      "betas": [
        0.9,
        0.999
      ],
      "correct_bias": false,
      "lr": 0.0005,
      "parameter_groups": [
        [
          [
            "text_field_embedder.*transformer_model.embeddings.*_embeddings.*",
            "text_field_embedder.*transformer_model.encoder.*.(key|query|value|dense).weight"
          ],
          {}
        ],
        [
          [
            "text_field_embedder.*transformer_model.embeddings.LayerNorm.*",
            "text_field_embedder.*transformer_model.encoder.*.output.LayerNorm.*",
            "text_field_embedder.*transformer_model.encoder.*.(key|query|value|dense).bias",
            "text_field_embedder.*transformer_model.pooler.dense.bias"
          ],
          {
            "weight_decay": 0
          }
        ],
        [
          [
            "text_field_embedder.*._scalar_mix.*",
            "text_field_embedder.*transformer_model.pooler.dense.weight",
            "_head_sentinel",
            "head_arc_feedforward._linear_layers.*.weight",
            "child_arc_feedforward._linear_layers.*.weight",
            "head_tag_feedforward._linear_layers.*.weight",
            "child_tag_feedforward._linear_layers.*.weight",
            "arc_attention._weight_matrix",
            "tag_bilinear.weight",
            "tag_projection_layer._module.weight",
            "crf",
            "linear.weight",
            "tagger_linear.weight"
          ],
          {}
        ],
        [
          [
            "head_arc_feedforward._linear_layers.*.bias",
            "child_arc_feedforward._linear_layers.*.bias",
            "head_tag_feedforward._linear_layers.*.bias",
            "child_tag_feedforward._linear_layers.*.bias",
            "arc_attention._bias",
            "tag_bilinear.bias",
            "tag_projection_layer._module.bias",
            "linear.bias",
            "tagger_linear.bias"
          ],
          {
            "weight_decay": 0
          }
        ]
      ],
      "weight_decay": 0.01
    },
    "patience": 20,
    "validation_metric": [
      "+ner_f1-measure-overall"
    ]
  },
  "data_loader": {
    "type": "multitask",
    "scheduler": {
      "type": "unbalanced_homogeneous_roundrobin",
      "batch_size": 64,
      "dataset_sizes": {
        "ner": 100
      }
    },
    "shuffle": true
  },
  "numpy_seed": 1337,
  "pytorch_seed": 133,
  "random_seed": 13370,
  "validation_data_loader": {
    "type": "multitask",
    "scheduler": {
      "type": "homogeneous_roundrobin",
      "batch_size": 64
    },
    "shuffle": true
  }
}