Noemi Aepli committed
Commit 90da933
1 Parent(s): f8a82af

first model version

README.md CHANGED
@@ -1,3 +1,55 @@
  ---
- license: cc
  ---
+
+ # swiss_german_pos_model
+
+ The *swiss_german_pos_model* is a part-of-speech tagging model for Swiss German. It is trained to predict [Universal POS tags (UPOS)](https://universaldependencies.org/u/pos/).
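Since the config added in this commit registers a standard `BertForTokenClassification` head, the checkpoint should load with the `transformers` token-classification pipeline. A minimal usage sketch; the repository ID and the example sentence are placeholders, not part of this commit:

```python
from transformers import pipeline

# Placeholder repo ID: substitute the ID this model is actually published under.
tagger = pipeline("token-classification", model="USER/swiss_german_pos_model")

for pred in tagger("Si wohnt z Züri."):
    print(pred["word"], pred["entity"], round(pred["score"], 3))
# Each "entity" is one of the 16 UPOS labels (ADJ, ADP, ..., X) listed in config.json.
```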
+
+ ### Training procedure and data sets
+
+ - Base model: the German LM [dbmdz/bert-base-german-cased](https://huggingface.co/dbmdz/bert-base-german-cased)
+ - Continued LM training on [SwissCrawl](https://icosys.ch/swisscrawl) data
+ - Task fine-tuning on the [UD_German-HDT](https://github.com/UniversalDependencies/UD_German-HDT/tree/master) data set with [character-level noise](https://aclanthology.org/2022.findings-acl.321/) (a sketch of the noise idea follows this section)
+ - Task fine-tuning on the Swiss German [NOAH-Corpus](https://noe-eva.github.io/NOAH-Corpus/) (train + dev split)
+
+ Accuracy on the NOAH test split: 0.9587
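The character-level noise referenced above comes from Aepli & Sennrich (2022, cited below): the high-resource source-language training data is perturbed with random character edits so the model becomes robust to Swiss German spelling variation. A rough sketch of the idea; the noise rate and edit distribution here are illustrative, not the paper's exact procedure:

```python
import random

def add_char_noise(tokens, noise_prob=0.15, seed=1):
    """Apply one random character-level edit (insert/delete/substitute)
    to a fraction of tokens. Illustrative approximation of the method
    in Aepli & Sennrich (2022)."""
    rng = random.Random(seed)
    alphabet = "abcdefghijklmnopqrstuvwxyzäöü"
    noised = []
    for tok in tokens:
        if rng.random() < noise_prob and len(tok) > 1:
            i = rng.randrange(len(tok))
            op = rng.choice(["insert", "delete", "substitute"])
            if op == "insert":
                tok = tok[:i] + rng.choice(alphabet) + tok[i:]
            elif op == "delete":
                tok = tok[:i] + tok[i + 1:]
            else:
                tok = tok[:i] + rng.choice(alphabet) + tok[i + 1:]
        noised.append(tok)
    return noised

print(add_char_noise(["Das", "ist", "ein", "Beispiel", "Satz"]))
```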
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-05
+ - train_batch_size: 8
+ - eval_batch_size: 8
+ - seed: 1
+ - optimizer: Adam with betas=(0.9, 0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 5.0
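For reference, these settings map onto `transformers.TrainingArguments` roughly as follows. A sketch assuming a standard `Trainer`-based run; the output directory is a placeholder, and the betas/epsilon are Adam's defaults:

```python
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="gsw_pos_model",       # placeholder path
    learning_rate=5e-05,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    seed=1,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-08,
    lr_scheduler_type="linear",
    num_train_epochs=5.0,
)
```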
+
+ ### Framework versions
+
+ - Transformers 4.25.0.dev0
+ - PyTorch 1.13.1
+ - Datasets 2.8.0
+ - Tokenizers 0.13.2
+
+ ### Citation
+
+ ```
+ @inproceedings{aepli-sennrich-2022-improving,
+     title = "Improving Zero-Shot Cross-lingual Transfer Between Closely Related Languages by Injecting Character-Level Noise",
+     author = {Aepli, No{\"e}mi and
+       Sennrich, Rico},
+     booktitle = "Findings of the Association for Computational Linguistics: ACL 2022",
+     month = may,
+     year = "2022",
+     address = "Dublin, Ireland",
+     publisher = "Association for Computational Linguistics",
+     url = "https://aclanthology.org/2022.findings-acl.321",
+     doi = "10.18653/v1/2022.findings-acl.321",
+     pages = "4074--4083",
+     abstract = "Cross-lingual transfer between a high-resource language and its dialects or closely related language varieties should be facilitated by their similarity. However, current approaches that operate in the embedding space do not take surface similarity into account. This work presents a simple yet effective strategy to improve cross-lingual transfer between closely related varieties. We propose to augment the data of the high-resource source language with character-level noise to make the model more robust towards spelling variations. Our strategy shows consistent improvements over several languages and tasks: Zero-shot transfer of POS tagging and topic identification between language varieties from the Finnic, West and North Germanic, and Western Romance language branches. Our work provides evidence for the usefulness of simple surface-level noise in improving transfer between language varieties.",
+ }
+ ```
config.json ADDED
@@ -0,0 +1,63 @@
+ {
+   "_name_or_path": "/home/user/naepli/noisepp/gsw_pos_best/DEbert_swisscrawl_ftDE-noise",
+   "architectures": [
+     "BertForTokenClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "finetuning_task": "ner",
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "ADJ",
+     "1": "ADP",
+     "2": "ADV",
+     "3": "AUX",
+     "4": "CCONJ",
+     "5": "DET",
+     "6": "INTJ",
+     "7": "NOUN",
+     "8": "NUM",
+     "9": "PART",
+     "10": "PRON",
+     "11": "PROPN",
+     "12": "PUNCT",
+     "13": "SCONJ",
+     "14": "VERB",
+     "15": "X"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "ADJ": 0,
+     "ADP": 1,
+     "ADV": 2,
+     "AUX": 3,
+     "CCONJ": 4,
+     "DET": 5,
+     "INTJ": 6,
+     "NOUN": 7,
+     "NUM": 8,
+     "PART": 9,
+     "PRON": 10,
+     "PROPN": 11,
+     "PUNCT": 12,
+     "SCONJ": 13,
+     "VERB": 14,
+     "X": 15
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "output_past": true,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.25.0.dev0",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 31102
+ }
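The `id2label` map above is what turns the classifier's 16 output logits into UPOS tags. A minimal decoding sketch; the checkpoint path and example sentence are assumptions, and note that subword pieces and special tokens ([CLS], [SEP]) also receive a tag:

```python
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification

path = "swiss_german_pos_model"  # placeholder: local dir or Hub repo ID
tokenizer = AutoTokenizer.from_pretrained(path)
model = AutoModelForTokenClassification.from_pretrained(path)

inputs = tokenizer("Si wohnt z Züri.", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits          # shape: (1, seq_len, 16)

pred_ids = logits.argmax(dim=-1)[0]
tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
for tok, pid in zip(tokens, pred_ids):
    print(tok, model.config.id2label[pid.item()])
```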
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:62da48eb0d63620e2ee9154e6cb1d244298016bea8ba3ba7f2722cb8a5afaffa
+ size 437469741
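The weights file is stored through Git LFS, so this entry is only a pointer recording the blob's SHA-256 and size. A small sketch to verify a downloaded copy against the pointer (the local filename is an assumption):

```python
import hashlib

expected = "62da48eb0d63620e2ee9154e6cb1d244298016bea8ba3ba7f2722cb8a5afaffa"
h = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:          # assumed local filename
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)
assert h.hexdigest() == expected, "checksum mismatch"
```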
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "cls_token": "[CLS]",
+   "do_basic_tokenize": true,
+   "do_lower_case": false,
+   "mask_token": "[MASK]",
+   "max_len": 512,
+   "name_or_path": "/home/user/naepli/noisepp/gsw_pos_best/DEbert_swisscrawl_ftDE-noise",
+   "never_split": null,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "special_tokens_map_file": null,
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
trainer_state.json ADDED
@@ -0,0 +1,97 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 5.0,
+   "global_step": 3205,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.78,
+       "learning_rate": 4.219968798751951e-05,
+       "loss": 0.2448,
+       "step": 500
+     },
+     {
+       "epoch": 1.56,
+       "learning_rate": 3.4399375975039005e-05,
+       "loss": 0.117,
+       "step": 1000
+     },
+     {
+       "epoch": 1.56,
+       "eval_accuracy": 0.9510651573223995,
+       "eval_f1": 0.9413793103448275,
+       "eval_loss": 0.17917607724666595,
+       "eval_precision": 0.942101226993865,
+       "eval_recall": 0.9406584992343032,
+       "eval_runtime": 1.3355,
+       "eval_samples_per_second": 548.105,
+       "eval_steps_per_second": 68.887,
+       "step": 1000
+     },
+     {
+       "epoch": 2.34,
+       "learning_rate": 2.65990639625585e-05,
+       "loss": 0.0721,
+       "step": 1500
+     },
+     {
+       "epoch": 3.12,
+       "learning_rate": 1.8798751950078e-05,
+       "loss": 0.045,
+       "step": 2000
+     },
+     {
+       "epoch": 3.12,
+       "eval_accuracy": 0.9569480345841875,
+       "eval_f1": 0.9488834696122029,
+       "eval_loss": 0.20152758061885834,
+       "eval_precision": 0.948112756808409,
+       "eval_recall": 0.9496554364471669,
+       "eval_runtime": 1.2725,
+       "eval_samples_per_second": 575.238,
+       "eval_steps_per_second": 72.298,
+       "step": 2000
+     },
+     {
+       "epoch": 3.9,
+       "learning_rate": 1.0998439937597505e-05,
+       "loss": 0.0252,
+       "step": 2500
+     },
+     {
+       "epoch": 4.68,
+       "learning_rate": 3.198127925117005e-06,
+       "loss": 0.0142,
+       "step": 3000
+     },
+     {
+       "epoch": 4.68,
+       "eval_accuracy": 0.957215438096087,
+       "eval_f1": 0.94943793350873,
+       "eval_loss": 0.22939395904541016,
+       "eval_precision": 0.9490293583245673,
+       "eval_recall": 0.9498468606431854,
+       "eval_runtime": 1.2703,
+       "eval_samples_per_second": 576.223,
+       "eval_steps_per_second": 72.421,
+       "step": 3000
+     },
+     {
+       "epoch": 5.0,
+       "step": 3205,
+       "total_flos": 824467030346496.0,
+       "train_loss": 0.0817642333912961,
+       "train_runtime": 184.8416,
+       "train_samples_per_second": 138.605,
+       "train_steps_per_second": 17.339
+     }
+   ],
+   "max_steps": 3205,
+   "num_train_epochs": 5,
+   "total_flos": 824467030346496.0,
+   "trial_name": null,
+   "trial_params": null
+ }
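The `log_history` above traces training loss and eval metrics across the 3205 steps. A short sketch to extract the eval-accuracy curve, assuming the file has been downloaded locally:

```python
import json

with open("trainer_state.json") as f:  # assumed local filename
    state = json.load(f)

for entry in state["log_history"]:
    if "eval_accuracy" in entry:
        print(f'step {entry["step"]}: eval_accuracy={entry["eval_accuracy"]:.4f}')
# step 1000: eval_accuracy=0.9511
# step 2000: eval_accuracy=0.9569
# step 3000: eval_accuracy=0.9572
```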
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5fec32aecbf7fe52f78338d4fb3a3ffe068a6b8604bc7ef398d124d9931953d4
+ size 3515
vocab.txt ADDED
The diff for this file is too large to render. See raw diff