apapagi committed on
Commit b284bd5 · verified · 1 Parent(s): fa5bbf2

Upload folder using huggingface_hub
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ architecture.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,76 @@
- ---
- license: apache-2.0
- ---
+ ---
+ license: eupl-1.1
+ datasets:
+ - EuropeanParliament/cellar_eurovoc
+ language:
+ - en
+ tags:
+ - eurovoc
+ pipeline_tag: text-classification
+
+ widget:
+ - text: "The Union condemns the continuing grave human rights violations by the Myanmar armed forces, including torture, sexual and gender-based violence, the persecution of civil society actors, human rights defenders and journalists, and attacks on the civilian population, including ethnic and religious minorities."
+
+ ---
+
+ # EuroVoc Multilabel Classifier 🇪🇺
+
+ [EuroVoc](https://op.europa.eu/fr/web/eu-vocabularies) is a large multidisciplinary, multilingual (24 languages of the 🇪🇺) hierarchical thesaurus of more than 7000 classes covering the activities of the EU institutions.
+ Given the number of legal documents produced every day and the huge mass of pre-existing documents still to be classified, high-quality automated or semi-automated classification methods are most welcome in this domain.
+
+ This BERT-based deep neural network model was trained on more than 3.9 million documents for this task and is used in a production environment via a Hugging Face inference endpoint.
+ The model supports the 24 languages of the European Union.
+
+ ## Architecture
+
+ ![architecture](architecture.png)
+
+ This classification model is built on top of [EUBERT](https://huggingface.co/EuropeanParliament/EUBERT) with a classification head covering the 7331 EuroVoc labels.
+
+ With less than 100 million parameters, it can be deployed on commodity hardware without GPU acceleration (around 200 ms per inference for 2000 characters).
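+
+ Concretely, the head is a dropout layer followed by a single linear layer over the pooled EUBERT output, with a sigmoid per label. The sketch below mirrors the `EurovocTagger` class in `eurovoc.py`; the `EurovocHead` name is only illustrative:
+
+ ```python
+ import torch
+ import torch.nn as nn
+ from transformers import AutoModel
+
+ class EurovocHead(nn.Module):
+     """Illustrative sketch of the model structure used by EurovocTagger in eurovoc.py."""
+     def __init__(self, bert_model_name="EuropeanParliament/EUBERT", n_classes=7331):
+         super().__init__()
+         self.bert = AutoModel.from_pretrained(bert_model_name)   # EUBERT encoder
+         self.dropout = nn.Dropout(p=0.2)                         # dropout on the pooled output
+         self.classifier = nn.Linear(self.bert.config.hidden_size, n_classes)
+
+     def forward(self, input_ids, attention_mask):
+         pooled = self.bert(input_ids, attention_mask=attention_mask).pooler_output
+         return torch.sigmoid(self.classifier(self.dropout(pooled)))  # independent per-label probabilities
+ ```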
+
+ Training parameters (a hypothetical sketch of how these map onto the code in `eurovoc.py` follows the list):
+ - Number of epochs: 16
+ - Batch size: 10
+ - Max length: 512
+ - Learning rate: 5e-05
+
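+ The sketch below shows how these hyperparameters could be plugged into the `EurovocDataModule` and `EurovocTagger` classes shipped in `eurovoc.py`; the placeholder data and trainer options are assumptions, not the original training script:
+
+ ```python
+ import numpy as np
+ import pytorch_lightning as pl
+ from eurovoc import EurovocDataModule, EurovocTagger
+
+ # Illustrative placeholders: texts as a column of strings, labels as a
+ # multi-hot matrix over the 7331 EuroVoc classes (not the original training data).
+ x_train = np.array([["Example document text"]])
+ y_train = np.zeros((1, 7331))
+ y_train[0, 0] = 1
+ x_test, y_test = x_train, y_train
+
+ dm = EurovocDataModule("EuropeanParliament/EUBERT", x_train, y_train, x_test, y_test,
+                        batch_size=10, max_token_len=512)   # batch size / max length from the card
+ model = EurovocTagger("EuropeanParliament/EUBERT", n_classes=7331, lr=5e-05)
+
+ trainer = pl.Trainer(max_epochs=16)                         # number of epochs from the card
+ trainer.fit(model, dm)
+ ```
+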
+ ## Usage
+
+ ```python
+ from eurovoc import EurovocTagger
+
+ model = EurovocTagger.from_pretrained("EuropeanParliament/eurovoc_eu")
+ ```
+
+ See also the source code in `eurovoc.py` and the inference endpoint handler in `handler.py`.
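+
+ The following is a minimal local-inference sketch that mirrors the logic of `handler.py`. It assumes the repository files (in particular `mlb.pickle`, which holds the label set) are available in the working directory; the example text is shortened:
+
+ ```python
+ import pickle
+ import torch
+ from transformers import AutoTokenizer
+ from eurovoc import EurovocTagger
+
+ # Label binarizer shipped with the repository; its classes_ are the EuroVoc labels.
+ mlb = pickle.load(open("mlb.pickle", "rb"))
+
+ tokenizer = AutoTokenizer.from_pretrained("EuropeanParliament/EUBERT")
+ model = EurovocTagger.from_pretrained("EuropeanParliament/eurovoc_eu",
+                                       bert_model_name="EuropeanParliament/EUBERT",
+                                       n_classes=len(mlb.classes_))
+ model.eval()
+
+ text = "The Union condemns the continuing grave human rights violations by the Myanmar armed forces."
+ item = tokenizer(text, max_length=512, padding="max_length", truncation=True, return_tensors="pt")
+ with torch.no_grad():
+     _, scores = model(item["input_ids"], item["attention_mask"])
+
+ # Rank labels by score and keep the top 5 above the default 0.16 threshold.
+ ranked = sorted(zip(mlb.classes_, scores[0].tolist()), key=lambda x: x[1], reverse=True)
+ print([(label, round(score, 3)) for label, score in ranked[:5] if score > 0.16])
+ ```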
+
+ ### Payload example
+
+ ```json
+ {
+   "inputs": "The Union condemns the continuing grave human rights violations by the Myanmar armed forces, including torture, sexual and gender-based violence, the persecution of civil society actors, human rights defenders and journalists, and attacks on the civilian population, including ethnic and religious minorities.",
+   "topk": 10,
+   "threshold": 0.16
+ }
+ ```
+
+ Result:
+
+ ```python
+ {'results': [{'label': 'international sanctions', 'score': 0.9994925260543823},
+              {'label': 'economic sanctions', 'score': 0.9991770386695862},
+              {'label': 'natural person', 'score': 0.9591936469078064},
+              {'label': 'EU restrictive measure', 'score': 0.8388392329216003},
+              {'label': 'legal person', 'score': 0.45630475878715515},
+              {'label': 'Burma/Myanmar', 'score': 0.43375277519226074}]}
+ ```
+
+ Only six results are returned because the score of the next label falls below the 0.16 threshold.
+
+ The default values are `topk = 5` and `threshold = 0.16`.
+
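+ A minimal sketch of posting this payload to a deployed Inference Endpoint with `requests` is shown below; the URL and token are placeholders for your own deployment, not a public address:
+
+ ```python
+ import requests
+
+ API_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"   # placeholder: your deployed endpoint
+ headers = {"Authorization": "Bearer <HF_TOKEN>", "Content-Type": "application/json"}  # placeholder token
+
+ payload = {
+     "inputs": "The Union condemns the continuing grave human rights violations by the Myanmar armed forces.",
+     "topk": 10,
+     "threshold": 0.16,
+ }
+
+ response = requests.post(API_URL, headers=headers, json=payload)
+ print(response.json())   # {"results": [{"label": ..., "score": ...}, ...]}
+ ```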
+
+ ## Author(s)
+
+ Andreas Papagiannis <[email protected]>
added_tokens.json ADDED
@@ -0,0 +1,7 @@
1
+ {
2
+ "</s>": 65537,
3
+ "<mask>": 65540,
4
+ "<pad>": 65539,
5
+ "<s>": 65536,
6
+ "<unk>": 65538
7
+ }
architecture.png ADDED

Git LFS Details

  • SHA256: b1913dd26f85243cbb6cec67f771627d6729aa453d77b09a469f64abd29f0913
  • Pointer size: 131 Bytes
  • Size of remote file: 352 kB
eurovoc.py ADDED
@@ -0,0 +1,212 @@
1
+ import torch
2
+ from torch.utils.data import Dataset, DataLoader
3
+ import numpy as np
4
+ import pytorch_lightning as pl
5
+ import torch.nn as nn
6
+ from transformers import BertTokenizerFast as BertTokenizer, AdamW, get_linear_schedule_with_warmup, AutoTokenizer, AutoModel
7
+ from huggingface_hub import PyTorchModelHubMixin
8
+
9
+
10
+ class EurovocDataset(Dataset):
11
+
12
+ def __init__(
13
+ self,
14
+ text: np.array,
15
+ labels: np.array,
16
+ tokenizer: BertTokenizer,
17
+ max_token_len: int = 128
18
+ ):
19
+ self.tokenizer = tokenizer
20
+ self.text = text
21
+ self.labels = labels
22
+ self.max_token_len = max_token_len
23
+
24
+ def __len__(self):
25
+ return len(self.labels)
26
+
27
+ def __getitem__(self, index: int):
28
+ text = self.text[index][0]
29
+ labels = self.labels[index]
30
+
31
+ encoding = self.tokenizer.encode_plus(
32
+ text,
33
+ add_special_tokens=True,
34
+ max_length=self.max_token_len,
35
+ return_token_type_ids=False,
36
+ padding="max_length",
37
+ truncation=True,
38
+ return_attention_mask=True,
39
+ return_tensors='pt',
40
+ )
41
+
42
+ return dict(
43
+ text=text,
44
+ input_ids=encoding["input_ids"].flatten(),
45
+ attention_mask=encoding["attention_mask"].flatten(),
46
+ labels=torch.FloatTensor(labels)
47
+ )
48
+
49
+
50
+ class EuroVocLongTextDataset(Dataset):
51
+
52
+ def __splitter__(self, text, max_length):
53
+ l = text.split()
54
+ for i in range(0, len(l), max_length):
55
+ yield " ".join(l[i:i + max_length])
56
+
57
+ def __init__(
58
+ self,
59
+ text: np.array,
60
+ labels: np.array,
61
+ tokenizer: BertTokenizer,
62
+ max_token_len: int = 128
63
+ ):
64
+ self.tokenizer = tokenizer
65
+ self.text = text
66
+ self.labels = labels
67
+ self.max_token_len = max_token_len
68
+
69
+ self.chunks_and_labels = [(c, l) for t, l in zip(self.text, self.labels) for c in self.__splitter__(t, self.max_token_len)]
70
+
71
+ self.encoding = self.tokenizer.batch_encode_plus(
72
+ [c for c, _ in self.chunks_and_labels],
73
+ add_special_tokens=True,
74
+ max_length=self.max_token_len,
75
+ return_token_type_ids=False,
76
+ padding="max_length",
77
+ truncation=True,
78
+ return_attention_mask=True,
79
+ return_tensors='pt',
80
+ )
81
+
82
+ def __len__(self):
83
+ return len(self.chunks_and_labels)
84
+
85
+ def __getitem__(self, index: int):
86
+ text, labels = self.chunks_and_labels[index]
87
+
88
+ return dict(
89
+ text=text,
90
+ input_ids=self.encoding["input_ids"][index].flatten(),
91
+ attention_mask=self.encoding["attention_mask"][index].flatten(),
92
+ labels=torch.FloatTensor(labels)
93
+ )
94
+
95
+
96
+ class EurovocDataModule(pl.LightningDataModule):
97
+
98
+ def __init__(self, bert_model_name, x_tr, y_tr, x_test, y_test, batch_size=8, max_token_len=512):
99
+ super().__init__()
100
+
101
+ self.batch_size = batch_size
102
+ self.x_tr = x_tr
103
+ self.y_tr = y_tr
104
+ self.x_test = x_test
105
+ self.y_test = y_test
106
+ self.tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
107
+ self.max_token_len = max_token_len
108
+
109
+ def setup(self, stage=None):
110
+ self.train_dataset = EurovocDataset(
111
+ self.x_tr,
112
+ self.y_tr,
113
+ self.tokenizer,
114
+ self.max_token_len
115
+ )
116
+
117
+ self.test_dataset = EurovocDataset(
118
+ self.x_test,
119
+ self.y_test,
120
+ self.tokenizer,
121
+ self.max_token_len
122
+ )
123
+
124
+ def train_dataloader(self):
125
+ return DataLoader(
126
+ self.train_dataset,
127
+ batch_size=self.batch_size,
128
+ shuffle=True,
129
+ num_workers=2
130
+ )
131
+
132
+ def val_dataloader(self):
133
+ return DataLoader(
134
+ self.test_dataset,
135
+ batch_size=self.batch_size,
136
+ num_workers=2
137
+ )
138
+
139
+ def test_dataloader(self):
140
+ return DataLoader(
141
+ self.test_dataset,
142
+ batch_size=self.batch_size,
143
+ num_workers=2
144
+ )
145
+
146
+
147
+ class EurovocTagger(pl.LightningModule, PyTorchModelHubMixin):
148
+
149
+ def __init__(self, bert_model_name, n_classes, lr=2e-5, eps=1e-8):
150
+ super().__init__()
151
+ self.bert = AutoModel.from_pretrained(bert_model_name)
152
+ self.dropout = nn.Dropout(p=0.2)
153
+ self.classifier1 = nn.Linear(self.bert.config.hidden_size, n_classes)
154
+ self.criterion = nn.BCELoss()
155
+ self.lr = lr
156
+ self.eps = eps
157
+
158
+ def forward(self, input_ids, attention_mask, labels=None):
159
+ output = self.bert(input_ids, attention_mask=attention_mask)
160
+ output = self.dropout(output.pooler_output)
161
+ output = self.classifier1(output)
162
+ output = torch.sigmoid(output)
163
+ loss = 0
164
+ if labels is not None:
165
+ loss = self.criterion(output, labels)
166
+ return loss, output
167
+
168
+ def training_step(self, batch, batch_idx):
169
+ input_ids = batch["input_ids"]
170
+ attention_mask = batch["attention_mask"]
171
+ labels = batch["labels"]
172
+ loss, outputs = self(input_ids, attention_mask, labels)
173
+ self.log("train_loss", loss, prog_bar=True, logger=True)
174
+ return {"loss": loss, "predictions": outputs, "labels": labels}
175
+
176
+ def validation_step(self, batch, batch_idx):
177
+ input_ids = batch["input_ids"]
178
+ attention_mask = batch["attention_mask"]
179
+ labels = batch["labels"]
180
+ loss, outputs = self(input_ids, attention_mask, labels)
181
+ self.log("val_loss", loss, prog_bar=True, logger=True)
182
+ return loss
183
+
184
+ def test_step(self, batch, batch_idx):
185
+ input_ids = batch["input_ids"]
186
+ attention_mask = batch["attention_mask"]
187
+ labels = batch["labels"]
188
+ loss, outputs = self(input_ids, attention_mask, labels)
189
+ self.log("test_loss", loss, prog_bar=True, logger=True)
190
+ return loss
191
+
192
+ def on_train_epoch_end(self, *args, **kwargs):
193
+ return
194
+ #labels = []
195
+ #predictions = []
196
+ #for output in args['outputs']:
197
+ # for out_labels in output["labels"].detach().cpu():
198
+ # labels.append(out_labels)
199
+ # for out_predictions in output["predictions"].detach().cpu():
200
+ # predictions.append(out_predictions)
201
+
202
+ #labels = torch.stack(labels).int()
203
+ #predictions = torch.stack(predictions)
204
+
205
+ #for i, name in enumerate(mlb.classes_):
206
+ # class_roc_auc = auroc(predictions[:, i], labels[:, i])
207
+ # self.logger.experiment.add_scalar(f"{name}_roc_auc/Train", class_roc_auc, self.current_epoch)
208
+
209
+
210
+ def configure_optimizers(self):
211
+ return torch.optim.AdamW(self.parameters(), lr=self.lr, eps=self.eps)
212
+
handler.py ADDED
@@ -0,0 +1,77 @@
1
+ from typing import Dict, List, Any
2
+ import numpy as np
3
+ import pickle
4
+
5
+ from sklearn.preprocessing import MultiLabelBinarizer
6
+ from transformers import AutoTokenizer
7
+ import torch
8
+
9
+ from eurovoc import EurovocTagger
10
+
11
+ BERT_MODEL_NAME = "EuropeanParliament/EUBERT"
12
+ MAX_LEN = 512
13
+ TEXT_MAX_LEN = MAX_LEN * 50
14
+ tokenizer = AutoTokenizer.from_pretrained(BERT_MODEL_NAME)
15
+
16
+
17
+ class EndpointHandler:
18
+ mlb = MultiLabelBinarizer()
19
+
20
+ def __init__(self, path=""):
21
+ self.mlb = pickle.load(open(f"{path}/mlb.pickle", "rb"))
22
+
23
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
24
+ self.model = EurovocTagger.from_pretrained(path,
25
+ bert_model_name=BERT_MODEL_NAME,
26
+ n_classes=len(self.mlb.classes_),
27
+ map_location=self.device)
28
+ self.model.eval()
29
+ self.model.freeze()
30
+
31
+ def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
32
+ """
33
+ data args:
34
+ inputs (:obj: `str` | `PIL.Image` | `np.array`)
35
+ kwargs
36
+ Return:
37
+ A :obj:`list` | `dict`: will be serialized and returned
38
+ """
39
+
40
+ text = data.pop("inputs", data)
41
+ topk = data.pop("topk", 5)
42
+ threshold = data.pop("threshold", 0.16)
43
+ debug = data.pop("debug", False)
44
+ prediction = self.get_prediction(text)
45
+ results = [{"label": label, "score": float(score)} for label, score in
46
+ zip(self.mlb.classes_, prediction[0].tolist())]
47
+ results = sorted(results, key=lambda x: x["score"], reverse=True)
48
+ results = [r for r in results if r["score"] > threshold]
49
+ results = results[:topk]
50
+ if debug:
51
+ return {"results": results, "values": prediction, "input": text}
52
+ else:
53
+ return {"results": results}
54
+
55
+ def get_prediction(self, text):
56
+ # split text into chunks of MAX_LEN and get average prediction for each chunk
57
+ chunks = [text[i:i + MAX_LEN] for i in range(0, min(len(text), TEXT_MAX_LEN), MAX_LEN)]
58
+ predictions = [self._get_prediction(chunk) for chunk in chunks]
59
+ predictions = np.array(predictions).mean(axis=0)
60
+ return predictions
61
+
62
+ def _get_prediction(self, text):
63
+ item = tokenizer.encode_plus(
64
+ text,
65
+ add_special_tokens=True,
66
+ max_length=MAX_LEN,
67
+ return_token_type_ids=False,
68
+ padding="max_length",
69
+ truncation=True,
70
+ return_attention_mask=True,
71
+ return_tensors='pt')
72
+ item.to(self.device)
73
+ _, prediction = self.model(item["input_ids"], item["attention_mask"])
74
+ prediction = prediction.cpu().detach().numpy()
75
+ print(text, prediction)
76
+ return prediction
77
+
mlb.pickle ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ef6c77d4be99dc73994099ea02207deca2449b7f4675464285fd41262146f49
3
+ size 131
model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1638381e5fafe20ad1f06b8662a69d369fefdb435f6aecf61bb6ef8e5ed1780
3
+ size 134
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8d7dac5e88e6a751793812b04a026786e6f84c8ba2c20c9a1e3693ad8a5b65a
3
+ size 134
requirements.txt ADDED
@@ -0,0 +1,8 @@
1
+ nltk==3.8.1
2
+ aiohttp==3.8.5
3
+ ipython==8.14.0
4
+ pip-chill==1.0.3
5
+ pytorch-lightning==2.0.5
6
+ scikit-learn==1.3.0
7
+ transformers==4.38.2
8
+
special_tokens_map.json ADDED
@@ -0,0 +1,63 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "[UNK]",
4
+ "[PAD]",
5
+ "[CLS]",
6
+ "[SEP]",
7
+ "[MASK]",
8
+ "<s>",
9
+ "</s>",
10
+ "<unk>",
11
+ "<pad>",
12
+ "<mask>"
13
+ ],
14
+ "bos_token": {
15
+ "content": "[CLS]",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "cls_token": {
22
+ "content": "[CLS]",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ },
28
+ "eos_token": {
29
+ "content": "[SEP]",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false
34
+ },
35
+ "mask_token": {
36
+ "content": "[MASK]",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false
41
+ },
42
+ "pad_token": {
43
+ "content": "[PAD]",
44
+ "lstrip": false,
45
+ "normalized": false,
46
+ "rstrip": false,
47
+ "single_word": false
48
+ },
49
+ "sep_token": {
50
+ "content": "[SEP]",
51
+ "lstrip": false,
52
+ "normalized": false,
53
+ "rstrip": false,
54
+ "single_word": false
55
+ },
56
+ "unk_token": {
57
+ "content": "[UNK]",
58
+ "lstrip": false,
59
+ "normalized": false,
60
+ "rstrip": false,
61
+ "single_word": false
62
+ }
63
+ }
test_handler.py ADDED
@@ -0,0 +1,25 @@
1
+ from pprint import pprint
2
+ from handler import EndpointHandler
3
+
4
+ # init handler
5
+ my_handler = EndpointHandler(path=".")
6
+
7
+ # prepare sample payload
8
+
9
+ payload = {"inputs": "The Union condemns the continuing grave human rights violations by the Myanmar armed forces, including torture, sexual and gender-based violence, the persecution of civil society actors, human rights defenders and journalists, and attacks on the civilian population, including ethnic and religious minorities.",
10
+ "topk": 10,
11
+ "threshold": 0
12
+ }
13
+
14
+ #payload = {"inputs": "EN Official Journal of the European Union LI 183/19 COUNCIL IMPLEMENTING REGULATION (EU) 2023/1497 of 20 July 2023 implementing Regulation (EU) No 401/2013 concerning restrictive measures in view of the situation in Myanmar/Burma THE COUNCIL OF THE EUROPEAN UNION, Having regard to the Treaty on the Functioning of the European Union, Having regard to Council Regulation (EU) No 401/2013 of 2 May 2013 concerning restrictive measures in view of the situation in Myanmar/Burma and repealing Regulation (EC) No 194/2008 (1), and in particular Article 4i thereof, Having regard to the proposal from the High Representative of the Union for Foreign Affairs and Security Policy, Whereas: (1) On 2 May 2013, the Council adopted Regulation (EU) No 401/2013. (2) On 31 January 2023, the High Representative of the Union for Foreign Affairs and Security Policy issued a declaration on behalf of the Union strongly condemning the overthrow of Myanmar’s democratically-elected government by the Myanmar armed forces in blatant violation of the will of the people as expressed in the general election of 8 November 2020. This illegitimate act reversed the country’s democratic transition and led to disastrous humanitarian, social, security, economic and human rights consequences. (3) The Union remains deeply concerned by the continuing escalation of violence and the evolution towards a protracted conflict with regional implications. The Union condemns the continuing grave human rights violations by the Myanmar armed forces, including torture, sexual and gender-based violence, the persecution of civil society actors, human rights defenders and journalists, and attacks on the civilian population, including ethnic and religious minorities. (4) In the absence of swift progress in the situation in Myanmar/Burma, the Union has expressed several times its readiness to adopt further restrictive measures against those responsible for undermining democracy and the rule of law and for the serious human rights violations taking place in that country. (5) In view of the continuing grave situation in Myanmar/Burma, six persons and one entity should be added to the list of natural and legal persons, entities and bodies subject to restrictive measures in Annex IV to Regulation (EU) No 401/2013. (6) Regulation (EU) No 401/2013 should therefore be amended accordingly, HAS ADOPTED THIS REGULATION: Article 1 Annex IV to Regulation (EU) No 401/2013 is amended as set out in the Annex to this Regulation. Article 2 This Regulation shall enter into force on the date of its publication in the Official Journal of the European Union. This Regulation shall be binding in its entirety and directly applicable in all Member States. Done at Brussels, 20 July 2023. For the Council The President J. BORRELL FONTELLES (1)  OJ L 121, 3. 5. 2013, p. 1. ANNEX Annex IV to Regulation (EU) No 401/2013 is amended as follows: (1) the following entries are added to the list headed ‘A. Natural persons referred to in Article 4a’:   Name Identifying information Reasons Date of listing ‘94. Aung Kyaw Min Nationality: Myanmar/Burma; Date of birth: circa 1958; Place of birth: Myanmar/Burma; Gender: male; Function: Member of State Administration Council Aung Kyaw Min has been a member of the State Administration Council (SAC) since 1 February 2023. He is also the former Chief-Minister of Rakhine State. 
SAC is led by Commander in Chief Min Aung Hlaing, who took over the legislative, executive and judicial powers of the State as of 1 February 2021, preventing the democratically-elected government from fulfilling its mandate. As member of the SAC, Aung Kyaw Min has been directly involved in and responsible for decision-making concerning state functions and is therefore responsible for undermining democracy and the rule of law in Myanmar/Burma. Additionally, the SAC has adopted decisions restricting the rights of freedom of expression, including access to information, and peaceful assembly. The military forces and authorities operating under the control of the SAC have committed serious human rights violations since 1 February 2021, killing civilian and unarmed protestors, and have restricted freedom of assembly and of expression. As a member of the SAC, Aung Kyaw Min is directly responsible for those repressive decisions and for serious human rights violations. 20. 7. 2023 95. Kyaw Swar Lin a. k. a Kyaw Swar Linn Nationality: Myanmar/Burma; Place of birth: Myanmar/Burma; Gender: male; Function: Quartermaster General of the Myanmar armed forces Lieutenant General Kyaw Swar Lin was been appointed as Quartermaster General in May 2020. It is the sixth highest position in the military of Myanmar/Burma. The Office of the Quartermaster General is a department under the jurisdiction of the Ministry of Defense and is involved in arms and military equipment procurement for the Myanmar Armed Forces. In addition, Kyaw Swar Lin runs the Myanmar Economic Corporation (MEC), which is one of the two major conglomerates and holding companies operated by the military, generating revenue for the Myanmar armed forces (Tatmadaw). As Quartermaster General, he forms part of the military regime which has seized power in a military coup and overthrown the legitimately elected leaders of Myanmar/Burma. Kyaw Swar Lin is therefore a natural person whose policies and activities undermine democracy and the rule of law in Myanmar/Burma, and who provides support for actions that threaten the peace, security and stability of Myanmar/Burma. 20. 7. 2023 96. Myint Kyaing a. k. a. U Myint Kyaing Nationality: Myanmar/Burma; Date of birth: 17. 4. 1957 Place of birth: Myanmar/Burma; Gender: male; Function: Union Minister of Immigration and Population Myint Kyaing has been the Union Minister for Immigration and Population since 19 August 2021. Before that, he was Union Minister of Labour following the coup of 1 February 2021. He is a member of the State Administration Council (SAC), led by Commander-in-Chief Min Aung Hlaing, which took over the legislative, executive and judicial powers of the State in a military coup on 1 February 2021. As a government Minister, he forms part of the military regime which has seized power in a military coup and overthrown the legitimately elected leaders of Myanmar/Burma. In his capacity as Union Minister, he carries out duties in support of military regime’s repressive immigration and population policy such as restrictions for citizens to travel within the country as well as the policy of the regime towards the minority of the Rohingya in violation of human rights. As Minister for Immigration and Population he also participates in preparations for the elections announced by the military in order to legitimise the illegal coup of February 2021. 
Myint Kyaing is therefore responsible for undermining democracy and the rule of law in Myanmar/Burma and for providing support for actions that threaten the peace, security and stability of Myanmar/Burma. 20. 7. 2023 97.",
15
+ # "topk": 10,
16
+ # "threshold": 0
17
+ # }
18
+
19
+
20
+
21
+ # test the handler
22
+ payload_pred = my_handler(payload)
23
+
24
+ pprint(payload_pred)
25
+
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,110 @@
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "[UNK]",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "[PAD]",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "[CLS]",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "[SEP]",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "4": {
37
+ "content": "[MASK]",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "65536": {
45
+ "content": "<s>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "65537": {
53
+ "content": "</s>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "65538": {
61
+ "content": "<unk>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "65539": {
69
+ "content": "<pad>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "65540": {
77
+ "content": "<mask>",
78
+ "lstrip": true,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ }
84
+ },
85
+ "additional_special_tokens": [
86
+ "[UNK]",
87
+ "[PAD]",
88
+ "[CLS]",
89
+ "[SEP]",
90
+ "[MASK]",
91
+ "<s>",
92
+ "</s>",
93
+ "<unk>",
94
+ "<pad>",
95
+ "<mask>"
96
+ ],
97
+ "bos_token": "[CLS]",
98
+ "clean_up_tokenization_spaces": true,
99
+ "cls_token": "[CLS]",
100
+ "eos_token": "[SEP]",
101
+ "errors": "replace",
102
+ "mask_token": "[MASK]",
103
+ "max_len": 512,
104
+ "model_max_length": 512,
105
+ "pad_token": "[PAD]",
106
+ "sep_token": "[SEP]",
107
+ "tokenizer_class": "RobertaTokenizer",
108
+ "trim_offsets": true,
109
+ "unk_token": "[UNK]"
110
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff