Commit a2f12e0
Parent(s): 65c6644

model improved

Files changed:
- config.json +2 -1
- maker.py +1 -1
- pytorch_model.bin +2 -2
- tokenizer_config.json +1 -0
config.json CHANGED
@@ -9366,6 +9366,7 @@
     "X|_|root": 4672
   },
   "layer_norm_eps": 1e-07,
+  "legacy": true,
   "max_position_embeddings": 512,
   "max_relative_positions": -1,
   "model_type": "deberta-v2",
@@ -9383,7 +9384,7 @@
   "relative_attention": true,
   "tokenizer_class": "BertTokenizer",
   "torch_dtype": "float32",
-  "transformers_version": "4.
+  "transformers_version": "4.48.3",
   "type_vocab_size": 0,
   "vocab_size": 30000
 }
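The two config.json changes add a "legacy" flag and record the transformers version that wrote the file (4.48.3). Both can be confirmed after checking out the commit; a minimal sketch, assuming the current directory is the checked-out repo:

from transformers import AutoConfig

# Load the updated config; keys not covered by named config fields,
# such as "legacy", are exposed as plain attributes.
cfg = AutoConfig.from_pretrained(".")
print(cfg.legacy)                # True (added in this commit)
print(cfg.transformers_version)  # "4.48.3"
print(cfg.model_type)            # "deberta-v2"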
maker.py CHANGED
@@ -52,7 +52,7 @@ devDS=UDgoeswithDataset("dev.conllu",tkz)
 testDS=UDgoeswithDataset("test.conllu",tkz)
 lid=trainDS(devDS,testDS)
 cfg=AutoConfig.from_pretrained(src,num_labels=len(lid),label2id=lid,id2label={i:l for l,i in lid.items()},ignore_mismatched_sizes=True)
-arg=TrainingArguments(num_train_epochs=
+arg=TrainingArguments(num_train_epochs=10,per_device_train_batch_size=8,output_dir="/tmp",overwrite_output_dir=True,save_total_limit=2,evaluation_strategy="epoch",learning_rate=5e-05,warmup_ratio=0.1,save_safetensors=False)
 trn=Trainer(args=arg,data_collator=DataCollatorForTokenClassification(tkz),model=AutoModelForTokenClassification.from_pretrained(src,config=cfg,ignore_mismatched_sizes=True),train_dataset=trainDS,eval_dataset=devDS)
 trn.train()
 trn.save_model(tgt)
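The maker.py change pins down the full TrainingArguments: 10 epochs, per-device batch size 8, learning rate 5e-05 with a 10% warmup ratio, evaluation at every epoch, and save_safetensors=False, which makes trn.save_model(tgt) write pytorch_model.bin rather than model.safetensors (consistent with the weight update below). The retrained tagger can then be exercised with the stock token-classification pipeline; a minimal sketch, where the model path "." and the test sentence are assumptions:

from transformers import pipeline

# Load the saved checkpoint as a token classifier and tag a sentence.
nlp = pipeline("token-classification", model=".")
for tok in nlp("This is a test"):
    print(tok["word"], tok["entity"], round(tok["score"], 3))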
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:cf0b46037eb8048f79e62efe3aa96f219b5b84b938752ba2dc0dbc2108303ca6
+size 506686562
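pytorch_model.bin is stored through Git LFS, so the repository tracks only a pointer file: the spec version, the sha256 object id, and the byte size (here roughly 507 MB). A downloaded copy can be checked against the pointer; a minimal sketch using only the standard library, with the local file path as an assumption:

import hashlib, os

path = "pytorch_model.bin"
expected_oid = "cf0b46037eb8048f79e62efe3aa96f219b5b84b938752ba2dc0dbc2108303ca6"
expected_size = 506686562

# Hash the file in 1 MiB chunks to avoid loading ~507 MB at once.
h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
print("pointer matches file")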
tokenizer_config.json CHANGED
@@ -45,6 +45,7 @@
   "cls_token": "[CLS]",
   "do_basic_tokenize": true,
   "do_lower_case": true,
+  "extra_special_tokens": {},
   "mask_token": "[MASK]",
   "model_max_length": 512,
   "never_split": [
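The added extra_special_tokens entry appears to be a field that recent transformers releases write when serializing a tokenizer; it is empty here, i.e. no special tokens beyond the standard BERT set. A minimal round-trip check, assuming the checked-out repo as the tokenizer path:

from transformers import AutoTokenizer

# Reload the tokenizer from the updated config and inspect its
# special tokens and length limit.
tkz = AutoTokenizer.from_pretrained(".")
print(tkz.cls_token, tkz.mask_token)  # [CLS] [MASK]
print(tkz.model_max_length)           # 512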