KoichiYasuoka committed
Commit a2f12e0 · Parent: 65c6644

model improved

Files changed (4)
  1. config.json +2 -1
  2. maker.py +1 -1
  3. pytorch_model.bin +2 -2
  4. tokenizer_config.json +1 -0
config.json CHANGED
@@ -9366,6 +9366,7 @@
     "X|_|root": 4672
   },
   "layer_norm_eps": 1e-07,
+  "legacy": true,
   "max_position_embeddings": 512,
   "max_relative_positions": -1,
   "model_type": "deberta-v2",
@@ -9383,7 +9384,7 @@
   "relative_attention": true,
   "tokenizer_class": "BertTokenizer",
   "torch_dtype": "float32",
-  "transformers_version": "4.40.1",
+  "transformers_version": "4.48.3",
   "type_vocab_size": 0,
   "vocab_size": 30000
 }
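This commit adds a `legacy` flag to the config and records the newer transformers release used to save the model. A minimal sketch to confirm both after download; `"path/to/this-model"` is a placeholder, since the diff does not name the repo:

```python
from transformers import AutoConfig

# "path/to/this-model" is a placeholder; the actual repo id is not shown in the diff.
cfg = AutoConfig.from_pretrained("path/to/this-model")
print(cfg.model_type)            # "deberta-v2"
print(cfg.legacy)                # True -- the flag added by this commit
print(cfg.transformers_version)  # "4.48.3" after this commit
```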
maker.py CHANGED
@@ -52,7 +52,7 @@ devDS=UDgoeswithDataset("dev.conllu",tkz)
 testDS=UDgoeswithDataset("test.conllu",tkz)
 lid=trainDS(devDS,testDS)
 cfg=AutoConfig.from_pretrained(src,num_labels=len(lid),label2id=lid,id2label={i:l for l,i in lid.items()},ignore_mismatched_sizes=True)
-arg=TrainingArguments(num_train_epochs=3,per_device_train_batch_size=8,output_dir="/tmp",overwrite_output_dir=True,save_total_limit=2,evaluation_strategy="epoch",learning_rate=5e-05,warmup_ratio=0.1,save_safetensors=False)
+arg=TrainingArguments(num_train_epochs=10,per_device_train_batch_size=8,output_dir="/tmp",overwrite_output_dir=True,save_total_limit=2,evaluation_strategy="epoch",learning_rate=5e-05,warmup_ratio=0.1,save_safetensors=False)
 trn=Trainer(args=arg,data_collator=DataCollatorForTokenClassification(tkz),model=AutoModelForTokenClassification.from_pretrained(src,config=cfg,ignore_mismatched_sizes=True),train_dataset=trainDS,eval_dataset=devDS)
 trn.train()
 trn.save_model(tgt)
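The only functional change in maker.py is the epoch count (3 to 10). For readability, the same `TrainingArguments` call can be written out as below; note that recent transformers releases rename `evaluation_strategy` to `eval_strategy`, while the committed code keeps the old name:

```python
from transformers import TrainingArguments

# Identical to the committed call, just reformatted; only num_train_epochs
# changed in this commit (3 -> 10).
arg = TrainingArguments(
    num_train_epochs=10,
    per_device_train_batch_size=8,
    output_dir="/tmp",
    overwrite_output_dir=True,
    save_total_limit=2,
    evaluation_strategy="epoch",  # renamed to eval_strategy in newer releases
    learning_rate=5e-05,
    warmup_ratio=0.1,
    save_safetensors=False,
)
```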
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98c0039ae0ac5f0f44b9c11bce933ae5d3c30672146177d4b99c6ec85ebc5e91
-size 506684053
+oid sha256:cf0b46037eb8048f79e62efe3aa96f219b5b84b938752ba2dc0dbc2108303ca6
+size 506686562
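The weights file is stored via Git LFS, so the diff shows only the pointer: the blob's SHA-256 and byte size. A minimal sketch to verify a downloaded copy against the new pointer, with both expected values taken verbatim from this diff:

```python
import hashlib

# Expected values come straight from the new LFS pointer in this commit.
EXPECTED_SHA256 = "cf0b46037eb8048f79e62efe3aa96f219b5b84b938752ba2dc0dbc2108303ca6"
EXPECTED_SIZE = 506686562

h = hashlib.sha256()
size = 0
with open("pytorch_model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)
        size += len(chunk)

assert size == EXPECTED_SIZE, f"size mismatch: {size}"
assert h.hexdigest() == EXPECTED_SHA256, "sha256 mismatch"
print("pytorch_model.bin matches the LFS pointer in this commit")
```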
tokenizer_config.json CHANGED
@@ -45,6 +45,7 @@
45
  "cls_token": "[CLS]",
46
  "do_basic_tokenize": true,
47
  "do_lower_case": true,
 
48
  "mask_token": "[MASK]",
49
  "model_max_length": 512,
50
  "never_split": [
 
45
  "cls_token": "[CLS]",
46
  "do_basic_tokenize": true,
47
  "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
  "mask_token": "[MASK]",
50
  "model_max_length": 512,
51
  "never_split": [