KoichiYasuoka committed on
Commit
dc42f39
·
1 Parent(s): 223b87d

model improved

Browse files
Files changed (2) hide show
  1. maker.py +1 -1
  2. pytorch_model.bin +1 -1
maker.py CHANGED
@@ -99,7 +99,7 @@ from tokenizers import Regex
99
  from copy import deepcopy
100
  otk=AutoTokenizer.from_pretrained(src)
101
  ntk=deepcopy(otk)
102
- ntk.backend_tokenizer.pre_tokenizer=Sequence([Split("[ぁ-ん]","isolated"),otk.backend_tokenizer.pre_tokenizer])
103
  trainDS=UDEmbedsDataset("train.conllu",ntk,otk)
104
  devDS=UDEmbedsDataset("dev.conllu",ntk,otk)
105
  testDS=UDEmbedsDataset("test.conllu",ntk,otk)
 
99
  from copy import deepcopy
100
  otk=AutoTokenizer.from_pretrained(src)
101
  ntk=deepcopy(otk)
102
+ ntk.backend_tokenizer.pre_tokenizer=Sequence([Split(Regex("[ぁ-ん]"),"isolated"),otk.backend_tokenizer.pre_tokenizer])
103
  trainDS=UDEmbedsDataset("train.conllu",ntk,otk)
104
  devDS=UDEmbedsDataset("dev.conllu",ntk,otk)
105
  testDS=UDEmbedsDataset("test.conllu",ntk,otk)
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b74f493a41ee73d8b1dce645987d4daebc5a31f40b8737179b92fa50e1b3a5b
3
  size 530122154
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51561774a9700240973c5c468cfb1a2ac08291e38c5683a74819427ef236904b
3
  size 530122154