codebyzeb committed on
Commit
a5cf887
·
verified ·
1 Parent(s): bf2cdc2

Training in progress, step 50

Browse files
Files changed (5) hide show
  1. config.json +1 -1
  2. model.safetensors +2 -2
  3. tokenizer.json +186 -31
  4. training_args.bin +1 -1
  5. vocab.json +1 -1
config.json CHANGED
@@ -27,5 +27,5 @@
27
  "torch_dtype": "float32",
28
  "transformers_version": "4.48.1",
29
  "use_cache": true,
30
- "vocab_size": 39
31
  }
 
27
  "torch_dtype": "float32",
28
  "transformers_version": "4.48.1",
29
  "use_cache": true,
30
+ "vocab_size": 54
31
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8be81500bcad2109a2fd035be669aa8747d61cb916cd1fb73bf942d6a8166962
3
- size 3415728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:746a8c8ffd22f16480438eebc8301a8321e29b83e3d93a1f9cce810b3c0f781a
3
+ size 3432768
tokenizer.json CHANGED
@@ -43,6 +43,146 @@
43
  "normalizer": {
44
  "type": "Sequence",
45
  "normalizers": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  {
47
  "type": "Strip",
48
  "strip_left": true,
@@ -51,7 +191,7 @@
51
  ]
52
  },
53
  "pre_tokenizer": {
54
- "type": "Whitespace"
55
  },
56
  "post_processor": {
57
  "type": "TemplateProcessing",
@@ -117,39 +257,54 @@
117
  "UTT_BOUNDARY": 3,
118
  "m": 4,
119
  "a": 5,
120
- "ɑ̃": 6,
121
  "d": 7,
122
- "ɔ": 8,
123
  "n": 9,
124
  "b": 10,
125
- "ʁ": 11,
126
- "ə": 12,
127
- "ɡ": 13,
128
- "ʒ": 14,
129
- "i": 15,
130
- "v": 16,
131
- "t": 17,
132
- "k": 18,
133
- "o": 19,
134
- "ɛ̃": 20,
135
- "w": 21,
136
- "y": 22,
137
- "j": 23,
138
- "e": 24,
139
- "ɔ̃": 25,
140
- "p": 26,
141
- "ɛ": 27,
142
- "f": 28,
143
- "s": 29,
144
- "z": 30,
145
- "l": 31,
146
- "u": 32,
147
- "ʃ": 33,
148
- "œ": 34,
149
- "ø": 35,
150
- "ɲ": 36,
151
- "t̠ʃ": 37,
152
- "d̠ʒ": 38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  },
154
  "unk_token": "UNK"
155
  }
 
43
  "normalizer": {
44
  "type": "Sequence",
45
  "normalizers": [
46
+ {
47
+ "type": "Replace",
48
+ "pattern": {
49
+ "String": "e-"
50
+ },
51
+ "content": "ˈe"
52
+ },
53
+ {
54
+ "type": "Replace",
55
+ "pattern": {
56
+ "String": "ə-"
57
+ },
58
+ "content": "ˈə"
59
+ },
60
+ {
61
+ "type": "Replace",
62
+ "pattern": {
63
+ "String": "a-"
64
+ },
65
+ "content": "ˈa"
66
+ },
67
+ {
68
+ "type": "Replace",
69
+ "pattern": {
70
+ "String": "ˌa"
71
+ },
72
+ "content": "ˈa"
73
+ },
74
+ {
75
+ "type": "Replace",
76
+ "pattern": {
77
+ "String": "ˌɛ"
78
+ },
79
+ "content": "ˈɛ"
80
+ },
81
+ {
82
+ "type": "Replace",
83
+ "pattern": {
84
+ "String": "ˌi"
85
+ },
86
+ "content": "ˈi"
87
+ },
88
+ {
89
+ "type": "Replace",
90
+ "pattern": {
91
+ "String": "ˌɔ"
92
+ },
93
+ "content": "ˈɔ"
94
+ },
95
+ {
96
+ "type": "Replace",
97
+ "pattern": {
98
+ "String": "ˌɑ̃"
99
+ },
100
+ "content": "ˈɑ̃"
101
+ },
102
+ {
103
+ "type": "Replace",
104
+ "pattern": {
105
+ "String": "ˈə-"
106
+ },
107
+ "content": "ˈə"
108
+ },
109
+ {
110
+ "type": "Replace",
111
+ "pattern": {
112
+ "String": "y-"
113
+ },
114
+ "content": "ˈy"
115
+ },
116
+ {
117
+ "type": "Replace",
118
+ "pattern": {
119
+ "String": "ˈe-"
120
+ },
121
+ "content": "ˈe"
122
+ },
123
+ {
124
+ "type": "Replace",
125
+ "pattern": {
126
+ "String": "ˈa-"
127
+ },
128
+ "content": "ˈa"
129
+ },
130
+ {
131
+ "type": "Replace",
132
+ "pattern": {
133
+ "String": "ˌɛ̃"
134
+ },
135
+ "content": "ˈɛ̃"
136
+ },
137
+ {
138
+ "type": "Replace",
139
+ "pattern": {
140
+ "String": "ˌu"
141
+ },
142
+ "content": "ˈu"
143
+ },
144
+ {
145
+ "type": "Replace",
146
+ "pattern": {
147
+ "String": "ˌœ"
148
+ },
149
+ "content": "ˈœ"
150
+ },
151
+ {
152
+ "type": "Replace",
153
+ "pattern": {
154
+ "String": "ˌø"
155
+ },
156
+ "content": "ˈø"
157
+ },
158
+ {
159
+ "type": "Replace",
160
+ "pattern": {
161
+ "String": "ˌe"
162
+ },
163
+ "content": "ˈe"
164
+ },
165
+ {
166
+ "type": "Replace",
167
+ "pattern": {
168
+ "String": "ˌə"
169
+ },
170
+ "content": "ˈə"
171
+ },
172
+ {
173
+ "type": "Replace",
174
+ "pattern": {
175
+ "String": "ˌy"
176
+ },
177
+ "content": "ˈy"
178
+ },
179
+ {
180
+ "type": "Replace",
181
+ "pattern": {
182
+ "String": "ˌo"
183
+ },
184
+ "content": "ˈo"
185
+ },
186
  {
187
  "type": "Strip",
188
  "strip_left": true,
 
191
  ]
192
  },
193
  "pre_tokenizer": {
194
+ "type": "WhitespaceSplit"
195
  },
196
  "post_processor": {
197
  "type": "TemplateProcessing",
 
257
  "UTT_BOUNDARY": 3,
258
  "m": 4,
259
  "a": 5,
260
+ "ˈɑ̃": 6,
261
  "d": 7,
262
+ "ˈɔ": 8,
263
  "n": 9,
264
  "b": 10,
265
+ "ˈa": 11,
266
+ "ʁ": 12,
267
+ "ə": 13,
268
+ "ɡ": 14,
269
+ "ɔ": 15,
270
+ "ʒ": 16,
271
+ "ˈi": 17,
272
+ "v": 18,
273
+ "t": 19,
274
+ "k": 20,
275
+ "ˈo": 21,
276
+ "ˈɛ̃": 22,
277
+ "w": 23,
278
+ "y": 24,
279
+ "j": 25,
280
+ "ˈy": 26,
281
+ "e": 27,
282
+ "ɔ̃": 28,
283
+ "ˈe": 29,
284
+ "p": 30,
285
+ "ɛ": 31,
286
+ "f": 32,
287
+ "s": 33,
288
+ "z": 34,
289
+ "l": 35,
290
+ "ˈə": 36,
291
+ "ˈɛ": 37,
292
+ "u": 38,
293
+ "o": 39,
294
+ "ʃ": 40,
295
+ "i": 41,
296
+ "ˈu": 42,
297
+ "ɛ̃": 43,
298
+ "œ": 44,
299
+ "ˈø": 45,
300
+ "ɑ̃": 46,
301
+ "ˈœ": 47,
302
+ "ˈɔ̃": 48,
303
+ "ø": 49,
304
+ "ɲ": 50,
305
+ "ˈw": 51,
306
+ "t̠ʃ": 52,
307
+ "d̠ʒ": 53
308
  },
309
  "unk_token": "UNK"
310
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f638c7b21758ebe8a00a85fa86d18709257799ae97cf8318e82593db8582985
3
  size 5496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57ddf9aba19f8626fff58dda625a730f79b2c840806845062dbd8136667c3774
3
  size 5496
vocab.json CHANGED
@@ -1 +1 @@
1
- {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"m":4,"a":5,"ɑ̃":6,"d":7,"ɔ":8,"n":9,"b":10,"ʁ":11,"ə":12,"ɡ":13,"ʒ":14,"i":15,"v":16,"t":17,"k":18,"o":19,"ɛ̃":20,"w":21,"y":22,"j":23,"e":24,"ɔ̃":25,"p":26,"ɛ":27,"f":28,"s":29,"z":30,"l":31,"u":32,"ʃ":33,"œ":34,"ø":35,"ɲ":36,"t̠ʃ":37,"d̠ʒ":38}
 
1
+ {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"m":4,"a":5,"ˈɑ̃":6,"d":7,"ˈɔ":8,"n":9,"b":10,"ˈa":11,"ʁ":12,"ə":13,"ɡ":14,"ɔ":15,"ʒ":16,"ˈi":17,"v":18,"t":19,"k":20,"ˈo":21,"ˈɛ̃":22,"w":23,"y":24,"j":25,"ˈy":26,"e":27,"ɔ̃":28,"ˈe":29,"p":30,"ɛ":31,"f":32,"s":33,"z":34,"l":35,"ˈə":36,"ˈɛ":37,"u":38,"o":39,"ʃ":40,"i":41,"ˈu":42,"ɛ̃":43,"œ":44,"ˈø":45,"ɑ̃":46,"ˈœ":47,"ˈɔ̃":48,"ø":49,"ɲ":50,"ˈw":51,"t̠ʃ":52,"d̠ʒ":53}