fix(tokenizer): Sets fim and think tokens to non-special, and removes unk_token.
Browse files
- tokenizer.json +5 -14
tokenizer.json
CHANGED
@@ -3,15 +3,6 @@
|
|
3 |
"truncation": null,
|
4 |
"padding": null,
|
5 |
"added_tokens": [
|
6 |
-
{
|
7 |
-
"id": 5809,
|
8 |
-
"content": "�",
|
9 |
-
"single_word": false,
|
10 |
-
"lstrip": false,
|
11 |
-
"rstrip": false,
|
12 |
-
"normalized": false,
|
13 |
-
"special": true
|
14 |
-
},
|
15 |
{
|
16 |
"id": 100256,
|
17 |
"content": "<|dummy_0|>",
|
@@ -37,7 +28,7 @@
|
|
37 |
"lstrip": true,
|
38 |
"rstrip": true,
|
39 |
"normalized": false,
|
40 |
-
"special": true
|
41 |
},
|
42 |
{
|
43 |
"id": 100259,
|
@@ -46,7 +37,7 @@
|
|
46 |
"lstrip": true,
|
47 |
"rstrip": true,
|
48 |
"normalized": false,
|
49 |
-
"special": true
|
50 |
},
|
51 |
{
|
52 |
"id": 100260,
|
@@ -55,7 +46,7 @@
|
|
55 |
"lstrip": true,
|
56 |
"rstrip": true,
|
57 |
"normalized": false,
|
58 |
-
"special": true
|
59 |
},
|
60 |
{
|
61 |
"id": 100261,
|
@@ -865,7 +856,7 @@
|
|
865 |
"lstrip": true,
|
866 |
"rstrip": true,
|
867 |
"normalized": false,
|
868 |
-
"special": true
|
869 |
},
|
870 |
{
|
871 |
"id": 100351,
|
@@ -874,7 +865,7 @@
|
|
874 |
"lstrip": true,
|
875 |
"rstrip": true,
|
876 |
"normalized": false,
|
877 |
-
"special": true
|
878 |
}
|
879 |
],
|
880 |
"normalizer": null,
|
|
|
3 |
"truncation": null,
|
4 |
"padding": null,
|
5 |
"added_tokens": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
{
|
7 |
"id": 100256,
|
8 |
"content": "<|dummy_0|>",
|
|
|
28 |
"lstrip": true,
|
29 |
"rstrip": true,
|
30 |
"normalized": false,
|
31 |
+
"special": false
|
32 |
},
|
33 |
{
|
34 |
"id": 100259,
|
|
|
37 |
"lstrip": true,
|
38 |
"rstrip": true,
|
39 |
"normalized": false,
|
40 |
+
"special": false
|
41 |
},
|
42 |
{
|
43 |
"id": 100260,
|
|
|
46 |
"lstrip": true,
|
47 |
"rstrip": true,
|
48 |
"normalized": false,
|
49 |
+
"special": false
|
50 |
},
|
51 |
{
|
52 |
"id": 100261,
|
|
|
856 |
"lstrip": true,
|
857 |
"rstrip": true,
|
858 |
"normalized": false,
|
859 |
+
"special": false
|
860 |
},
|
861 |
{
|
862 |
"id": 100351,
|
|
|
865 |
"lstrip": true,
|
866 |
"rstrip": true,
|
867 |
"normalized": false,
|
868 |
+
"special": false
|
869 |
}
|
870 |
],
|
871 |
"normalizer": null,
|