CISCai commited on
Commit
af72f89
·
verified ·
1 Parent(s): d22cc38

add some initial vocab files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ RWKV/ggml-vocab-rwkv-7-world.gguf filter=lfs diff=lfs merge=lfs -text
37
+ SPM/ggml-vocab-gemma-3.gguf filter=lfs diff=lfs merge=lfs -text
38
+ WPM/ggml-vocab-jina-v2-en.gguf filter=lfs diff=lfs merge=lfs -text
RWKV/ggml-vocab-rwkv-7-world.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e46104c6e917c7c954b3348f865b992d56e012e8799c57d69968e435239b7d86
3
+ size 1365451
RWKV/ggml-vocab-rwkv-7-world.gguf.inp ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ied 4 ½ months
2
+ __ggml_vocab_test__
3
+ Äpfel
4
+ __ggml_vocab_test__
5
+
6
+ __ggml_vocab_test__
7
+
8
+ __ggml_vocab_test__
9
+
10
+ __ggml_vocab_test__
11
+
12
+ __ggml_vocab_test__
13
+
14
+ __ggml_vocab_test__
15
+
16
+
17
+ __ggml_vocab_test__
18
+
19
+
20
+
21
+ __ggml_vocab_test__
22
+
23
+
24
+
25
+
26
+ __ggml_vocab_test__
27
+
28
+
29
+ __ggml_vocab_test__
30
+ Hello world
31
+ __ggml_vocab_test__
32
+ Hello world
33
+ __ggml_vocab_test__
34
+ Hello World
35
+ __ggml_vocab_test__
36
+ Hello World
37
+ __ggml_vocab_test__
38
+ Hello World!
39
+ __ggml_vocab_test__
40
+ Hello, world!
41
+ __ggml_vocab_test__
42
+ Hello, world!
43
+ __ggml_vocab_test__
44
+ this is 🦙.cpp
45
+ __ggml_vocab_test__
46
+ w048 7tuijk dsdfhu
47
+ __ggml_vocab_test__
48
+ нещо на Български
49
+ __ggml_vocab_test__
50
+ កាន់តែពិសេសអាចខលចេញ
51
+ __ggml_vocab_test__
52
+ 🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)
53
+ __ggml_vocab_test__
54
+ Hello
55
+ __ggml_vocab_test__
56
+ Hello
57
+ __ggml_vocab_test__
58
+ Hello
59
+ __ggml_vocab_test__
60
+ Hello
61
+ __ggml_vocab_test__
62
+ Hello
63
+ __ggml_vocab_test__
64
+ Hello
65
+ Hello
66
+ __ggml_vocab_test__
67
+ (
68
+ __ggml_vocab_test__
69
+
70
+ =
71
+ __ggml_vocab_test__
72
+ ' era
73
+ __ggml_vocab_test__
74
+ Hello, y'all! How are you 😁 ?我想在apple工作1314151天~
75
+ __ggml_vocab_test__
76
+ !!!!!!
77
+ __ggml_vocab_test__
78
+ 3
79
+ __ggml_vocab_test__
80
+ 33
81
+ __ggml_vocab_test__
82
+ 333
83
+ __ggml_vocab_test__
84
+ 3333
85
+ __ggml_vocab_test__
86
+ 33333
87
+ __ggml_vocab_test__
88
+ 333333
89
+ __ggml_vocab_test__
90
+ 3333333
91
+ __ggml_vocab_test__
92
+ 33333333
93
+ __ggml_vocab_test__
94
+ 333333333
95
+ __ggml_vocab_test__
96
+ Cửa Việt
97
+ __ggml_vocab_test__
98
+ discards
99
+ __ggml_vocab_test__
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+
108
+
109
+
110
+
111
+ 🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български ''''''```````""""......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL
112
+ __ggml_vocab_test__
RWKV/ggml-vocab-rwkv-7-world.gguf.out ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 7779 287 4935 46596
2
+ 2466 2116 1851
3
+
4
+ 33
5
+ 267
6
+ 3340
7
+ 10
8
+ 11
9
+ 65530
10
+ 65530 11
11
+ 258
12
+ 33155 40213
13
+ 36786 40213
14
+ 33155 37576
15
+ 36786 37576
16
+ 36786 37576 34
17
+ 33155 45 40213 34
18
+ 36786 45 40213 34
19
+ 32234 4600 33 3319 167 154 47 7282
20
+ 120 624 57 290 2220 7819 4495 1819 1933
21
+ 28007 28136 32739 5063 61263 43081
22
+ 9704 9708 9705 3000 140 2999 144 3000 131 2999 151 2999 184 9707 3000 130 9707 2999 163 9708 2999 134 2999 130 2999 156 2999 134 3000 130 2999 138
23
+ 3319 155 129 275 41894 42 33 3319 153 183 9810 3319 141 172 19103 275 54156 38362 116 64299 42 33 10056 275 26476 38362 32227 21800 21902 22251 39974 42
24
+ 33155
25
+ 36786
26
+ 267 33155
27
+ 3340 33155
28
+ 19250 33155
29
+ 19250 33155 28352 33155
30
+ 275
31
+ 262 62
32
+ 40 21626
33
+ 33155 45 356 40 6984 34 20063 21286 22799 33 3319 153 130 298 12605 12509 11454 33839 12137 10460 633 634 635 50 11639 19219
34
+ 23248 362
35
+ 52
36
+ 653
37
+ 653 52
38
+ 653 653
39
+ 653 653 52
40
+ 653 653 653
41
+ 653 653 653 52
42
+ 653 653 653 653
43
+ 653 653 653 653 52
44
+ 68 28299 45034
45
+ 51480 116
46
+ 262 65530 33 65530 262 259 3322 258 36215 54806 3339 3319 155 129 275 41894 42 33 3319 153 183 9810 3319 141 172 19103 275 54156 38362 116 64299 42 33 10056 33 3319 167 154 3319 167 154 286 3506 3506 52 3506 653 3506 653 52 3506 653 653 3506 653 653 52 3506 653 653 653 286 47 52 286 585 52 286 5362 52 33 9704 9708 9705 3000 140 2999 144 3000 131 2999 151 2999 184 9707 3000 130 9707 2999 163 9708 2999 134 3319 153 130 298 12605 12509 11454 33839 12137 10460 633 634 635 50 11639 19219 43915 48344 32740 28136 32739 5063 61263 43081 19279 5205 6884 6884 97 23252 40269 23248 362 23349 807 308 5231 30259 274 8826 101 4569 460 39934 45 274 1397 22799 32157 64 274 78 22187 32157 308 5229 31358 4601 45 274 69 22799 31296 32074 22575 64 4331 40 1560 332 40 109 77
SPM/ggml-vocab-gemma-3.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68d854494379ec3133f08de356ae37c7ad2c8a0cdcd23ded05e90eeee2c79924
3
+ size 6512800
SPM/ggml-vocab-gemma-3.gguf.inp ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ied 4 ½ months
2
+ __ggml_vocab_test__
3
+ Äpfel
4
+ __ggml_vocab_test__
5
+
6
+ __ggml_vocab_test__
7
+
8
+ __ggml_vocab_test__
9
+
10
+ __ggml_vocab_test__
11
+
12
+ __ggml_vocab_test__
13
+
14
+ __ggml_vocab_test__
15
+
16
+
17
+ __ggml_vocab_test__
18
+
19
+
20
+
21
+ __ggml_vocab_test__
22
+
23
+
24
+
25
+
26
+ __ggml_vocab_test__
27
+
28
+
29
+ __ggml_vocab_test__
30
+ Hello world
31
+ __ggml_vocab_test__
32
+ Hello world
33
+ __ggml_vocab_test__
34
+ Hello World
35
+ __ggml_vocab_test__
36
+ Hello World
37
+ __ggml_vocab_test__
38
+ Hello World!
39
+ __ggml_vocab_test__
40
+ Hello, world!
41
+ __ggml_vocab_test__
42
+ Hello, world!
43
+ __ggml_vocab_test__
44
+ this is 🦙.cpp
45
+ __ggml_vocab_test__
46
+ w048 7tuijk dsdfhu
47
+ __ggml_vocab_test__
48
+ нещо на Български
49
+ __ggml_vocab_test__
50
+ កាន់តែពិសេសអាចខលចេញ
51
+ __ggml_vocab_test__
52
+ 🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)
53
+ __ggml_vocab_test__
54
+ Hello
55
+ __ggml_vocab_test__
56
+ Hello
57
+ __ggml_vocab_test__
58
+ Hello
59
+ __ggml_vocab_test__
60
+ Hello
61
+ __ggml_vocab_test__
62
+ Hello
63
+ __ggml_vocab_test__
64
+ Hello
65
+ Hello
66
+ __ggml_vocab_test__
67
+ (
68
+ __ggml_vocab_test__
69
+
70
+ =
71
+ __ggml_vocab_test__
72
+ ' era
73
+ __ggml_vocab_test__
74
+ Hello, y'all! How are you 😁 ?我想在apple工作1314151天~
75
+ __ggml_vocab_test__
76
+ !!!!!!
77
+ __ggml_vocab_test__
78
+ 3
79
+ __ggml_vocab_test__
80
+ 33
81
+ __ggml_vocab_test__
82
+ 333
83
+ __ggml_vocab_test__
84
+ 3333
85
+ __ggml_vocab_test__
86
+ 33333
87
+ __ggml_vocab_test__
88
+ 333333
89
+ __ggml_vocab_test__
90
+ 3333333
91
+ __ggml_vocab_test__
92
+ 33333333
93
+ __ggml_vocab_test__
94
+ 333333333
95
+ __ggml_vocab_test__
96
+ Cửa Việt
97
+ __ggml_vocab_test__
98
+ discards
99
+ __ggml_vocab_test__
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+
108
+
109
+
110
+
111
+ 🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български ''''''```````""""......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL
112
+ __ggml_vocab_test__
SPM/ggml-vocab-gemma-3.gguf.out ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1178 236743 236812 47041 3794
2
+ 239122 22744 535
3
+
4
+ 236743
5
+ 138
6
+ 139
7
+ 255968
8
+ 107
9
+ 108
10
+ 109
11
+ 255968 107
12
+ 9259 1902
13
+ 26352 1902
14
+ 9259 4109
15
+ 26352 4109
16
+ 26352 4109 236888
17
+ 9259 236764 1902 236888
18
+ 26352 236764 1902 236888
19
+ 672 563 236743 478 397 404 391 236761 12362
20
+ 236765 236771 236812 236828 236743 236832 11372 12065 31806 3405 9360
21
+ 1337 12515 1333 4632 165543 3830
22
+ 234889 63031 219876 66212 239077 237907 144494
23
+ 242015 568 7382 236768 236743 247717 237243 248989 238178 568 43819 111730 150567 236768 113452 568 8960 64334 600 815 1061 1852 8369 236768
24
+ 9259
25
+ 26352
26
+ 138 9259
27
+ 139 9259
28
+ 140 9259
29
+ 140 9259 107 140 9259
30
+ 568
31
+ 107 578
32
+ 236789 6933
33
+ 9259 236764 570 236789 712 236888 2088 659 611 170124 2360 62133 237075 17641 11700 236770 236800 236770 236812 236770 236810 236770 237471 238352
34
+ 123947
35
+ 236800
36
+ 236800 236800
37
+ 236800 236800 236800
38
+ 236800 236800 236800 236800
39
+ 236800 236800 236800 236800 236800
40
+ 236800 236800 236800 236800 236800 236800
41
+ 236800 236800 236800 236800 236800 236800 236800
42
+ 236800 236800 236800 236800 236800 236800 236800 236800
43
+ 236800 236800 236800 236800 236800 236800 236800 236800 236800
44
+ 236780 29719 33154
45
+ 2243 2206
46
+ 107 236743 108 236743 109 236743 255968 236743 255969 236743 255968 107 138 107 139 107 140 107 141 107 242015 568 7382 236768 236743 247717 237243 248989 238178 568 43819 111730 150567 236768 113452 236743 478 397 404 391 478 397 404 391 236743 236800 236743 236800 236800 236743 236800 236800 236800 236743 236800 236800 236800 236800 236743 236800 236800 236800 236800 236800 236743 236800 236800 236800 236800 236800 236800 236743 236800 236800 236800 236800 236800 236800 236800 236743 236800 236800 236800 236800 236800 236800 236800 236800 236743 236800 236761 236800 236743 236800 856 236800 236743 236800 1390 236800 90986 92814 63031 219876 66212 241702 2360 62133 237075 17641 11700 236770 236800 236770 236812 236770 236810 236770 237471 238352 80448 120697 210119 1333 4632 165543 3830 9451 159561 2629 2629 2717 84491 19938 123947 38950 10371 564 236789 560 1010 756 151812 668 236789 236751 993 236764 756 1357 611 2889 236881 756 236792 711 2889 564 236789 859 1386 625 236764 756 236796 611 1133 1070 11115 236881 1191 236789 32541 496 236789 95635
WPM/ggml-vocab-jina-v2-en.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5124151a3c25b256a56d1bf112dbe3ca878ee99b1649ae270bbbbc52b1bd5f98
3
+ size 627966
WPM/ggml-vocab-jina-v2-en.gguf.inp ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ied 4 ½ months
2
+ __ggml_vocab_test__
3
+ Äpfel
4
+ __ggml_vocab_test__
5
+
6
+ __ggml_vocab_test__
7
+
8
+ __ggml_vocab_test__
9
+
10
+ __ggml_vocab_test__
11
+
12
+ __ggml_vocab_test__
13
+
14
+ __ggml_vocab_test__
15
+
16
+
17
+ __ggml_vocab_test__
18
+
19
+
20
+
21
+ __ggml_vocab_test__
22
+
23
+
24
+
25
+
26
+ __ggml_vocab_test__
27
+
28
+
29
+ __ggml_vocab_test__
30
+ Hello world
31
+ __ggml_vocab_test__
32
+ Hello world
33
+ __ggml_vocab_test__
34
+ Hello World
35
+ __ggml_vocab_test__
36
+ Hello World
37
+ __ggml_vocab_test__
38
+ Hello World!
39
+ __ggml_vocab_test__
40
+ Hello, world!
41
+ __ggml_vocab_test__
42
+ Hello, world!
43
+ __ggml_vocab_test__
44
+ this is 🦙.cpp
45
+ __ggml_vocab_test__
46
+ w048 7tuijk dsdfhu
47
+ __ggml_vocab_test__
48
+ нещо на Български
49
+ __ggml_vocab_test__
50
+ កាន់តែពិសេសអាចខលចេញ
51
+ __ggml_vocab_test__
52
+ 🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)
53
+ __ggml_vocab_test__
54
+ Hello
55
+ __ggml_vocab_test__
56
+ Hello
57
+ __ggml_vocab_test__
58
+ Hello
59
+ __ggml_vocab_test__
60
+ Hello
61
+ __ggml_vocab_test__
62
+ Hello
63
+ __ggml_vocab_test__
64
+ Hello
65
+ Hello
66
+ __ggml_vocab_test__
67
+ (
68
+ __ggml_vocab_test__
69
+
70
+ =
71
+ __ggml_vocab_test__
72
+ ' era
73
+ __ggml_vocab_test__
74
+ Hello, y'all! How are you 😁 ?我想在apple工作1314151天~
75
+ __ggml_vocab_test__
76
+ !!!!!!
77
+ __ggml_vocab_test__
78
+ 3
79
+ __ggml_vocab_test__
80
+ 33
81
+ __ggml_vocab_test__
82
+ 333
83
+ __ggml_vocab_test__
84
+ 3333
85
+ __ggml_vocab_test__
86
+ 33333
87
+ __ggml_vocab_test__
88
+ 333333
89
+ __ggml_vocab_test__
90
+ 3333333
91
+ __ggml_vocab_test__
92
+ 33333333
93
+ __ggml_vocab_test__
94
+ 333333333
95
+ __ggml_vocab_test__
96
+ Cửa Việt
97
+ __ggml_vocab_test__
98
+ discards
99
+ __ggml_vocab_test__
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+
108
+
109
+
110
+
111
+ 🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български ''''''```````""""......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL
112
+ __ggml_vocab_test__
WPM/ggml-vocab-jina-v2-en.gguf.out ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 29464 2094 1018 1092 2706
2
+ 9706 7959 2140
3
+
4
+
5
+
6
+
7
+
8
+
9
+
10
+
11
+
12
+ 7592 2088
13
+ 7592 2088
14
+ 7592 2088
15
+ 7592 2088
16
+ 7592 2088 999
17
+ 7592 1010 2088 999
18
+ 7592 1010 2088 999
19
+ 2023 2003 100 1012 18133 2361
20
+ 1059 2692 18139 1021 8525 28418 2243 16233 20952 6979
21
+ 1192 15290 29754 14150 1192 10260 1181 29755 29436 29741 10260 16856 29747 23925 10325
22
+ 100
23
+ 100 1006 3671 1007 100 1006 3674 7861 29147 2483 9530 16280 23854 1007 100 1006 2069 7861 29147 2072 2008 2038 2049 2219 19204 1007
24
+ 7592
25
+ 7592
26
+ 7592
27
+ 7592
28
+ 7592
29
+ 7592 7592
30
+ 1006
31
+ 1027
32
+ 1005 3690
33
+ 7592 1010 1061 1005 2035 999 2129 2024 2017 100 1029 1855 100 100 6207 100 100 14677 23632 22203 1811 1995
34
+ 999 999 999 999 999 999
35
+ 1017
36
+ 3943
37
+ 21211
38
+ 21211 2509
39
+ 21211 22394
40
+ 21211 22394 2509
41
+ 21211 22394 22394
42
+ 21211 22394 22394 2509
43
+ 21211 22394 22394 22394
44
+ 12731 2050 19710
45
+ 5860 18117
46
+ 100 1006 3671 1007 100 1006 3674 7861 29147 2483 9530 16280 23854 1007 100 100 1017 3943 21211 21211 2509 21211 22394 21211 22394 2509 21211 22394 22394 21211 22394 22394 2509 1017 1012 1017 1017 1012 1012 1017 1017 1012 1012 1012 1017 100 1029 1855 100 100 6207 100 100 14677 23632 22203 1811 1995 1011 1011 1011 1011 1011 1011 1027 1027 1027 1027 1027 1027 1027 1192 15290 29754 14150 1192 10260 1181 29755 29436 29741 10260 16856 29747 23925 10325 1005 1005 1005 1005 1005 1005 1036 1036 1036 1036 1036 1036 1036 1000 1000 1000 1000 1012 1012 1012 1012 1012 1012 999 999 999 999 999 999 1029 1029 1029 1029 1029 1029 1045 1005 2310 2042 1005 2409 2002 1005 1055 2045 1010 1005 2128 2017 2469 1029 1005 1049 2025 2469 1045 1005 2222 2191 2009 1010 1005 1040 2017 2066 2070 5572 1029 2057 1005 2310 1037 1005 2222