Upload folder using huggingface_hub
Browse files- added_tokens.json +1 -6
- config.json +1 -1
- model-00001-of-00003.safetensors +2 -2
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +2 -2
- model.safetensors.index.json +1 -1
- special_tokens_map.json +18 -3
- tokenizer.json +20 -74
- tokenizer_config.json +12 -57
added_tokens.json
CHANGED
@@ -1,8 +1,3 @@
|
|
1 |
{
|
2 |
-
"</s>":
|
3 |
-
"<s>": 20000,
|
4 |
-
"<|end_header_id|>": 20003,
|
5 |
-
"<|eom_id|>": 20004,
|
6 |
-
"<|eot_id|>": 20005,
|
7 |
-
"<|start_header_id|>": 20002
|
8 |
}
|
|
|
1 |
{
|
2 |
+
"</s>": 19999
|
|
|
|
|
|
|
|
|
|
|
3 |
}
|
config.json
CHANGED
@@ -29,5 +29,5 @@
|
|
29 |
"torch_dtype": "bfloat16",
|
30 |
"transformers_version": "4.46.1",
|
31 |
"use_cache": true,
|
32 |
-
"vocab_size":
|
33 |
}
|
|
|
29 |
"torch_dtype": "bfloat16",
|
30 |
"transformers_version": "4.46.1",
|
31 |
"use_cache": true,
|
32 |
+
"vocab_size": 20000
|
33 |
}
|
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cdf97b27df4f3be4003f6272cf388d438aa4b682874a3194142a836b6ccf4e18
|
3 |
+
size 1989271056
|
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1996220256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1dccbf834ff6ff6a08c9d69c3e9916ab875d1c33ee505561736279ca5a1c828
|
3 |
size 1996220256
|
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3879139496cf09782a4ff60ab9dced82a5db1a74a2c09f5b15e8fa56b8d9f4ad
|
3 |
+
size 247898056
|
model.safetensors.index.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
-
"total_size":
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"lm_head.weight": "model-00003-of-00003.safetensors",
|
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
+
"total_size": 4233355776
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"lm_head.weight": "model-00003-of-00003.safetensors",
|
special_tokens_map.json
CHANGED
@@ -1,19 +1,34 @@
|
|
1 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
"bos_token": {
|
3 |
-
"content": "
|
4 |
"lstrip": false,
|
5 |
"normalized": false,
|
6 |
"rstrip": false,
|
7 |
"single_word": false
|
8 |
},
|
9 |
"eos_token": {
|
10 |
-
"content": "<|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
"lstrip": false,
|
12 |
"normalized": false,
|
13 |
"rstrip": false,
|
14 |
"single_word": false
|
15 |
},
|
16 |
-
"pad_token": "<|eot_id|>",
|
17 |
"unk_token": {
|
18 |
"content": "<unk>",
|
19 |
"lstrip": false,
|
|
|
1 |
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
{
|
4 |
+
"content": "<|im_start|>",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false
|
9 |
+
}
|
10 |
+
],
|
11 |
"bos_token": {
|
12 |
+
"content": "<s>",
|
13 |
"lstrip": false,
|
14 |
"normalized": false,
|
15 |
"rstrip": false,
|
16 |
"single_word": false
|
17 |
},
|
18 |
"eos_token": {
|
19 |
+
"content": "<|im_end|>",
|
20 |
+
"lstrip": false,
|
21 |
+
"normalized": false,
|
22 |
+
"rstrip": false,
|
23 |
+
"single_word": false
|
24 |
+
},
|
25 |
+
"pad_token": {
|
26 |
+
"content": "<pad>",
|
27 |
"lstrip": false,
|
28 |
"normalized": false,
|
29 |
"rstrip": false,
|
30 |
"single_word": false
|
31 |
},
|
|
|
32 |
"unk_token": {
|
33 |
"content": "<unk>",
|
34 |
"lstrip": false,
|
tokenizer.json
CHANGED
@@ -14,7 +14,7 @@
|
|
14 |
},
|
15 |
{
|
16 |
"id": 1,
|
17 |
-
"content": "
|
18 |
"single_word": false,
|
19 |
"lstrip": false,
|
20 |
"rstrip": false,
|
@@ -23,7 +23,7 @@
|
|
23 |
},
|
24 |
{
|
25 |
"id": 2,
|
26 |
-
"content": "
|
27 |
"single_word": false,
|
28 |
"lstrip": false,
|
29 |
"rstrip": false,
|
@@ -217,7 +217,7 @@
|
|
217 |
"lstrip": false,
|
218 |
"rstrip": false,
|
219 |
"normalized": false,
|
220 |
-
"special":
|
221 |
},
|
222 |
{
|
223 |
"id": 24,
|
@@ -226,7 +226,7 @@
|
|
226 |
"lstrip": false,
|
227 |
"rstrip": false,
|
228 |
"normalized": false,
|
229 |
-
"special":
|
230 |
},
|
231 |
{
|
232 |
"id": 25,
|
@@ -344,60 +344,6 @@
|
|
344 |
"rstrip": false,
|
345 |
"normalized": false,
|
346 |
"special": false
|
347 |
-
},
|
348 |
-
{
|
349 |
-
"id": 20000,
|
350 |
-
"content": "<s>",
|
351 |
-
"single_word": false,
|
352 |
-
"lstrip": false,
|
353 |
-
"rstrip": false,
|
354 |
-
"normalized": false,
|
355 |
-
"special": true
|
356 |
-
},
|
357 |
-
{
|
358 |
-
"id": 20001,
|
359 |
-
"content": "</s>",
|
360 |
-
"single_word": false,
|
361 |
-
"lstrip": false,
|
362 |
-
"rstrip": false,
|
363 |
-
"normalized": false,
|
364 |
-
"special": true
|
365 |
-
},
|
366 |
-
{
|
367 |
-
"id": 20002,
|
368 |
-
"content": "<|start_header_id|>",
|
369 |
-
"single_word": false,
|
370 |
-
"lstrip": false,
|
371 |
-
"rstrip": false,
|
372 |
-
"normalized": false,
|
373 |
-
"special": true
|
374 |
-
},
|
375 |
-
{
|
376 |
-
"id": 20003,
|
377 |
-
"content": "<|end_header_id|>",
|
378 |
-
"single_word": false,
|
379 |
-
"lstrip": false,
|
380 |
-
"rstrip": false,
|
381 |
-
"normalized": false,
|
382 |
-
"special": true
|
383 |
-
},
|
384 |
-
{
|
385 |
-
"id": 20004,
|
386 |
-
"content": "<|eom_id|>",
|
387 |
-
"single_word": false,
|
388 |
-
"lstrip": false,
|
389 |
-
"rstrip": false,
|
390 |
-
"normalized": false,
|
391 |
-
"special": true
|
392 |
-
},
|
393 |
-
{
|
394 |
-
"id": 20005,
|
395 |
-
"content": "<|eot_id|>",
|
396 |
-
"single_word": false,
|
397 |
-
"lstrip": false,
|
398 |
-
"rstrip": false,
|
399 |
-
"normalized": false,
|
400 |
-
"special": true
|
401 |
}
|
402 |
],
|
403 |
"normalizer": {
|
@@ -418,7 +364,7 @@
|
|
418 |
"single": [
|
419 |
{
|
420 |
"SpecialToken": {
|
421 |
-
"id": "
|
422 |
"type_id": 0
|
423 |
}
|
424 |
},
|
@@ -430,7 +376,7 @@
|
|
430 |
},
|
431 |
{
|
432 |
"SpecialToken": {
|
433 |
-
"id": "
|
434 |
"type_id": 0
|
435 |
}
|
436 |
}
|
@@ -438,7 +384,7 @@
|
|
438 |
"pair": [
|
439 |
{
|
440 |
"SpecialToken": {
|
441 |
-
"id": "
|
442 |
"type_id": 0
|
443 |
}
|
444 |
},
|
@@ -450,13 +396,13 @@
|
|
450 |
},
|
451 |
{
|
452 |
"SpecialToken": {
|
453 |
-
"id": "
|
454 |
"type_id": 0
|
455 |
}
|
456 |
},
|
457 |
{
|
458 |
"SpecialToken": {
|
459 |
-
"id": "
|
460 |
"type_id": 1
|
461 |
}
|
462 |
},
|
@@ -468,28 +414,28 @@
|
|
468 |
},
|
469 |
{
|
470 |
"SpecialToken": {
|
471 |
-
"id": "
|
472 |
"type_id": 1
|
473 |
}
|
474 |
}
|
475 |
],
|
476 |
"special_tokens": {
|
477 |
-
"
|
478 |
-
"id": "
|
479 |
"ids": [
|
480 |
-
|
481 |
],
|
482 |
"tokens": [
|
483 |
-
"
|
484 |
]
|
485 |
},
|
486 |
-
"
|
487 |
-
"id": "
|
488 |
"ids": [
|
489 |
-
|
490 |
],
|
491 |
"tokens": [
|
492 |
-
"
|
493 |
]
|
494 |
}
|
495 |
}
|
@@ -523,8 +469,8 @@
|
|
523 |
"ignore_merges": false,
|
524 |
"vocab": {
|
525 |
"<unk>": 0,
|
526 |
-
"
|
527 |
-
"
|
528 |
"<pad>": 3,
|
529 |
"▁▁": 4,
|
530 |
"▁▁▁": 5,
|
|
|
14 |
},
|
15 |
{
|
16 |
"id": 1,
|
17 |
+
"content": "<s>",
|
18 |
"single_word": false,
|
19 |
"lstrip": false,
|
20 |
"rstrip": false,
|
|
|
23 |
},
|
24 |
{
|
25 |
"id": 2,
|
26 |
+
"content": "</s>",
|
27 |
"single_word": false,
|
28 |
"lstrip": false,
|
29 |
"rstrip": false,
|
|
|
217 |
"lstrip": false,
|
218 |
"rstrip": false,
|
219 |
"normalized": false,
|
220 |
+
"special": true
|
221 |
},
|
222 |
{
|
223 |
"id": 24,
|
|
|
226 |
"lstrip": false,
|
227 |
"rstrip": false,
|
228 |
"normalized": false,
|
229 |
+
"special": true
|
230 |
},
|
231 |
{
|
232 |
"id": 25,
|
|
|
344 |
"rstrip": false,
|
345 |
"normalized": false,
|
346 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
347 |
}
|
348 |
],
|
349 |
"normalizer": {
|
|
|
364 |
"single": [
|
365 |
{
|
366 |
"SpecialToken": {
|
367 |
+
"id": "<s>",
|
368 |
"type_id": 0
|
369 |
}
|
370 |
},
|
|
|
376 |
},
|
377 |
{
|
378 |
"SpecialToken": {
|
379 |
+
"id": "</s>",
|
380 |
"type_id": 0
|
381 |
}
|
382 |
}
|
|
|
384 |
"pair": [
|
385 |
{
|
386 |
"SpecialToken": {
|
387 |
+
"id": "<s>",
|
388 |
"type_id": 0
|
389 |
}
|
390 |
},
|
|
|
396 |
},
|
397 |
{
|
398 |
"SpecialToken": {
|
399 |
+
"id": "</s>",
|
400 |
"type_id": 0
|
401 |
}
|
402 |
},
|
403 |
{
|
404 |
"SpecialToken": {
|
405 |
+
"id": "<s>",
|
406 |
"type_id": 1
|
407 |
}
|
408 |
},
|
|
|
414 |
},
|
415 |
{
|
416 |
"SpecialToken": {
|
417 |
+
"id": "</s>",
|
418 |
"type_id": 1
|
419 |
}
|
420 |
}
|
421 |
],
|
422 |
"special_tokens": {
|
423 |
+
"</s>": {
|
424 |
+
"id": "</s>",
|
425 |
"ids": [
|
426 |
+
2
|
427 |
],
|
428 |
"tokens": [
|
429 |
+
"</s>"
|
430 |
]
|
431 |
},
|
432 |
+
"<s>": {
|
433 |
+
"id": "<s>",
|
434 |
"ids": [
|
435 |
+
1
|
436 |
],
|
437 |
"tokens": [
|
438 |
+
"<s>"
|
439 |
]
|
440 |
}
|
441 |
}
|
|
|
469 |
"ignore_merges": false,
|
470 |
"vocab": {
|
471 |
"<unk>": 0,
|
472 |
+
"<s>": 1,
|
473 |
+
"</s>": 2,
|
474 |
"<pad>": 3,
|
475 |
"▁▁": 4,
|
476 |
"▁▁▁": 5,
|
tokenizer_config.json
CHANGED
@@ -12,7 +12,7 @@
|
|
12 |
"special": true
|
13 |
},
|
14 |
"1": {
|
15 |
-
"content": "
|
16 |
"lstrip": false,
|
17 |
"normalized": false,
|
18 |
"rstrip": false,
|
@@ -20,7 +20,7 @@
|
|
20 |
"special": true
|
21 |
},
|
22 |
"2": {
|
23 |
-
"content": "
|
24 |
"lstrip": false,
|
25 |
"normalized": false,
|
26 |
"rstrip": false,
|
@@ -193,7 +193,7 @@
|
|
193 |
"normalized": false,
|
194 |
"rstrip": false,
|
195 |
"single_word": false,
|
196 |
-
"special":
|
197 |
},
|
198 |
"24": {
|
199 |
"content": "<|im_end|>",
|
@@ -201,7 +201,7 @@
|
|
201 |
"normalized": false,
|
202 |
"rstrip": false,
|
203 |
"single_word": false,
|
204 |
-
"special":
|
205 |
},
|
206 |
"25": {
|
207 |
"content": "<|system|>",
|
@@ -306,63 +306,18 @@
|
|
306 |
"rstrip": false,
|
307 |
"single_word": false,
|
308 |
"special": false
|
309 |
-
},
|
310 |
-
"20000": {
|
311 |
-
"content": "<s>",
|
312 |
-
"lstrip": false,
|
313 |
-
"normalized": false,
|
314 |
-
"rstrip": false,
|
315 |
-
"single_word": false,
|
316 |
-
"special": true
|
317 |
-
},
|
318 |
-
"20001": {
|
319 |
-
"content": "</s>",
|
320 |
-
"lstrip": false,
|
321 |
-
"normalized": false,
|
322 |
-
"rstrip": false,
|
323 |
-
"single_word": false,
|
324 |
-
"special": true
|
325 |
-
},
|
326 |
-
"20002": {
|
327 |
-
"content": "<|start_header_id|>",
|
328 |
-
"lstrip": false,
|
329 |
-
"normalized": false,
|
330 |
-
"rstrip": false,
|
331 |
-
"single_word": false,
|
332 |
-
"special": true
|
333 |
-
},
|
334 |
-
"20003": {
|
335 |
-
"content": "<|end_header_id|>",
|
336 |
-
"lstrip": false,
|
337 |
-
"normalized": false,
|
338 |
-
"rstrip": false,
|
339 |
-
"single_word": false,
|
340 |
-
"special": true
|
341 |
-
},
|
342 |
-
"20004": {
|
343 |
-
"content": "<|eom_id|>",
|
344 |
-
"lstrip": false,
|
345 |
-
"normalized": false,
|
346 |
-
"rstrip": false,
|
347 |
-
"single_word": false,
|
348 |
-
"special": true
|
349 |
-
},
|
350 |
-
"20005": {
|
351 |
-
"content": "<|eot_id|>",
|
352 |
-
"lstrip": false,
|
353 |
-
"normalized": false,
|
354 |
-
"rstrip": false,
|
355 |
-
"single_word": false,
|
356 |
-
"special": true
|
357 |
}
|
358 |
},
|
359 |
-
"
|
360 |
-
|
361 |
-
|
362 |
-
"
|
|
|
|
|
|
|
363 |
"legacy": true,
|
364 |
"model_max_length": 1000000000000000019884624838656,
|
365 |
-
"pad_token": "
|
366 |
"padding_side": "left",
|
367 |
"sp_model_kwargs": {},
|
368 |
"spaces_between_special_tokens": false,
|
|
|
12 |
"special": true
|
13 |
},
|
14 |
"1": {
|
15 |
+
"content": "<s>",
|
16 |
"lstrip": false,
|
17 |
"normalized": false,
|
18 |
"rstrip": false,
|
|
|
20 |
"special": true
|
21 |
},
|
22 |
"2": {
|
23 |
+
"content": "</s>",
|
24 |
"lstrip": false,
|
25 |
"normalized": false,
|
26 |
"rstrip": false,
|
|
|
193 |
"normalized": false,
|
194 |
"rstrip": false,
|
195 |
"single_word": false,
|
196 |
+
"special": true
|
197 |
},
|
198 |
"24": {
|
199 |
"content": "<|im_end|>",
|
|
|
201 |
"normalized": false,
|
202 |
"rstrip": false,
|
203 |
"single_word": false,
|
204 |
+
"special": true
|
205 |
},
|
206 |
"25": {
|
207 |
"content": "<|system|>",
|
|
|
306 |
"rstrip": false,
|
307 |
"single_word": false,
|
308 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
309 |
}
|
310 |
},
|
311 |
+
"additional_special_tokens": [
|
312 |
+
"<|im_start|>"
|
313 |
+
],
|
314 |
+
"bos_token": "<s>",
|
315 |
+
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>\n' }}{% endif %}{% endfor %}",
|
316 |
+
"clean_up_tokenization_spaces": false,
|
317 |
+
"eos_token": "<|im_end|>",
|
318 |
"legacy": true,
|
319 |
"model_max_length": 1000000000000000019884624838656,
|
320 |
+
"pad_token": "<pad>",
|
321 |
"padding_side": "left",
|
322 |
"sp_model_kwargs": {},
|
323 |
"spaces_between_special_tokens": false,
|