johannhartmann committed on
Commit
57c957b
·
verified ·
1 Parent(s): 534eb29

Upload folder using huggingface_hub

Browse files
added_tokens.json CHANGED
@@ -1,8 +1,3 @@
1
  {
2
- "</s>": 20001,
3
- "<s>": 20000,
4
- "<|end_header_id|>": 20003,
5
- "<|eom_id|>": 20004,
6
- "<|eot_id|>": 20005,
7
- "<|start_header_id|>": 20002
8
  }
 
1
  {
2
+ "</s>": 19999
 
 
 
 
 
3
  }
config.json CHANGED
@@ -29,5 +29,5 @@
29
  "torch_dtype": "bfloat16",
30
  "transformers_version": "4.46.1",
31
  "use_cache": true,
32
- "vocab_size": 20032
33
  }
 
29
  "torch_dtype": "bfloat16",
30
  "transformers_version": "4.46.1",
31
  "use_cache": true,
32
+ "vocab_size": 20000
33
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad51782a40518730fd9970ccf9c9c2709170d6059c4ce5a728cad970c4305296
3
- size 1989418512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdf97b27df4f3be4003f6272cf388d438aa4b682874a3194142a836b6ccf4e18
3
+ size 1989271056
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67135bd6c8a4eb112ec85adb854be689560f9a8321c73020ee526919cb6f09cb
3
  size 1996220256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1dccbf834ff6ff6a08c9d69c3e9916ab875d1c33ee505561736279ca5a1c828
3
  size 1996220256
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d171150a5e34f6f9434a7eb112b03f7eb4259ae81478e7e7ee045ee9e1822aad
3
- size 248045512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3879139496cf09782a4ff60ab9dced82a5db1a74a2c09f5b15e8fa56b8d9f4ad
3
+ size 247898056
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 4233650688
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00003-of-00003.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 4233355776
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00003-of-00003.safetensors",
special_tokens_map.json CHANGED
@@ -1,19 +1,34 @@
1
  {
 
 
 
 
 
 
 
 
 
2
  "bos_token": {
3
- "content": "<|begin_of_text|>",
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "<|eot_id|>",
 
 
 
 
 
 
 
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": "<|eot_id|>",
17
  "unk_token": {
18
  "content": "<unk>",
19
  "lstrip": false,
 
1
  {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|im_start|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ }
10
+ ],
11
  "bos_token": {
12
+ "content": "<s>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
16
  "single_word": false
17
  },
18
  "eos_token": {
19
+ "content": "<|im_end|>",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "pad_token": {
26
+ "content": "<pad>",
27
  "lstrip": false,
28
  "normalized": false,
29
  "rstrip": false,
30
  "single_word": false
31
  },
 
32
  "unk_token": {
33
  "content": "<unk>",
34
  "lstrip": false,
tokenizer.json CHANGED
@@ -14,7 +14,7 @@
14
  },
15
  {
16
  "id": 1,
17
- "content": "<|begin_of_text|>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
@@ -23,7 +23,7 @@
23
  },
24
  {
25
  "id": 2,
26
- "content": "<|end_of_text|>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
@@ -217,7 +217,7 @@
217
  "lstrip": false,
218
  "rstrip": false,
219
  "normalized": false,
220
- "special": false
221
  },
222
  {
223
  "id": 24,
@@ -226,7 +226,7 @@
226
  "lstrip": false,
227
  "rstrip": false,
228
  "normalized": false,
229
- "special": false
230
  },
231
  {
232
  "id": 25,
@@ -344,60 +344,6 @@
344
  "rstrip": false,
345
  "normalized": false,
346
  "special": false
347
- },
348
- {
349
- "id": 20000,
350
- "content": "<s>",
351
- "single_word": false,
352
- "lstrip": false,
353
- "rstrip": false,
354
- "normalized": false,
355
- "special": true
356
- },
357
- {
358
- "id": 20001,
359
- "content": "</s>",
360
- "single_word": false,
361
- "lstrip": false,
362
- "rstrip": false,
363
- "normalized": false,
364
- "special": true
365
- },
366
- {
367
- "id": 20002,
368
- "content": "<|start_header_id|>",
369
- "single_word": false,
370
- "lstrip": false,
371
- "rstrip": false,
372
- "normalized": false,
373
- "special": true
374
- },
375
- {
376
- "id": 20003,
377
- "content": "<|end_header_id|>",
378
- "single_word": false,
379
- "lstrip": false,
380
- "rstrip": false,
381
- "normalized": false,
382
- "special": true
383
- },
384
- {
385
- "id": 20004,
386
- "content": "<|eom_id|>",
387
- "single_word": false,
388
- "lstrip": false,
389
- "rstrip": false,
390
- "normalized": false,
391
- "special": true
392
- },
393
- {
394
- "id": 20005,
395
- "content": "<|eot_id|>",
396
- "single_word": false,
397
- "lstrip": false,
398
- "rstrip": false,
399
- "normalized": false,
400
- "special": true
401
  }
402
  ],
403
  "normalizer": {
@@ -418,7 +364,7 @@
418
  "single": [
419
  {
420
  "SpecialToken": {
421
- "id": "<|begin_of_text|>",
422
  "type_id": 0
423
  }
424
  },
@@ -430,7 +376,7 @@
430
  },
431
  {
432
  "SpecialToken": {
433
- "id": "<|end_of_text|>",
434
  "type_id": 0
435
  }
436
  }
@@ -438,7 +384,7 @@
438
  "pair": [
439
  {
440
  "SpecialToken": {
441
- "id": "<|begin_of_text|>",
442
  "type_id": 0
443
  }
444
  },
@@ -450,13 +396,13 @@
450
  },
451
  {
452
  "SpecialToken": {
453
- "id": "<|end_of_text|>",
454
  "type_id": 0
455
  }
456
  },
457
  {
458
  "SpecialToken": {
459
- "id": "<|begin_of_text|>",
460
  "type_id": 1
461
  }
462
  },
@@ -468,28 +414,28 @@
468
  },
469
  {
470
  "SpecialToken": {
471
- "id": "<|end_of_text|>",
472
  "type_id": 1
473
  }
474
  }
475
  ],
476
  "special_tokens": {
477
- "<|begin_of_text|>": {
478
- "id": "<|begin_of_text|>",
479
  "ids": [
480
- 1
481
  ],
482
  "tokens": [
483
- "<|begin_of_text|>"
484
  ]
485
  },
486
- "<|end_of_text|>": {
487
- "id": "<|end_of_text|>",
488
  "ids": [
489
- 2
490
  ],
491
  "tokens": [
492
- "<|end_of_text|>"
493
  ]
494
  }
495
  }
@@ -523,8 +469,8 @@
523
  "ignore_merges": false,
524
  "vocab": {
525
  "<unk>": 0,
526
- "<|begin_of_text|>": 1,
527
- "<|end_of_text|>": 2,
528
  "<pad>": 3,
529
  "▁▁": 4,
530
  "▁▁▁": 5,
 
14
  },
15
  {
16
  "id": 1,
17
+ "content": "<s>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
 
23
  },
24
  {
25
  "id": 2,
26
+ "content": "</s>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
 
217
  "lstrip": false,
218
  "rstrip": false,
219
  "normalized": false,
220
+ "special": true
221
  },
222
  {
223
  "id": 24,
 
226
  "lstrip": false,
227
  "rstrip": false,
228
  "normalized": false,
229
+ "special": true
230
  },
231
  {
232
  "id": 25,
 
344
  "rstrip": false,
345
  "normalized": false,
346
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
  }
348
  ],
349
  "normalizer": {
 
364
  "single": [
365
  {
366
  "SpecialToken": {
367
+ "id": "<s>",
368
  "type_id": 0
369
  }
370
  },
 
376
  },
377
  {
378
  "SpecialToken": {
379
+ "id": "</s>",
380
  "type_id": 0
381
  }
382
  }
 
384
  "pair": [
385
  {
386
  "SpecialToken": {
387
+ "id": "<s>",
388
  "type_id": 0
389
  }
390
  },
 
396
  },
397
  {
398
  "SpecialToken": {
399
+ "id": "</s>",
400
  "type_id": 0
401
  }
402
  },
403
  {
404
  "SpecialToken": {
405
+ "id": "<s>",
406
  "type_id": 1
407
  }
408
  },
 
414
  },
415
  {
416
  "SpecialToken": {
417
+ "id": "</s>",
418
  "type_id": 1
419
  }
420
  }
421
  ],
422
  "special_tokens": {
423
+ "</s>": {
424
+ "id": "</s>",
425
  "ids": [
426
+ 2
427
  ],
428
  "tokens": [
429
+ "</s>"
430
  ]
431
  },
432
+ "<s>": {
433
+ "id": "<s>",
434
  "ids": [
435
+ 1
436
  ],
437
  "tokens": [
438
+ "<s>"
439
  ]
440
  }
441
  }
 
469
  "ignore_merges": false,
470
  "vocab": {
471
  "<unk>": 0,
472
+ "<s>": 1,
473
+ "</s>": 2,
474
  "<pad>": 3,
475
  "▁▁": 4,
476
  "▁▁▁": 5,
tokenizer_config.json CHANGED
@@ -12,7 +12,7 @@
12
  "special": true
13
  },
14
  "1": {
15
- "content": "<|begin_of_text|>",
16
  "lstrip": false,
17
  "normalized": false,
18
  "rstrip": false,
@@ -20,7 +20,7 @@
20
  "special": true
21
  },
22
  "2": {
23
- "content": "<|end_of_text|>",
24
  "lstrip": false,
25
  "normalized": false,
26
  "rstrip": false,
@@ -193,7 +193,7 @@
193
  "normalized": false,
194
  "rstrip": false,
195
  "single_word": false,
196
- "special": false
197
  },
198
  "24": {
199
  "content": "<|im_end|>",
@@ -201,7 +201,7 @@
201
  "normalized": false,
202
  "rstrip": false,
203
  "single_word": false,
204
- "special": false
205
  },
206
  "25": {
207
  "content": "<|system|>",
@@ -306,63 +306,18 @@
306
  "rstrip": false,
307
  "single_word": false,
308
  "special": false
309
- },
310
- "20000": {
311
- "content": "<s>",
312
- "lstrip": false,
313
- "normalized": false,
314
- "rstrip": false,
315
- "single_word": false,
316
- "special": true
317
- },
318
- "20001": {
319
- "content": "</s>",
320
- "lstrip": false,
321
- "normalized": false,
322
- "rstrip": false,
323
- "single_word": false,
324
- "special": true
325
- },
326
- "20002": {
327
- "content": "<|start_header_id|>",
328
- "lstrip": false,
329
- "normalized": false,
330
- "rstrip": false,
331
- "single_word": false,
332
- "special": true
333
- },
334
- "20003": {
335
- "content": "<|end_header_id|>",
336
- "lstrip": false,
337
- "normalized": false,
338
- "rstrip": false,
339
- "single_word": false,
340
- "special": true
341
- },
342
- "20004": {
343
- "content": "<|eom_id|>",
344
- "lstrip": false,
345
- "normalized": false,
346
- "rstrip": false,
347
- "single_word": false,
348
- "special": true
349
- },
350
- "20005": {
351
- "content": "<|eot_id|>",
352
- "lstrip": false,
353
- "normalized": false,
354
- "rstrip": false,
355
- "single_word": false,
356
- "special": true
357
  }
358
  },
359
- "bos_token": "<|begin_of_text|>",
360
- "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
361
- "clean_up_tokenization_spaces": true,
362
- "eos_token": "<|eot_id|>",
 
 
 
363
  "legacy": true,
364
  "model_max_length": 1000000000000000019884624838656,
365
- "pad_token": "<|eot_id|>",
366
  "padding_side": "left",
367
  "sp_model_kwargs": {},
368
  "spaces_between_special_tokens": false,
 
12
  "special": true
13
  },
14
  "1": {
15
+ "content": "<s>",
16
  "lstrip": false,
17
  "normalized": false,
18
  "rstrip": false,
 
20
  "special": true
21
  },
22
  "2": {
23
+ "content": "</s>",
24
  "lstrip": false,
25
  "normalized": false,
26
  "rstrip": false,
 
193
  "normalized": false,
194
  "rstrip": false,
195
  "single_word": false,
196
+ "special": true
197
  },
198
  "24": {
199
  "content": "<|im_end|>",
 
201
  "normalized": false,
202
  "rstrip": false,
203
  "single_word": false,
204
+ "special": true
205
  },
206
  "25": {
207
  "content": "<|system|>",
 
306
  "rstrip": false,
307
  "single_word": false,
308
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  }
310
  },
311
+ "additional_special_tokens": [
312
+ "<|im_start|>"
313
+ ],
314
+ "bos_token": "<s>",
315
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>\n' }}{% endif %}{% endfor %}",
316
+ "clean_up_tokenization_spaces": false,
317
+ "eos_token": "<|im_end|>",
318
  "legacy": true,
319
  "model_max_length": 1000000000000000019884624838656,
320
+ "pad_token": "<pad>",
321
  "padding_side": "left",
322
  "sp_model_kwargs": {},
323
  "spaces_between_special_tokens": false,