manaestras committed on
Commit
77717f9
·
verified ·
1 Parent(s): 662d299

Upload tokenizer_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +4 -2
tokenizer_config.json CHANGED
@@ -2,7 +2,7 @@
2
  "architectures": [
3
  "GPT2LMHeadModel"
4
  ],
5
- "model_max_length": 1048576,
6
  "tokenizer_class": "HYTokenizer",
7
  "auto_map": {
8
  "AutoTokenizer": [
@@ -10,8 +10,10 @@
10
  null
11
  ]
12
  },
 
 
13
  "model_type": "gpt2",
14
  "additional_special_tokens": ["<|startoftext|>", "<|extra_0|>", "<|extra_4|>", "<|extra_5|>", "<|eos|>"],
15
  "pad_token": "<|pad|>",
16
- "chat_template": "{% set context = {'has_head': true} %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = message['content'] %}{% if loop.index0 == 0 %}{% if content == '' %}{% set _ = context.update({'has_head': false}) %}{% elif message['role'] == 'system' %}{% set content = '<|startoftext|>' + content + '<|extra_4|>' %}{% endif %}{% endif %}{% if message['role'] == 'user' %}{% if loop.index0 == 1 and not context.has_head %}{% set content = '<|startoftext|>' + content %}{% endif %}{% if loop.index0 == 1 and context.has_head %}{% set content = content + '<|extra_0|>' %}{% else %}{% set content = '<|startoftext|>' + content + '<|extra_0|>' %}{% endif %}{% elif message['role'] == 'assistant' %}{% set content = content + '<|eos|>' %}{% endif %}{{ content }}{% endfor %}"
17
  }
 
2
  "architectures": [
3
  "GPT2LMHeadModel"
4
  ],
5
+ "model_max_length": 262144,
6
  "tokenizer_class": "HYTokenizer",
7
  "auto_map": {
8
  "AutoTokenizer": [
 
10
  null
11
  ]
12
  },
13
+ "eos_token": "<|eos|>",
14
+ "bos_token": "<|startoftext|>",
15
  "model_type": "gpt2",
16
  "additional_special_tokens": ["<|startoftext|>", "<|extra_0|>", "<|extra_4|>", "<|extra_5|>", "<|eos|>"],
17
  "pad_token": "<|pad|>",
18
+ "chat_template": "{%- if not add_generation_prompt is defined %}\n {%- set add_generation_prompt = false %}\n{%- endif %}\n{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_first_user=true, is_last_user=false) %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n {%- if ns.is_first_sp %}\n {%- set ns.system_prompt = ns.system_prompt + message['content'] %}\n {%- set ns.is_first_sp = false %}\n {%- else %}\n {%- set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{{- bos_token }}\n{{- ns.system_prompt }}\n{%- if tools %}\n {%- if ns.system_prompt != '' %}\n {{- '\n\n# Tools\n\nYou may call one or more functions to assist with the user query.' }}\n {%- else %}\n {{- '# Tools\n\nYou may call one or more functions to assist with the user query.' }}\n {%- endif %}\n {{- '\n\nYou are provided with function signatures within <tools></tools> XML tags:' }}\n {{- '\n<tools>\n' }}\n {%- for tool in tools %}\n {%- if loop.index0 > 0 %}\n {{- '\n' }}\n {%- endif %}\n {{- tool | tojson }}\n {%- endfor %}\n {{- '\n</tools>\n\n' }}\n {{- 'For function call returns, you should first print <tool_calls>' }}\n {{- 'For each function call, you should return object like:\n' }}\n {{- '<tool_call>function_name\n```json\nfunction_arguments_in_json_format\n```</tool_call>' }}\n {{- 'At the end of function call returns, you should print </tool_calls>' }}\n{%- endif %}\n{%- if ns.system_prompt != '' or tools %}\n {{- '<|extra_4|>' }}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'user' %}\n {%- set ns.is_tool = false %}\n {%- set ns.is_first = false %}\n {%- set ns.is_last_user = true %}\n {%- if ns.is_first_user %}\n {{- message['content'] + '<|extra_0|>' }}\n {%- set ns.is_first_user = false %}\n {%- else %}\n {{- bos_token + message['content'] + '<|extra_0|>' }}\n {%- endif %}\n {%- endif %}\n {%- if 
message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}\n {%- set ns.is_last_user = false %}\n {%- if ns.is_tool %}\n {{- '</tool_responses>' + '<|extra_0|>' }}\n {%- endif %}\n {%- set ns.is_first = false %}\n {%- set ns.is_tool = false %}\n {%- set ns.is_output_first = true %}\n {%- for tool in message['tool_calls'] %}\n {%- set arguments = tool['function']['arguments'] %}\n {%- if arguments is not string %}\n {%- set arguments = arguments | tojson %}\n {%- endif %}\n {%- if not ns.is_first %}\n {%- if message['content'] is none %}\n {{- '<tool_calls><tool_call>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '</tool_call>' }}\n {%- else %}\n {{- message['content'] + '<tool_calls><tool_call>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '</tool_call>' }}\n {%- endif %}\n {%- set ns.is_first = true %}\n {%- else %}\n {{- '\n' + '<tool_call>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '</tool_call>' }}\n {%- endif %}\n {%- endfor %}\n {{- '</tool_calls>' + eos_token }}\n {%- endif %}\n {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}\n {%- set content = message['content'] %}\n {%- if '<answer>' in content and not loop.last %}\n {%- set content = content.split('<answer>')[-1].strip('</answer>').strip() %}\n {%- endif %}\n {%- set ns.is_last_user = false %}\n {%- if ns.is_tool %}\n {{- '</tool_responses>' + '<|extra_0|>' + content + eos_token }}\n {%- set ns.is_tool = false %}\n {%- else %}\n {{- content + eos_token }}\n {%- endif %}\n {%- endif %}\n {%- if message['role'] == 'tool' %}\n {%- set ns.is_last_user = false %}\n {%- set ns.is_tool = true %}\n {%- if ns.is_output_first %}\n {{- bos_token + '<tool_responses><tool_response>' + message['content'] + '</tool_response>' }}\n {%- set ns.is_output_first = false %}\n {%- else %}\n {{- 
'\n<tool_response>' + message['content'] + '</tool_response>' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if ns.is_tool %}\n {{- '</tool_responses>' + '<|extra_0|>' }}\n{%- endif %}\n{%- if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}\n {{- '<|extra_0|>' }}\n{%- endif %}\n{%- if enable_thinking is defined and not enable_thinking %}\n {{- '<think>\n\n</think>\n' }}\n{%- endif %}"
19
  }