feihu.hf committed on
Commit · 6d262b5
1 Parent(s): 8022daf
update readme
Browse files:
- README.md +10 -11
- generation_config.json +1 -1
- tokenizer_config.json +1 -1
README.md CHANGED
@@ -9,7 +9,7 @@ tags:
 - chat
 ---

-# QwQ-32B
+# QwQ-32B-AWQ

 <a href="https://chat.qwenlm.ai/" target="_blank" style="margin: 2px;">
     <img alt="Chat" src="https://img.shields.io/badge/%F0%9F%92%9C%EF%B8%8F%20Qwen%20Chat%20-536af5" style="display: inline-block; vertical-align: middle;"/>
@@ -32,11 +32,11 @@ QwQ is the reasoning model of the Qwen series. Compared with conventional instru

 **Note:** For the best experience, please review the [usage guidelines](#usage-guidelines) before deploying QwQ models.

-For more details, please refer to our [blog](https://qwenlm.github.io/blog/…
+For more details, please refer to our [blog](https://qwenlm.github.io/blog/qwq-32b/), [GitHub](https://github.com/QwenLM/Qwen2.5), and [Documentation](https://qwen.readthedocs.io/en/latest/).

 ## Requirements

-…
+QwQ is based on Qwen2.5, whose code has been in the latest Hugging Face `transformers`. We advise you to use the latest version of `transformers`.

 With `transformers<4.37.0`, you will encounter the following error:
 ```
@@ -92,9 +92,8 @@ To achieve optimal performance, we recommend the following settings:

 1. **Enforce Thoughtful Output**: Ensure the model starts with "\<think\>\n" to prevent generating empty thinking content, which can degrade output quality. If you use `apply_chat_template` and set `add_generation_prompt=True`, this is already automatically implemented, but it may cause the response to lack the \<think\> tag at the beginning. This is normal behavior.

 2. **Sampling Parameters**:
-   - Use Temperature=0.6 and TopP=0.95 instead of Greedy decoding to avoid endless repetitions
-   - …
-   - For other types of questions, use TopK=20.
+   - Use Temperature=0.6 and TopP=0.95 instead of Greedy decoding to avoid endless repetitions.
+   - Use TopK between 20 and 40 to filter out rare token occurrences while maintaining the diversity of the generated output.

 3. **Standardize Output Format**: We recommend using prompts to standardize model outputs when benchmarking.
    - **Math Problems**: Include "Please reason step by step, and put your final answer within \boxed{}." in the prompt.
@@ -120,7 +119,7 @@ We advise adding the `rope_scaling` configuration only when processing long cont

 ## Evaluation & Performance

-Detailed evaluation results are reported in this [📑 blog](https://qwenlm.github.io/blog/…
+Detailed evaluation results are reported in this [📑 blog](https://qwenlm.github.io/blog/qwq-32b/).

 For requirements on GPU memory and the respective throughput, see results [here](https://qwen.readthedocs.io/en/latest/benchmark/speed_benchmark.html).

@@ -129,12 +128,12 @@ For requirements on GPU memory and the respective throughput, see results [here]
 If you find our work helpful, feel free to give us a cite.

 ```
-@misc{…
+@misc{qwq32b,
     title = {Qwen2.5: A Party of Foundation Models},
-    url = {https://qwenlm.github.io/blog/…
+    url = {https://qwenlm.github.io/blog/qwq-32b/},
     author = {Qwen Team},
-    month = {…
-    year = {…
+    month = {March},
+    year = {2025}
 }

 @article{qwen2,
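The usage-guideline hunks above pin down a complete decoding recipe: force the response to open inside `<think>\n`, sample with Temperature=0.6 / TopP=0.95, and keep TopK in the 20-40 range. A minimal sketch of how those pieces fit together in `transformers`; the repo ID `Qwen/QwQ-32B-AWQ`, the prompt, and `max_new_tokens` are illustrative assumptions, and the explicit sampling arguments simply restate the defaults this commit writes into `generation_config.json`:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumption: this commit belongs to the Qwen/QwQ-32B-AWQ repository.
model_id = "Qwen/QwQ-32B-AWQ"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")

messages = [{"role": "user", "content": "How many r's are in the word \"strawberry\"?"}]
# With add_generation_prompt=True, the updated template ends the prompt with
# "<|im_start|>assistant\n<think>\n", so the model starts inside its thinking
# block (guideline 1) and the decoded response lacks the opening <think> tag.
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# Guideline 2: sample instead of greedy decoding to avoid endless repetition.
output_ids = model.generate(
    input_ids,
    max_new_tokens=32768,  # illustrative budget for long reasoning traces
    do_sample=True,
    temperature=0.6,
    top_p=0.95,
    top_k=40,  # anywhere in the recommended 20-40 range
)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))
```

Passing the sampling parameters explicitly is redundant once the repo's `generation_config.json` is in place, but it keeps benchmarking scripts self-documenting.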
generation_config.json CHANGED
@@ -8,7 +8,7 @@
 "pad_token_id": 151643,
 "repetition_penalty": 1.0,
 "temperature": 0.6,
-"top_k": …
+"top_k": 40,
 "top_p": 0.95,
 "transformers_version": "4.45.2"
 }
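With `top_k` now committed to `generation_config.json`, a bare `model.generate()` call inherits the full recommended sampling setup without explicit arguments. A quick check, assuming the same `Qwen/QwQ-32B-AWQ` repo ID as above:

```python
from transformers import GenerationConfig

# Assumption: the repo ID; the config values are exactly what this commit edits.
gen_cfg = GenerationConfig.from_pretrained("Qwen/QwQ-32B-AWQ")
print(gen_cfg.temperature, gen_cfg.top_p, gen_cfg.top_k)
# Expected after this commit: 0.6 0.95 40
```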
tokenizer_config.json CHANGED
@@ -227,7 +227,7 @@
 "<|video_pad|>"
 ],
 "bos_token": null,
-"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- '' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" and not message.tool_calls %}\n {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- '' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" and not message.tool_calls %}\n {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n<think>\\n' }}\n{%- endif %}\n",
 "clean_up_tokenization_spaces": false,
 "eos_token": "<|im_end|>",
 "errors": "replace",
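The only functional change in this long `chat_template` line is in its `add_generation_prompt` branch: the generation prompt now ends with `<think>\n` instead of stopping after the assistant header, which is what makes usage guideline 1 automatic. A sketch to observe the difference (same assumed repo ID as above):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/QwQ-32B-AWQ")  # assumed repo ID
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello"}],
    tokenize=False,
    add_generation_prompt=True,
)
print(repr(prompt))
# Before this commit the prompt ended with: '<|im_start|>assistant\n'
# After this commit it ends with:           '<|im_start|>assistant\n<think>\n'
```

Note that the template also strips earlier reasoning from assistant turns in the history via `message.content.split('</think>')[-1]`, so multi-turn conversations carry only final answers, not accumulated thinking blocks.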