alignmentforever commited on
Commit
0ad4cbc
·
verified ·
1 Parent(s): 67352c6

upload model folder to repo

Browse files
README.md ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Aligner 模型部署指南
3
+
4
+ [[Aligner Github]](https://github.com/PKU-Alignment/aligner)
5
+
6
+ [[Aligner Website]](https://pku-aligner.github.io/)
7
+
8
+ ## 前提条件
9
+
10
+ - CUDA环境
11
+ - vLLM 安装完成
12
+ - 至少8张GPU (0-7)
13
+ - 足够的GPU内存用于加载模型
14
+
15
+ ## 配置说明
16
+
17
+ 在运行部署脚本前,需要配置以下环境变量:
18
+
19
+ 1. `BASE_MODEL_PATH` - 基础模型路径
20
+ 2. `ALIGNER_MODEL_PATH` - Aligner模型路径
21
+ 3. `BASE_PORT` - 基础模型服务端口(默认8011)
22
+ 4. `ALIGNER_PORT` - Aligner模型服务端口(默认8013)
23
+
24
+ ## 部署步骤
25
+
26
+ 1. 打开`deploy_aligner.sh`脚本,填写所需的模型路径:
27
+ ```bash
28
+ export BASE_MODEL_PATH='您的基础模型路径'
29
+ export ALIGNER_MODEL_PATH='您的Aligner模型路径'
30
+ ```
31
+
32
+ 2. 如需要,可修改默认端口:
33
+ ```bash
34
+ export BASE_PORT=8011
35
+ export ALIGNER_PORT=8013
36
+ ```
37
+
38
+ 3. 运行部署脚本:
39
+ ```bash
40
+ bash deploy_aligner.sh
41
+ ```
42
+
43
+ ## 部署详情
44
+
45
+ 该脚本会启动两个vLLM服务:
46
+
47
+ 1. 基础模型服务:
48
+ - 使用GPU 0-3
49
+ - 4路张量并行
50
+ - 监听`0.0.0.0:$BASE_PORT`
51
+ - 最大序列长度2048
52
+
53
+ 2. Aligner模型服务:
54
+ - 使用GPU 4-7
55
+ - 4路张量并行
56
+ - 监听`0.0.0.0:$ALIGNER_PORT`
57
+ - 最大序列长度2048
58
+
59
+ 两个服务都配置了以下共同参数:
60
+ - API密钥:jiayi # 不重要,仅用于初始化
61
+ - 信任远程代码
62
+ - 自动数据类型
63
+ - 强制使用eager模式
64
+ - 1GB交换空间
65
+
66
+ ## 验证部署
67
+
68
+ 脚本运行完成后,可通过以下方式验证服务是否成功启动:
69
+
70
+ ```bash
71
+ curl -X GET http://localhost:$BASE_PORT/v1/models
72
+ curl -X GET http://localhost:$ALIGNER_PORT/v1/models
73
+ ```
74
+
75
+ 或者使用`netstat`查看端口是否被监听:
76
+
77
+ ```bash
78
+ netstat -tuln | grep $BASE_PORT
79
+ netstat -tuln | grep $ALIGNER_PORT
80
+ ```
81
+
82
+
83
+ ## 运行推理
84
+ 更改`aligner_inference_demo.py`中的模型路径,需要与`deploy_aligner.sh`中的模型路径保持一致
85
+ ```
86
+ aligner_model = ""
87
+ base_model = ""
88
+ ```
89
+
90
+ 运行 `aligner_inference_demo.py` 启动Gradio-based的部署脚本
91
+ ```
92
+ python aligner_inference_demo.py
93
+ ```
deploy/aligner_inference_demo.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Copyright 2024 PKU-Alignment Team. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Command-line / Gradio interface for querying text models."""

import argparse
import os
import random

import gradio as gr
from openai import OpenAI

random.seed(42)

CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))

# System prompt; adjust to taste.
SYSTEM_PROMPT = "你是一个有帮助的AI助手,能够回答用户的问题并提供帮助。"

# Connection settings.  The API key is irrelevant here — the local vLLM
# servers only need *some* key to initialise the OpenAI client.
openai_api_key = "jiayi"

aligner_port = 8013
base_port = 8011
aligner_api_base = f"http://0.0.0.0:{aligner_port}/v1"
base_api_base = f"http://0.0.0.0:{base_port}/v1"

# NOTE: fill in the model paths; they must match deploy_aligner.sh.
aligner_model = ""
base_model = ""

# One client per vLLM service: the Aligner corrector and the base model.
aligner_client = OpenAI(
    api_key=openai_api_key,
    base_url=aligner_api_base,
)

base_client = OpenAI(
    api_key=openai_api_key,
    base_url=base_api_base,
)

# Example prompts shown in the Gradio UI.
TEXT_EXAMPLES = [
    "介绍一下北京大学的历史",
    "解释一下什么是深度学习",
    "写一首关于春天的诗",
]
def text_conversation(text: str, role: str = 'user'):
    """Wrap *text* into a one-element OpenAI chat message list for *role*."""
    message = {'role': role, 'content': text}
    return [message]
def question_answering(message: str, history: list):
    """Answer *message* with the base model, then stream an Aligner correction.

    A generator for Gradio's ChatInterface: yields markdown strings — first
    the base model's raw answer as it streams, then the same raw answer with
    the Aligner's corrected answer appended below it.
    """
    # Assemble the base-model conversation: system prompt, prior turns,
    # then the current user message.
    # NOTE(review): assumes tuple-style Gradio history of (user, bot) pairs —
    # confirm against the installed Gradio version.
    messages = text_conversation(SYSTEM_PROMPT, 'system')
    for user_turn, bot_turn in history:
        if user_turn:
            messages += text_conversation(user_turn, 'user')
        if bot_turn:
            messages += text_conversation(bot_turn, 'assistant')
    current_question = message
    messages += text_conversation(current_question)

    # Stream the raw answer from the base model.
    base_stream = base_client.chat.completions.create(
        model=base_model,
        stream=True,
        messages=messages,
    )

    base_section = "🌟 **原始回答:**\n"
    base_answer = ""  # accumulated raw answer; also fed to the Aligner below
    yield base_section
    for part in base_stream:
        piece = part.choices[0].delta.content
        if piece is not None:
            base_answer += piece
            yield f"```bash\n{base_section}{base_answer}\n```"

    # Raw answer finished — announce the correction pass.
    aligner_section = "\n**Aligner 修正中...**\n\n🌟 **修正后回答:**\n"
    yield f"```bash\n{base_section}{base_answer}\n```{aligner_section}"

    # The Aligner takes the question plus the base answer and streams a
    # corrected answer.
    correction_prompt = f'##Question: {current_question}\n##Answer: {base_answer}\n##Correction: '
    aligner_messages = text_conversation(SYSTEM_PROMPT, 'system')
    aligner_messages += text_conversation(correction_prompt)
    aligner_stream = aligner_client.chat.completions.create(
        model=aligner_model,
        stream=True,
        messages=aligner_messages,
    )

    corrected = ""
    for part in aligner_stream:
        piece = part.choices[0].delta.content
        if piece is not None:
            corrected += piece
            # Strip any echoed correction marker from the Aligner's output.
            corrected = corrected.replace('##CORRECTION:', '')
            yield f"```bash\n{base_section}{base_answer}\n```{aligner_section}{corrected}"
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--port", type=int, default=7860, help="Gradio服务端口")
    # BUG FIX: the previous defaults were the *strings* 'True'/'False', and any
    # non-empty string is truthy — so --api-only was effectively always enabled
    # and args.share was a str, not a bool.  Use real booleans; sharing stays
    # on by default, api-only stays off unless the flag is passed.
    parser.add_argument("--share", default=True, action="store_true", help="是否创建公共链接")
    parser.add_argument("--api-only", default=False, action="store_true", help="只输出Python API调用示例")
    args = parser.parse_args()

    # Build the Gradio chat UI; question_answering is a generator, so the
    # interface streams partial answers as they arrive.
    iface = gr.ChatInterface(
        fn=question_answering,
        title='Aligner',
        description='网络安全 Aligner',
        examples=TEXT_EXAMPLES,
        theme=gr.themes.Soft(
            text_size='lg',
            spacing_size='lg',
            radius_size='lg',
        ),
    )

    iface.launch(server_port=args.port, share=args.share)
deploy/deploy_aligner.sh ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# Deploy two vLLM services: the base model on GPUs 0-3 and the Aligner model
# on GPUs 4-7, each with 4-way tensor parallelism, 2048 max sequence length,
# eager mode and 1 GB swap space.  Fill in the model paths before running.
export BASE_MODEL_PATH=''    # Base model path
export ALIGNER_MODEL_PATH='' # Aligner model path
export BASE_PORT=8011        # Base port
export ALIGNER_PORT=8013     # Aligner port

echo "$BASE_MODEL_PATH"
echo "$ALIGNER_MODEL_PATH"
echo "$BASE_PORT"
echo "$ALIGNER_PORT"
# FIX: quote all expansions so model paths containing spaces survive word
# splitting.  The API key is a placeholder required by the OpenAI client.
# Base model service runs in the background; the Aligner service keeps the
# script in the foreground.
CUDA_VISIBLE_DEVICES=0,1,2,3 vllm serve "$BASE_MODEL_PATH" --host 0.0.0.0 --port "$BASE_PORT" --max-model-len 2048 --tensor-parallel-size 4 --api-key jiayi --trust-remote-code --dtype auto --enforce-eager --swap-space 1 &
CUDA_VISIBLE_DEVICES=4,5,6,7 vllm serve "$ALIGNER_MODEL_PATH" --host 0.0.0.0 --port "$ALIGNER_PORT" --max-model-len 2048 --tensor-parallel-size 4 --api-key jiayi --trust-remote-code --dtype auto --enforce-eager --swap-space 1
# vllm serve /aifs4su/hansirui/yaodong/models/DeepSeek-R1 --host 0.0.0.0 --port 8009 --max-model-len 12800 --tensor-parallel-size 16 --api-key jiayi --trust-remote-code --dtype auto --enforce-eager --enable-reasoning --reasoning-parser deepseek_r1 --swap-space 1

echo 'Base Port:' "$BASE_PORT"
echo 'Aligner Port:' "$ALIGNER_PORT"
# CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 vllm serve /aifs4su/yaodong/spring_r1_model/QVQ-72B-Preview --enable-reasoning --reasoning-parser deepseek_r1 --host 0.0.0.0 --port 8009 --max-model-len 12000 --tensor-parallel-size 8 --api-key jiayi
slice_end/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
slice_end/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_attn_implementation_autoset": true,
3
+ "_name_or_path": "/home/yangyaodong/cac_aligner/models/Qwen/Qwen2.5-7B-Instruct",
4
+ "architectures": [
5
+ "Qwen2ForCausalLM"
6
+ ],
7
+ "attention_dropout": 0.0,
8
+ "eos_token_id": 151645,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 3584,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 18944,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 28,
15
+ "model_type": "qwen2",
16
+ "num_attention_heads": 28,
17
+ "num_hidden_layers": 28,
18
+ "num_key_value_heads": 4,
19
+ "pad_token_id": 151643,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000.0,
23
+ "sliding_window": 131072,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.49.0",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 152064
30
+ }
slice_end/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
slice_end/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e55dca3fe79af7f00cb6f59c9223e4920e48b98a41facc31c0927d6b41b32a4
3
+ size 15231345338
slice_end/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
slice_end/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
slice_end/tokenizer_config.json ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": false
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": false
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": false
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": false
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": false
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": false
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": false
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": false
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": false
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": false
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": false
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": false
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": false
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": false
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
+ "errors": "replace",
202
+ "extra_special_tokens": {},
203
+ "model_max_length": 4096,
204
+ "pad_token": "<|endoftext|>",
205
+ "padding_side": "right",
206
+ "split_special_tokens": false,
207
+ "tokenizer_class": "Qwen2Tokenizer",
208
+ "unk_token": null
209
+ }
slice_end/vocab.json ADDED
The diff for this file is too large to render. See raw diff