Update tokenization_minicpm.py
tokenization_minicpm.py (CHANGED, +10 -10)
@@ -4,7 +4,6 @@ import keyword
 import traceback
 import uuid
 from collections import deque
-from copy import deepcopy
 from logging import getLogger
 from typing import Any, Dict, List, Optional, Union
 
@@ -17,6 +16,7 @@ from jsonschema import Draft202012Validator, exceptions, validate
 from transformers import LlamaTokenizerFast
 from transformers.tokenization_utils_base import BatchEncoding
 from transformers.utils import TensorType
+from copy import deepcopy
 
 
 logger = getLogger(__name__)
@@ -148,7 +148,7 @@ class MiniCPMTokenizer(LlamaTokenizerFast):
             tool_calls.append(this_one)
 
         return {
-            "content": content
+            "content": content,
             "tool_calls": [
                 {"type": "function", "function": tool_call, "id": "call_" + uuid.uuid4().hex}
                 for tool_call in tool_calls
@@ -158,13 +158,13 @@ class MiniCPMTokenizer(LlamaTokenizerFast):
         except:
             logger.error(traceback.format_exc())
             return {
-                "content": content
+                "content": content,
                 "role": "assistant",
                 "thought": thought_string,
             }
         else:
            return {
-                "content": sequence
+                "content": sequence,
                 "role": "assistant",
                 "thought": thought_string,
            }
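Note on the two hunks above: the commas added after "content" fix dict literals that previously would not parse, since Python requires a comma between adjacent entries. A minimal sketch with made-up values (content and tool_calls below are placeholders, not the tokenizer's real data):

    # Sketch only: placeholder values standing in for the tokenizer's real data.
    content = "calling the tool now"
    tool_calls = [{"name": "func1", "arguments": {}}]

    # Before the fix the returned dict literal did not parse:
    #     {"content": content "tool_calls": tool_calls}    # SyntaxError
    # With the comma it is a valid two-entry dict:
    message = {"content": content, "tool_calls": tool_calls}
    assert message["content"] == content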
@@ -259,10 +259,11 @@ def message_format(msg, system_suffix="", user_prefix=""):
         content = thought_prefix + content
         msg["content"] = content
     elif msg["role"] == "user":
-
+        if user_prefix != "":
+            msg["content"] = user_prefix + "\n" + msg["content"]
     elif msg["role"] == "system":
         msg["content"] = msg["content"] + "\n" + system_suffix
-        msg["content"] = msg["content"]
+        msg["content"] = msg["content"]
     return msg
 
 
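The new user branch prepends user_prefix only when it is non-empty, so the default user_prefix="" no longer adds a stray leading newline to user messages. A standalone sketch of just that guard (apply_user_prefix is an illustrative helper, not part of the file):

    # Illustrative helper mirroring the guarded user branch; not a function from the file.
    def apply_user_prefix(content: str, user_prefix: str = "") -> str:
        if user_prefix != "":
            content = user_prefix + "\n" + content
        return content

    assert apply_user_prefix("hi") == "hi"                      # empty prefix: content unchanged
    assert apply_user_prefix("hi", "PREFIX") == "PREFIX\nhi"    # prefix goes on its own line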
@@ -361,12 +362,12 @@ func2(params)
 <|tool_call_end|>
 {{answer the user's question directly or ask the user for more information}}
 """
-        tools_string = tools_template.format(tools=tools_string)
+        tools_string = tools_template.format(tools=tools_string)
     else:
         tools_string = ""
 
     if add_to_system:
-        if len(messages) > 0 and messages[0]["role"] != "system" and tools_string
+        if len(messages) > 0 and messages[0]["role"] != "system" and len(tools_string.strip()) > 0:
             messages.insert(0, {"role": "system", "content": ""})
         return [message_format(msg, system_suffix=tools_string, user_prefix="") for msg in messages]
     else:
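The rewritten condition replaces the bare tools_string check with an explicit length test on the stripped string, so an empty system stub is only inserted when there is a real tools block to append. A small sketch of the behavior (needs_system_stub and the sample messages are illustrative only):

    # Illustrative re-statement of the new condition; not a function from the file.
    def needs_system_stub(messages, tools_string):
        return len(messages) > 0 and messages[0]["role"] != "system" and len(tools_string.strip()) > 0

    msgs = [{"role": "user", "content": "hi"}]
    assert needs_system_stub(msgs, "# Tools ...") is True   # real tools text: insert the stub
    assert needs_system_stub(msgs, "   ") is False          # whitespace-only: nothing to append
    assert needs_system_stub([], "# Tools ...") is False    # no messages: nothing to do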
@@ -429,5 +430,4 @@ def resolve_ast_by_type(value):
         output = ast.unparse(value.value) + "[" + ast.unparse(value.slice) + "]"
     else:
         raise Exception(f"Unsupported AST type: {type(value)}")
-    return output
-
+    return output
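For context, the branch kept in this hunk handles subscript nodes (it reads value.value and value.slice) and rebuilds the source text of an indexing expression from its AST parts; on Python 3.9+ the same round trip can be checked directly with ast.unparse:

    # Requires Python 3.9+ for ast.unparse; "data['key']" is an arbitrary example expression.
    import ast

    node = ast.parse("data['key']", mode="eval").body          # an ast.Subscript node
    rebuilt = ast.unparse(node.value) + "[" + ast.unparse(node.slice) + "]"
    assert rebuilt == "data['key']"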