Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files
- Dockerfile +5 -0
- patch1 +72 -0
Dockerfile
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM vllm/vllm-openai:v0.9.2
|
2 |
+
|
3 |
+
COPY patch1 /tmp/patch1
|
4 |
+
|
5 |
+
RUN cd /usr/local/lib/python3.12/dist-packages && patch -p 1 < /tmp/patch1
|
patch1
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
|
2 |
+
index 79eac184a..868e236e6 100644
|
3 |
+
--- a/vllm/entrypoints/openai/serving_chat.py
|
4 |
+
+++ b/vllm/entrypoints/openai/serving_chat.py
|
5 |
+
@@ -200,6 +200,15 @@ class OpenAIServingChat(OpenAIServing):
|
6 |
+
logger.exception("Error in preprocessing prompt inputs")
|
7 |
+
return self.create_error_response(f"{e} {e.__cause__}")
|
8 |
+
|
9 |
+
+ logger.info("DEBUG "*100)
|
10 |
+
+
|
11 |
+
+ logger.info("REQUEST PROMPTS %s", request_prompts)
|
12 |
+
+ logger.info("CONVERSATION %s", conversation)
|
13 |
+
+
|
14 |
+
+ # Not sure to understand what the engine prompt is: from what I understand this is the raw /tokenized input
|
15 |
+
+ # fed to the llm (so what we are looking for)
|
16 |
+
+ logger.info("ENGINE PROMPTS %s", engine_prompts)
|
17 |
+
+
|
18 |
+
request_id = "chatcmpl-" \
|
19 |
+
f"{self._base_request_id(raw_request, request.request_id)}"
|
20 |
+
|
21 |
+
diff --git a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
|
22 |
+
index c7030d34d..7a2765838 100644
|
23 |
+
--- a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
|
24 |
+
+++ b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
|
25 |
+
@@ -44,7 +44,7 @@ class Hermes2ProToolParser(ToolParser):
|
26 |
+
self.tool_call_end_token: str = "</tool_call>"
|
27 |
+
|
28 |
+
self.tool_call_regex = re.compile(
|
29 |
+
- r"<tool_call>(.*?)</tool_call>|<tool_call>(.*)", re.DOTALL)
|
30 |
+
+ r"<tool_call>\s*(.*?)\s*(?:<tool_call>\s*|</tool_call>\s*|$)", re.DOTALL)
|
31 |
+
self.scratch_pad_regex = re.compile(
|
32 |
+
r"<scratch_pad>(.*?)</scratch_pad>", re.DOTALL)
|
33 |
+
|
34 |
+
@@ -80,15 +80,17 @@ class Hermes2ProToolParser(ToolParser):
|
35 |
+
# tag and end-of-string so the result of
|
36 |
+
# findall is an array of tuples where one is a function call and
|
37 |
+
# the other is None
|
38 |
+
- function_call_tuples = (
|
39 |
+
- self.tool_call_regex.findall(model_output))
|
40 |
+
-
|
41 |
+
- # load the JSON, and then use it to build the Function and
|
42 |
+
- # Tool Call
|
43 |
+
- raw_function_calls = [
|
44 |
+
- json.loads(match[0] if match[0] else match[1])
|
45 |
+
- for match in function_call_tuples
|
46 |
+
- ]
|
47 |
+
+ matches = self.tool_call_regex.findall(model_output)
|
48 |
+
+ raw_function_calls = []
|
49 |
+
+ for match in matches:
|
50 |
+
+ if not match:
|
51 |
+
+ continue
|
52 |
+
+ try:
|
53 |
+
+ parsed = json.loads(match.strip())
|
54 |
+
+ raw_function_calls.append(parsed)
|
55 |
+
+ except json.JSONDecodeError as e:
|
56 |
+
+ logger.warning("Skipping malformed tool_call block: %s", e)
|
57 |
+
+
|
58 |
+
tool_calls = [
|
59 |
+
ToolCall(
|
60 |
+
type="function",
|
61 |
+
@@ -99,9 +101,8 @@ class Hermes2ProToolParser(ToolParser):
|
62 |
+
ensure_ascii=False)))
|
63 |
+
for function_call in raw_function_calls
|
64 |
+
]
|
65 |
+
-
|
66 |
+
- content = model_output[:model_output.
|
67 |
+
- find(self.tool_call_start_token)]
|
68 |
+
+ tool_call_start = model_output.find(self.tool_call_start_token)
|
69 |
+
+ content = model_output[:tool_call_start] if tool_call_start >= 0 else None
|
70 |
+
return ExtractedToolCallInformation(
|
71 |
+
tools_called=True,
|
72 |
+
tool_calls=tool_calls,
|