Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files
- Dockerfile +5 -0
- patch1 +72 -0
Dockerfile
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM vllm/vllm-openai:v0.9.2
|
2 |
+
|
3 |
+
COPY patch1 /tmp/patch1
|
4 |
+
|
5 |
+
RUN cd /usr/local/lib/python3.12/dist-packages && patch -p 1 < /tmp/patch1
|
patch1
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
|
2 |
+
index 79eac184a..868e236e6 100644
|
3 |
+
--- a/vllm/entrypoints/openai/serving_chat.py
|
4 |
+
+++ b/vllm/entrypoints/openai/serving_chat.py
|
5 |
+
@@ -200,6 +200,15 @@ class OpenAIServingChat(OpenAIServing):
|
6 |
+
logger.exception("Error in preprocessing prompt inputs")
|
7 |
+
return self.create_error_response(f"{e} {e.__cause__}")
|
8 |
+
|
9 |
+
+ logger.info("DEBUG "*100)
|
10 |
+
+
|
11 |
+
+ logger.info("REQUEST PROMPTS %s", request_prompts)
|
12 |
+
+ logger.info("CONVERSATION %s", conversation)
|
13 |
+
+
|
14 |
+
+ # Not sure to understand what the engine prompt is: from what I understand this is the raw /tokenized input
|
15 |
+
+ # fed to the llm (so what we are looking for)
|
16 |
+
+ logger.info("ENGINE PROMPTS %s", engine_prompts)
|
17 |
+
+
|
18 |
+
request_id = "chatcmpl-" \
|
19 |
+
f"{self._base_request_id(raw_request, request.request_id)}"
|
20 |
+
|
21 |
+
diff --git a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
|
22 |
+
index c7030d34d..7a2765838 100644
|
23 |
+
--- a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
|
24 |
+
+++ b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
|
25 |
+
@@ -44,7 +44,7 @@ class Hermes2ProToolParser(ToolParser):
|
26 |
+
self.tool_call_end_token: str = "</tool_call>"
|
27 |
+
|
28 |
+
self.tool_call_regex = re.compile(
|
29 |
+
- r"<tool_call>(.*?)</tool_call>|<tool_call>(.*)", re.DOTALL)
|
30 |
+
+ r"<tool_call>\s*(.*?)\s*(?:<tool_call>\s*|</tool_call>\s*|$)", re.DOTALL)
|
31 |
+
self.scratch_pad_regex = re.compile(
|
32 |
+
r"<scratch_pad>(.*?)</scratch_pad>", re.DOTALL)
|
33 |
+
|
34 |
+
@@ -80,15 +80,17 @@ class Hermes2ProToolParser(ToolParser):
|
35 |
+
# tag and end-of-string so the result of
|
36 |
+
# findall is an array of tuples where one is a function call and
|
37 |
+
# the other is None
|
38 |
+
- function_call_tuples = (
|
39 |
+
- self.tool_call_regex.findall(model_output))
|
40 |
+
-
|
41 |
+
- # load the JSON, and then use it to build the Function and
|
42 |
+
- # Tool Call
|
43 |
+
- raw_function_calls = [
|
44 |
+
- json.loads(match[0] if match[0] else match[1])
|
45 |
+
- for match in function_call_tuples
|
46 |
+
- ]
|
47 |
+
+ matches = self.tool_call_regex.findall(model_output)
|
48 |
+
+ raw_function_calls = []
|
49 |
+
+ for match in matches:
|
50 |
+
+ if not match:
|
51 |
+
+ continue
|
52 |
+
+ try:
|
53 |
+
+ parsed = json.loads(match.strip())
|
54 |
+
+ raw_function_calls.append(parsed)
|
55 |
+
+ except json.JSONDecodeError as e:
|
56 |
+
+ logger.warning("Skipping malformed tool_call block: %s", e)
|
57 |
+
+
|
58 |
+
tool_calls = [
|
59 |
+
ToolCall(
|
60 |
+
type="function",
|
61 |
+
@@ -99,9 +101,8 @@ class Hermes2ProToolParser(ToolParser):
|
62 |
+
ensure_ascii=False)))
|
63 |
+
for function_call in raw_function_calls
|
64 |
+
]
|
65 |
+
-
|
66 |
+
- content = model_output[:model_output.
|
67 |
+
- find(self.tool_call_start_token)]
|
68 |
+
+ tool_call_start = model_output.find(self.tool_call_start_token)
|
69 |
+
+ content = model_output[:tool_call_start] if tool_call_start >= 0 else None
|
70 |
+
return ExtractedToolCallInformation(
|
71 |
+
tools_called=True,
|
72 |
+
tool_calls=tool_calls,
|