XciD HF Staff committed on
Commit
5f20b5a
·
verified ·
1 Parent(s): 5d05f89

Upload 2 files

Browse files
Files changed (2) hide show
  1. Dockerfile +5 -0
  2. patch1 +72 -0
Dockerfile ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ FROM vllm/vllm-openai:v0.9.2
2
+
3
+ COPY patch1 /tmp/patch1
4
+
5
+ RUN cd /usr/local/lib/python3.12/dist-packages && patch -p 1 < /tmp/patch1
patch1 ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
2
+ index 79eac184a..868e236e6 100644
3
+ --- a/vllm/entrypoints/openai/serving_chat.py
4
+ +++ b/vllm/entrypoints/openai/serving_chat.py
5
+ @@ -200,6 +200,15 @@ class OpenAIServingChat(OpenAIServing):
6
+ logger.exception("Error in preprocessing prompt inputs")
7
+ return self.create_error_response(f"{e} {e.__cause__}")
8
+
9
+ + logger.info("DEBUG "*100)
10
+ +
11
+ + logger.info("REQUEST PROMPTS %s", request_prompts)
12
+ + logger.info("CONVERSATION %s", conversation)
13
+ +
14
+ + # Not sure I understand what the engine prompt is: from what I understand, this is the raw/tokenized input
15
+ + # fed to the llm (so what we are looking for)
16
+ + logger.info("ENGINE PROMPTS %s", engine_prompts)
17
+ +
18
+ request_id = "chatcmpl-" \
19
+ f"{self._base_request_id(raw_request, request.request_id)}"
20
+
21
+ diff --git a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
22
+ index c7030d34d..7a2765838 100644
23
+ --- a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
24
+ +++ b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
25
+ @@ -44,7 +44,7 @@ class Hermes2ProToolParser(ToolParser):
26
+ self.tool_call_end_token: str = "</tool_call>"
27
+
28
+ self.tool_call_regex = re.compile(
29
+ - r"<tool_call>(.*?)</tool_call>|<tool_call>(.*)", re.DOTALL)
30
+ + r"<tool_call>\s*(.*?)\s*(?:<tool_call>\s*|</tool_call>\s*|$)", re.DOTALL)
31
+ self.scratch_pad_regex = re.compile(
32
+ r"<scratch_pad>(.*?)</scratch_pad>", re.DOTALL)
33
+
34
+ @@ -80,15 +80,17 @@ class Hermes2ProToolParser(ToolParser):
35
+ # tag and end-of-string so the result of
36
+ # findall is an array of tuples where one is a function call and
37
+ # the other is None
38
+ - function_call_tuples = (
39
+ - self.tool_call_regex.findall(model_output))
40
+ -
41
+ - # load the JSON, and then use it to build the Function and
42
+ - # Tool Call
43
+ - raw_function_calls = [
44
+ - json.loads(match[0] if match[0] else match[1])
45
+ - for match in function_call_tuples
46
+ - ]
47
+ + matches = self.tool_call_regex.findall(model_output)
48
+ + raw_function_calls = []
49
+ + for match in matches:
50
+ + if not match:
51
+ + continue
52
+ + try:
53
+ + parsed = json.loads(match.strip())
54
+ + raw_function_calls.append(parsed)
55
+ + except json.JSONDecodeError as e:
56
+ + logger.warning("Skipping malformed tool_call block: %s", e)
57
+ +
58
+ tool_calls = [
59
+ ToolCall(
60
+ type="function",
61
+ @@ -99,9 +101,8 @@ class Hermes2ProToolParser(ToolParser):
62
+ ensure_ascii=False)))
63
+ for function_call in raw_function_calls
64
+ ]
65
+ -
66
+ - content = model_output[:model_output.
67
+ - find(self.tool_call_start_token)]
68
+ + tool_call_start = model_output.find(self.tool_call_start_token)
69
+ + content = model_output[:tool_call_start] if tool_call_start >= 0 else None
70
+ return ExtractedToolCallInformation(
71
+ tools_called=True,
72
+ tool_calls=tool_calls,