Spaces:
Running
on
Zero
Running
on
Zero
added token processing
Browse files- src/app.py +45 -7
src/app.py
CHANGED
@@ -94,13 +94,13 @@ def process_user_input(message: dict, max_images: int) -> list[dict]:
|
|
94 |
|
95 |
def process_history(history: list[dict]) -> list[dict]:
|
96 |
messages = []
|
97 |
-
|
98 |
|
99 |
for item in history:
|
100 |
if item["role"] == "assistant":
|
101 |
-
if
|
102 |
-
messages.append({"role": "user", "content":
|
103 |
-
|
104 |
|
105 |
messages.append(
|
106 |
{
|
@@ -110,13 +110,51 @@ def process_history(history: list[dict]) -> list[dict]:
|
|
110 |
)
|
111 |
else:
|
112 |
content = item["content"]
|
113 |
-
|
114 |
{"type": "text", "text": content}
|
115 |
if isinstance(content, str)
|
116 |
else {"type": "image", "url": content[0]}
|
117 |
)
|
118 |
|
119 |
-
if
|
120 |
-
messages.append({"role": "user", "content":
|
121 |
|
122 |
return messages
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
def process_history(history: list[dict]) -> list[dict]:
|
96 |
messages = []
|
97 |
+
content_buffer = []
|
98 |
|
99 |
for item in history:
|
100 |
if item["role"] == "assistant":
|
101 |
+
if content_buffer:
|
102 |
+
messages.append({"role": "user", "content": content_buffer})
|
103 |
+
content_buffer = []
|
104 |
|
105 |
messages.append(
|
106 |
{
|
|
|
110 |
)
|
111 |
else:
|
112 |
content = item["content"]
|
113 |
+
content_buffer.append(
|
114 |
{"type": "text", "text": content}
|
115 |
if isinstance(content, str)
|
116 |
else {"type": "image", "url": content[0]}
|
117 |
)
|
118 |
|
119 |
+
if content_buffer:
|
120 |
+
messages.append({"role": "user", "content": content_buffer})
|
121 |
|
122 |
return messages
|
123 |
+
|
124 |
+
|
125 |
+
@spaces.GPU(duration=120)
def run(
    message: dict, history: list[dict], system_prompt: str, max_new_tokens: int = 512
) -> Iterator[str]:
    """Stream a chat completion for *message* given the prior *history*.

    Assembles the full transcript (optional system prompt, processed
    history, then the new user turn), tokenizes it via the processor's
    chat template, and runs ``model.generate`` on a background thread,
    yielding the cumulative decoded text after each streamed delta.

    Args:
        message: The incoming user turn (dict; passed to ``process_user_input``).
        history: Prior turns, converted by ``process_history``.
        system_prompt: Optional system instruction; omitted when falsy.
        max_new_tokens: Generation budget forwarded to ``model.generate``.

    Yields:
        The full response text accumulated so far, once per decoded delta.
    """
    # NOTE(review): the diff context above declares
    # process_user_input(message: dict, max_images: int) -> list[dict],
    # but this call passes only `message` — confirm the current signature.
    chat: list[dict] = []
    if system_prompt:
        chat.append(
            {"role": "system", "content": [{"type": "text", "text": system_prompt}]}
        )
    chat.extend(process_history(history))
    chat.append({"role": "user", "content": process_user_input(message)})

    inputs = input_processor.apply_chat_template(
        chat,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(device=model.device, dtype=torch.bfloat16)

    # skip_prompt drops the echoed input tokens so only new text is streamed;
    # the 30 s timeout bounds each wait for the next token from the worker.
    streamer = TextIteratorStreamer(
        input_processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True
    )
    worker = Thread(
        target=model.generate,
        kwargs=dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens),
    )
    worker.start()

    so_far = ""
    for piece in streamer:
        so_far += piece
        yield so_far
|