Spaces: Running on Zero
Update app.py
Browse files
app.py CHANGED
@@ -182,7 +182,8 @@ def model_inference(input_dict, history):
|
|
182 |
return_tensors="pt",
|
183 |
padding=True,
|
184 |
).to("cuda")
|
185 |
-
|
|
|
186 |
streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=False)
|
187 |
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, temperature=0.1, top_p=0.95, top_k=50)
|
188 |
# generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False, num_beams=1)
|
@@ -205,7 +206,7 @@ def model_inference(input_dict, history):
|
|
205 |
|
206 |
# Process the full segment (e.g., remove <|im_end|>)
|
207 |
processed_segment = current_model_output_segment.split("<|im_end|>", 1)[0] if "<|im_end|>" in current_model_output_segment else current_model_output_segment
|
208 |
-
|
209 |
# Append this processed segment to the cumulative display string for Gradio
|
210 |
complete_assistant_response_for_gradio += [processed_segment + "\n\n"]
|
211 |
yield complete_assistant_response_for_gradio # Ensure the fully processed segment is yielded to Gradio
|
|
|
182 |
return_tensors="pt",
|
183 |
padding=True,
|
184 |
).to("cuda")
|
185 |
+
print(f"===> messages for generation")
|
186 |
+
print(messages)
|
187 |
streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=False)
|
188 |
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, temperature=0.1, top_p=0.95, top_k=50)
|
189 |
# generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False, num_beams=1)
|
|
|
206 |
|
207 |
# Process the full segment (e.g., remove <|im_end|>)
|
208 |
processed_segment = current_model_output_segment.split("<|im_end|>", 1)[0] if "<|im_end|>" in current_model_output_segment else current_model_output_segment
|
209 |
+
messages.append(dict(role='assistant', content=processed_segment))
|
210 |
# Append this processed segment to the cumulative display string for Gradio
|
211 |
complete_assistant_response_for_gradio += [processed_segment + "\n\n"]
|
212 |
yield complete_assistant_response_for_gradio # Ensure the fully processed segment is yielded to Gradio
|