Spaces:

TIGER-Lab/Pixel-Reasoner

Running on Zero

JasperHaozhe committed on 7 days ago

Commit

15c9525

verified ·

1 Parent(s): 75a3e7f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -182,7 +182,8 @@ def model_inference(input_dict, history):
             return_tensors="pt",
             padding=True,
         ).to("cuda")
         streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=False)
         generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, temperature=0.1, top_p=0.95, top_k=50)
         # generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False, num_beams=1)
@@ -205,7 +206,7 @@ def model_inference(input_dict, history):
         # Process the full segment (e.g., remove <|im_end|>)
         processed_segment = current_model_output_segment.split("<|im_end|>", 1)[0] if "<|im_end|>" in current_model_output_segment else current_model_output_segment
         # Append this processed segment to the cumulative display string for Gradio
         complete_assistant_response_for_gradio += [processed_segment + "\n\n"]
         yield complete_assistant_response_for_gradio # Ensure the fully processed segment is yielded to Gradio

             return_tensors="pt",
             padding=True,
         ).to("cuda")
+        print(f"===> messages for generation")
+        print(messages)
         streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=False)
         generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, temperature=0.1, top_p=0.95, top_k=50)
         # generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False, num_beams=1)
         # Process the full segment (e.g., remove <|im_end|>)
         processed_segment = current_model_output_segment.split("<|im_end|>", 1)[0] if "<|im_end|>" in current_model_output_segment else current_model_output_segment
+        messages.append(dict(role='assistant', content=processed_segment))
         # Append this processed segment to the cumulative display string for Gradio
         complete_assistant_response_for_gradio += [processed_segment + "\n\n"]
         yield complete_assistant_response_for_gradio # Ensure the fully processed segment is yielded to Gradio