JasperHaozhe committed
Commit 15c9525 · verified · 1 Parent(s): 75a3e7f

Update app.py

Files changed (1):
  1. app.py +3 -2
app.py CHANGED
@@ -182,7 +182,8 @@ def model_inference(input_dict, history):
         return_tensors="pt",
         padding=True,
     ).to("cuda")
-
+    print(f"===> messages for generation")
+    print(messages)
     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=False)
     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, temperature=0.1, top_p=0.95, top_k=50)
     # generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False, num_beams=1)
@@ -205,7 +206,7 @@ def model_inference(input_dict, history):
 
     # Process the full segment (e.g., remove <|im_end|>)
     processed_segment = current_model_output_segment.split("<|im_end|>", 1)[0] if "<|im_end|>" in current_model_output_segment else current_model_output_segment
-
+    messages.append(dict(role='assistant', content=processed_segment))
     # Append this processed segment to the cumulative display string for Gradio
     complete_assistant_response_for_gradio += [processed_segment + "\n\n"]
     yield complete_assistant_response_for_gradio  # Ensure the fully processed segment is yielded to Gradio
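For context, a minimal sketch of the streaming-generation pattern this snippet appears to rely on: a TextIteratorStreamer consumed while model.generate runs in a background thread, with the finished reply appended back into messages, mirroring the messages.append(...) line added in this commit. The model, processor, inputs, and messages objects are assumed from the rest of app.py and are not shown in the diff; this is an illustrative sketch, not the file's actual implementation.

```python
# Sketch only: names such as `model`, `processor`, `inputs`, and `messages`
# are assumed from the surrounding app.py and may differ there.
from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(model, processor, inputs, messages):
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=False)
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024,
                             temperature=0.1, top_p=0.95, top_k=50)

    # generate() blocks, so it runs in a background thread while the streamer
    # yields decoded text chunks to this generator as they are produced.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    segment = ""
    for new_text in streamer:
        segment += new_text
        yield segment  # cumulative text, suitable for a Gradio streaming output

    thread.join()

    # Strip the end-of-turn marker and keep the reply in the chat history,
    # as the commit does with messages.append(dict(role='assistant', ...)).
    reply = segment.split("<|im_end|>", 1)[0]
    messages.append(dict(role="assistant", content=reply))
```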