Chao-Ying committed on
Commit
f1615f0
·
verified ·
1 Parent(s): 2915274

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -53,6 +53,7 @@ def get_florence2():
53
  attn_implementation="eager", # 關鍵:不依賴 flash_attn
54
  torch_dtype=torch.float16 if device == "cuda" else torch.float32
55
  ).to(device).eval()
 
56
  return _processor, _model
57
 
58
  @torch.inference_mode()
@@ -77,8 +78,9 @@ def florence2_text(image: Image.Image, task: str = "caption"):
77
  **inputs,
78
  max_new_tokens=128,
79
  do_sample=False,
80
- num_beams=3,
81
- early_stopping=True,
 
82
  eos_token_id=getattr(getattr(proc, "tokenizer", None), "eos_token_id", None),
83
  )
84
  out = proc.batch_decode(ids, skip_special_tokens=True)[0].strip()
 
53
  attn_implementation="eager", # 關鍵:不依賴 flash_attn
54
  torch_dtype=torch.float16 if device == "cuda" else torch.float32
55
  ).to(device).eval()
56
+ _model.config.use_cache = False
57
  return _processor, _model
58
 
59
  @torch.inference_mode()
 
78
  **inputs,
79
  max_new_tokens=128,
80
  do_sample=False,
81
+ num_beams=1,
82
+ use_cache=False, # ← 關掉 KV-cache(關鍵)
83
+ early_stopping=False, # ← 與 num_beams=1 時無效,但設 False 更乾淨
84
  eos_token_id=getattr(getattr(proc, "tokenizer", None), "eos_token_id", None),
85
  )
86
  out = proc.batch_decode(ids, skip_special_tokens=True)[0].strip()