Update app.py
Browse files
app.py
CHANGED
@@ -53,6 +53,7 @@ def get_florence2():
|
|
53 |
attn_implementation="eager", # 關鍵:不依賴 flash_attn
|
54 |
torch_dtype=torch.float16 if device == "cuda" else torch.float32
|
55 |
).to(device).eval()
|
|
|
56 |
return _processor, _model
|
57 |
|
58 |
@torch.inference_mode()
|
@@ -77,8 +78,9 @@ def florence2_text(image: Image.Image, task: str = "caption"):
|
|
77 |
**inputs,
|
78 |
max_new_tokens=128,
|
79 |
do_sample=False,
|
80 |
-
num_beams=
|
81 |
-
|
|
|
82 |
eos_token_id=getattr(getattr(proc, "tokenizer", None), "eos_token_id", None),
|
83 |
)
|
84 |
out = proc.batch_decode(ids, skip_special_tokens=True)[0].strip()
|
|
|
53 |
attn_implementation="eager", # 關鍵:不依賴 flash_attn
|
54 |
torch_dtype=torch.float16 if device == "cuda" else torch.float32
|
55 |
).to(device).eval()
|
56 |
+
_model.config.use_cache = False
|
57 |
return _processor, _model
|
58 |
|
59 |
@torch.inference_mode()
|
|
|
78 |
**inputs,
|
79 |
max_new_tokens=128,
|
80 |
do_sample=False,
|
81 |
+
num_beams=1,
|
82 |
+
use_cache=False, # ← 關掉 KV-cache(關鍵)
|
83 |
+
early_stopping=False, # ← 在 num_beams=1 時無效,但設 False 更乾淨
|
84 |
eos_token_id=getattr(getattr(proc, "tokenizer", None), "eos_token_id", None),
|
85 |
)
|
86 |
out = proc.batch_decode(ids, skip_special_tokens=True)[0].strip()
|