Update app.py
Browse files
app.py
CHANGED
@@ -20,6 +20,7 @@ tokenizer = AutoTokenizer.from_pretrained(
|
|
20 |
model = LlavaQwen2ForCausalLM.from_pretrained(
|
21 |
'qnguyen3/nanoLLaVA',
|
22 |
torch_dtype=torch.float16,
|
|
|
23 |
trust_remote_code=True)
|
24 |
|
25 |
model.to('cuda')
|
@@ -118,7 +119,7 @@ def bot_streaming(message, history):
|
|
118 |
yield generated_text_without_prompt
|
119 |
|
120 |
|
121 |
-
demo = gr.ChatInterface(fn=bot_streaming, title="nanoLLaVA", examples=[{"text": "What is on the flower?", "files":["./bee.jpg"]},
|
122 |
{"text": "How to make this pastry?", "files":["./baklava.png"]}],
|
123 |
description="Try [nanoLLaVA](https://huggingface.co/qnguyen3/nanoLLaVA) in this demo. Built on top of [Quyen-SE-v0.1](https://huggingface.co/vilm/Quyen-SE-v0.1) (Qwen1.5-0.5B) and [Google SigLIP-400M](https://huggingface.co/google/siglip-so400m-patch14-384). Upload an image and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error.",
|
124 |
stop_btn="Stop Generation", multimodal=True)
|
|
|
20 |
model = LlavaQwen2ForCausalLM.from_pretrained(
|
21 |
'qnguyen3/nanoLLaVA',
|
22 |
torch_dtype=torch.float16,
|
23 |
+
attn_implementation="flash_attention_2",
|
24 |
trust_remote_code=True)
|
25 |
|
26 |
model.to('cuda')
|
|
|
119 |
yield generated_text_without_prompt
|
120 |
|
121 |
|
122 |
+
demo = gr.ChatInterface(fn=bot_streaming, title="🚀nanoLLaVA", examples=[{"text": "What is on the flower?", "files":["./bee.jpg"]},
|
123 |
{"text": "How to make this pastry?", "files":["./baklava.png"]}],
|
124 |
description="Try [nanoLLaVA](https://huggingface.co/qnguyen3/nanoLLaVA) in this demo. Built on top of [Quyen-SE-v0.1](https://huggingface.co/vilm/Quyen-SE-v0.1) (Qwen1.5-0.5B) and [Google SigLIP-400M](https://huggingface.co/google/siglip-so400m-patch14-384). Upload an image and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error.",
|
125 |
stop_btn="Stop Generation", multimodal=True)
|