Upload appstream.py

appstream.py ADDED (+103 -0)
@@ -0,0 +1,103 @@
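
"""Gradio demo: ask questions about an uploaded image with LLaVA-Med 1.5 running on OpenVINO."""
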
from threading import Thread

import gradio as gr
import openvino as ov
from llava.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
from llava.conversation import conv_templates
from llava.mm_utils import get_model_name_from_path, process_images, tokenizer_image_token
from llava.model.builder import load_pretrained_model
from transformers import TextIteratorStreamer

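# Enlarge the question/answer text for widgets tagged with elem_classes="text".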
css = """
.text textarea {font-size: 24px !important;}
.text p {font-size: 24px !important;}
"""

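# Local directory with the converted OpenVINO model. The suffixes appear to encode
# the precision mix (FP16 vs. INT8 image encoder, INT4 language model), though that
# reading of the naming is an assumption here.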
model_path = "llava-med-imf16-llmint4"
# model_path = "llava-med-imint8-llmint4"
model_name = get_model_name_from_path(model_path)

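# Prefer GPU for the language model and the NPU for the image encoder when
# OpenVINO reports them as available; otherwise fall back to CPU.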
device = "GPU" if "GPU" in ov.Core().available_devices else "CPU"
image_device = "NPU" if "NPU" in ov.Core().available_devices else device
tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path=model_path,
    model_base=None,
    model_name=model_name,
    device=device,
    openvino=True,
    image_device=image_device,
)
print("models loaded")


def reset_inputs():
    return None, "", ""


def prepare_inputs_image(image, question):
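    """Build token IDs and the preprocessed image tensor for one question."""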
    conv_mode = "vicuna_v1"  # default conversation template
    qs = question.replace(DEFAULT_IMAGE_TOKEN, "").strip()
    qs = DEFAULT_IMAGE_TOKEN + "\n" + qs  # model.config.mm_use_im_start_end is False

    conv = conv_templates[conv_mode].copy()
    conv.append_message(conv.roles[0], qs)
    conv.append_message(conv.roles[1], None)
    prompt = conv.get_prompt()
    input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0)

    # The image is already a PIL.Image from Gradio, so no file loading is needed.
    image_tensor = process_images([image], image_processor, model.config)[0]
    return input_ids, image_tensor


def run_inference(image, message):
    """
    Handle the chat input and stream the model's response.
    """
    if not message:
        yield ""
        return

    input_ids, image_tensor = prepare_inputs_image(image, message)

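    # Run generation on a background thread; TextIteratorStreamer yields decoded
    # text incrementally so the UI can update as tokens are produced.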
    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = {
        "streamer": streamer,
        "input_ids": input_ids,
        "images": image_tensor.unsqueeze(0).half(),
        "do_sample": False,
        "max_new_tokens": 512,
        "use_cache": True,
    }
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Stream output
    response = ""
    for new_text in streamer:
        response += new_text
        yield response


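# Hypothetical standalone usage (illustrative only; the Gradio app below is the
# real entry point). Assumes a local image file "sample.png":
#
#   from PIL import Image
#   for partial in run_inference(Image.open("sample.png"), "Describe the image."):
#       print(partial)
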
with gr.Blocks(css=css) as demo:
    gr.Markdown("# LLaVA-Med 1.5 OpenVINO Demo")

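    # Layout: image upload on the left; question box and streamed answer on the right.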
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload an Image", height=300, width=500)
        with gr.Column():
            text_input = gr.Textbox(label="Enter a Question", elem_classes="text", interactive=True)
            chatbot = gr.Textbox(label="Answer", elem_classes="text")

    with gr.Row():
        process_button = gr.Button("Process")
        reset_button = gr.Button("Reset")

    gr.Markdown("NOTE: This OpenVINO model is unvalidated. Results are provisional and may contain errors. Use this demo to explore AI PC and OpenVINO optimizations.")
    gr.Markdown("Source model: [microsoft/LLaVA-Med](https://github.com/microsoft/LLaVA-Med). For research purposes only.")

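    # Process click or Enter in the question box streams the answer;
    # Reset clears the image, question, and answer widgets.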
    process_button.click(run_inference, inputs=[image_input, text_input], outputs=chatbot)
    text_input.submit(run_inference, inputs=[image_input, text_input], outputs=chatbot)
    reset_button.click(reset_inputs, inputs=[], outputs=[image_input, text_input, chatbot])

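# Run with `python appstream.py`; Gradio serves the UI on all interfaces at port 7788.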
if __name__ == "__main__":
    demo.launch(server_port=7788, server_name="0.0.0.0")