# Gradio demo: Turkish visual question answering on top of a LLaVA-Med checkpoint.
import gradio as gr
import torch
from PIL import Image
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    CLIPImageProcessor,
    pipeline,
)

from llava.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
from llava.conversation import conv_templates
from llava.mm_utils import tokenizer_image_token


# Use the GPU when PyTorch can see one; otherwise everything runs on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision is only requested on the GPU; the CPU path stays in float32.
model_dtype = torch.float16 if device == "cuda" else torch.float32

print("Model yükleniyor...")

# Model weights, tokenizer files and image-processor config are all loaded
# from the current working directory (".").
model = AutoModelForCausalLM.from_pretrained(".", torch_dtype=model_dtype).to(device)
tokenizer = AutoTokenizer.from_pretrained(".")
image_processor = CLIPImageProcessor.from_pretrained(".")


print("Çeviri yükleniyor...")

# Two one-directional translation pipelines: Turkish -> English for the
# incoming question, English -> Turkish for the model's answer.
tr_to_en = pipeline("translation", model="Helsinki-NLP/opus-mt-tr-en")
en_to_tr = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-big-en-tr")


def turkish_to_english(text: str) -> str:
    """Translate Turkish *text* to English using the ``tr_to_en`` pipeline.

    Args:
        text: Turkish input string.

    Returns:
        The ``translation_text`` field of the first pipeline result, or an
        empty string when *text* is empty or whitespace-only (the pipeline
        is not invoked in that case).
    """
    # Skip the model entirely for blank input; there is nothing to translate.
    if not text or not text.strip():
        return ""
    return tr_to_en(text)[0]["translation_text"]


def english_to_turkish(text: str) -> str:
    """Translate English *text* to Turkish using the ``en_to_tr`` pipeline.

    Args:
        text: English input string.

    Returns:
        The ``translation_text`` field of the first pipeline result, or an
        empty string when *text* is empty or whitespace-only (the pipeline
        is not invoked in that case).
    """
    # Skip the model entirely for blank input; there is nothing to translate.
    if not text or not text.strip():
        return ""
    return en_to_tr(text)[0]["translation_text"]


def predict_turkish(image, turkish_question):
    """Answer a Turkish question about an image, replying in Turkish.

    Steps: translate the question to English, build a ``llava-v1``
    conversation prompt containing the image placeholder, run greedy
    generation on the model with the preprocessed image, then translate the
    English answer back to Turkish.

    Args:
        image: PIL image coming from the Gradio image input; may be ``None``
            when the user submitted without uploading anything.
        turkish_question: The user's question, in Turkish.

    Returns:
        The model's answer translated to Turkish.

    Raises:
        gr.Error: If no image was provided or the question is blank, so the
            UI shows a readable message instead of an internal error.
    """
    if image is None:
        raise gr.Error("Lütfen bir görüntü yükleyin.")
    if not turkish_question or not turkish_question.strip():
        raise gr.Error("Lütfen bir soru yazın.")

    english_question = turkish_to_english(turkish_question)

    # Cast the pixels to the dtype the model was actually loaded with. The
    # model is float32 on CPU, so an unconditional .half() would feed it
    # float16 inputs there.
    pixel_values = image_processor.preprocess(
        image.convert("RGB"), return_tensors="pt"
    )["pixel_values"]
    image_tensor = pixel_values.to(device=device, dtype=model.dtype)

    # Start from a fresh copy of the template so no earlier turns leak in.
    conv = conv_templates["llava-v1"].copy()
    conv.messages = []
    conv.append_message(conv.roles[0], DEFAULT_IMAGE_TOKEN + "\n" + english_question)
    conv.append_message(conv.roles[1], None)  # empty assistant turn to be generated
    prompt = conv.get_prompt()

    # Use LLaVA's tokenizer helper rather than the plain tokenizer so that the
    # image placeholder in the prompt is mapped to IMAGE_TOKEN_INDEX, which is
    # how the model locates where to splice in the image features. The helper
    # returns a 1-D tensor, hence the added batch dimension.
    input_ids = (
        tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt")
        .unsqueeze(0)
        .to(device)
    )

    with torch.inference_mode():
        output_ids = model.generate(
            input_ids=input_ids,
            images=image_tensor,
            do_sample=False,  # greedy decoding for reproducible answers
            max_new_tokens=512,
        )

    # NOTE(review): whether generate() echoes the prompt ahead of the new
    # tokens appears to depend on the installed LLaVA build - confirm. Strip
    # the prompt only when the output really starts with it; otherwise decode
    # the whole sequence.
    generated = output_ids[0]
    prompt_len = input_ids.shape[1]
    if generated.shape[0] >= prompt_len and torch.equal(generated[:prompt_len], input_ids[0]):
        generated = generated[prompt_len:]

    english_response = tokenizer.decode(generated, skip_special_tokens=True).strip()
    return english_to_turkish(english_response)


# Gradio UI: an image upload and a question box as inputs, one text box as
# output. type="pil" makes Gradio hand predict_turkish a PIL image. All
# user-facing labels are in Turkish.
interface = gr.Interface(
    fn=predict_turkish,
    inputs=[
        gr.Image(type="pil", label="Tıbbi Görüntü"),
        gr.Textbox(label="Türkçe Sorunuz", placeholder="Örn: Bu görüntüde bir tümör var mı?"),
    ],
    outputs=gr.Textbox(label="Model Cevabı"),
    title="Türkçe LLaVA-Med Görsel Soru-Cevaplama",
    description="LLaVA-Med v1.5 (Mistral 7B) modelinin Türkçe destekli demo arayüzüdür. Görüntü yükleyin, Türkçe soru sorun, Türkçe cevap alın.",
)


if __name__ == "__main__":
    # Start the Gradio server only when executed as a script, not on import.
    interface.launch()