File size: 2,527 Bytes

dd65fe4
 
 
89c4b70
 
 
dd65fe4
89c4b70
 
 
 
 
 
 
 
 
 
dd65fe4
 
 
 
89c4b70
dd65fe4
 
89c4b70
dd65fe4
89c4b70
 
 
dd65fe4
89c4b70
 
 
dd65fe4
89c4b70
 
 
 
 
 
 
dd65fe4
89c4b70
 
 
 
 
 
 
dd65fe4
89c4b70
 
dd65fe4
89c4b70
dd65fe4
 
 
 
89c4b70
dd65fe4
89c4b70
 
 
dd65fe4
 
89c4b70

import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer, CLIPImageProcessor, pipeline
from llava.conversation import conv_templates
from llava.constants import DEFAULT_IMAGE_TOKEN

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# 1. Load LLaVA-Med (weights, tokenizer and image processor are all read
#    from the current working directory).
print("Model yükleniyor...")
model = AutoModelForCausalLM.from_pretrained(
    ".",
    # Half precision on GPU, full precision on CPU.
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained(".")
image_processor = CLIPImageProcessor.from_pretrained(".")

# 2. Translation models (created through the transformers pipeline API)
print("Çeviri yükleniyor...")
tr_to_en = pipeline(task="translation", model="Helsinki-NLP/opus-mt-tr-en")
en_to_tr = pipeline(task="translation", model="Helsinki-NLP/opus-mt-tc-big-en-tr")

def turkish_to_english(text: str) -> str:
    """Translate Turkish *text* to English with the module-level ``tr_to_en`` pipeline.

    Args:
        text: Turkish source text.

    Returns:
        The ``translation_text`` of the first pipeline result, or ``""`` when
        *text* is ``None``, empty or whitespace-only.
    """
    # Nothing to translate: skip the model call instead of feeding it an
    # empty sequence.
    if not text or not text.strip():
        return ""
    return tr_to_en(text)[0]["translation_text"]

def english_to_turkish(text: str) -> str:
    """Translate English *text* to Turkish with the module-level ``en_to_tr`` pipeline.

    Args:
        text: English source text.

    Returns:
        The ``translation_text`` of the first pipeline result, or ``""`` when
        *text* is ``None``, empty or whitespace-only.
    """
    # Nothing to translate: skip the model call instead of feeding it an
    # empty sequence.
    if not text or not text.strip():
        return ""
    return en_to_tr(text)[0]["translation_text"]

# 3. Prediction function
def predict_turkish(image, turkish_question):
    """Answer a Turkish question about an image and return the answer in Turkish.

    The question is translated to English, sent together with the image to
    the module-level ``model``, and the English answer is translated back.

    Args:
        image: PIL image from the Gradio image input; ``None`` when the user
            has not uploaded anything.
        turkish_question: The question, written in Turkish.

    Returns:
        The model's answer translated to Turkish (``""`` if the model
        produced no text).

    Raises:
        gr.Error: If no image was supplied or the question is blank.
    """
    # Local imports: the file's import block only pulls DEFAULT_IMAGE_TOKEN
    # and conv_templates from llava.
    from llava.constants import IMAGE_TOKEN_INDEX
    from llava.mm_utils import tokenizer_image_token

    # Surface missing inputs as a readable UI error instead of an
    # AttributeError from ``None.convert``.
    if image is None:
        raise gr.Error("Lütfen bir görüntü yükleyin.")
    if not turkish_question or not turkish_question.strip():
        raise gr.Error("Lütfen bir soru yazın.")

    english_question = turkish_to_english(turkish_question.strip())

    # Image format: force RGB, then match the model's device *and* dtype.
    # The model is float32 on CPU, so an unconditional .half() would mismatch.
    pixel_values = image_processor.preprocess(
        image.convert("RGB"), return_tensors="pt"
    )["pixel_values"]
    image_tensor = pixel_values.to(device=device, dtype=model.dtype)

    # LLaVA prompt. Keep the original template spelling first in case this
    # llava build defines it; NOTE(review): upstream LLaVA appears to register
    # the template as "llava_v1" — confirm against the installed package.
    template_name = "llava-v1" if "llava-v1" in conv_templates else "llava_v1"
    conv = conv_templates[template_name].copy()
    conv.messages = []  # start from an empty history
    conv.append_message(conv.roles[0], DEFAULT_IMAGE_TOKEN + "\n" + english_question)
    conv.append_message(conv.roles[1], None)
    prompt = conv.get_prompt()

    # The image placeholder must be mapped to IMAGE_TOKEN_INDEX; the plain
    # tokenizer would split it into ordinary text tokens and the image
    # features would never be spliced into the sequence.
    input_ids = (
        tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt")
        .unsqueeze(0)  # tokenizer_image_token returns a 1-D tensor; add batch dim
        .to(device)
    )

    with torch.inference_mode():
        # input_ids is passed positionally so it also binds correctly when
        # the model overrides generate(inputs, images=...).
        output_ids = model.generate(
            input_ids,
            images=image_tensor,
            do_sample=False,
            max_new_tokens=512,
        )

    # Depending on the LLaVA version, generate() returns either
    # prompt + continuation or only the continuation. Strip the prompt only
    # when it is really there.
    generated = output_ids[0]
    prompt_len = input_ids.shape[1]
    if generated.shape[0] >= prompt_len and torch.equal(
        generated[:prompt_len], input_ids[0]
    ):
        generated = generated[prompt_len:]

    english_response = tokenizer.decode(generated, skip_special_tokens=True).strip()
    if not english_response:
        return ""
    return english_to_turkish(english_response)

# 4. Gradio interface: one image + one Turkish question in, one text answer out.
interface = gr.Interface(
    title="Türkçe LLaVA-Med Görsel Soru-Cevaplama",
    description="LLaVA-Med v1.5 (Mistral 7B) modelinin Türkçe destekli demo arayüzüdür. Görüntü yükleyin, Türkçe soru sorun, Türkçe cevap alın.",
    fn=predict_turkish,
    inputs=[
        # Delivered to predict_turkish as a PIL image.
        gr.Image(label="Tıbbi Görüntü", type="pil"),
        gr.Textbox(
            label="Türkçe Sorunuz",
            placeholder="Örn: Bu görüntüde bir tümör var mı?",
        ),
    ],
    outputs=gr.Textbox(label="Model Cevabı"),
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()