# Source: Hugging Face Space file "app.py" (2.53 kB; page links: raw, history, blame).
# Last commit: "Update app.py" by nezahatkorkmaz, revision 89c4b70 (verified).
import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer, CLIPImageProcessor, pipeline
from llava.conversation import conv_templates
from llava.constants import DEFAULT_IMAGE_TOKEN
# Use the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# 1. Load LLaVA-Med
print("Model yükleniyor...")
# Half precision on GPU, full precision on CPU; all three artifacts are
# loaded from the current directory (".").
model = AutoModelForCausalLM.from_pretrained(
    ".",
    torch_dtype=torch.float32 if device == "cpu" else torch.float16,
)
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained(".")
image_processor = CLIPImageProcessor.from_pretrained(".")
# 2. Translation models (built through the transformers `pipeline` helper)
print("Çeviri yükleniyor...")
# Turkish -> English, applied to the user's question.
tr_to_en = pipeline(
    task="translation",
    model="Helsinki-NLP/opus-mt-tr-en",
)
# English -> Turkish, applied to the model's answer.
en_to_tr = pipeline(
    task="translation",
    model="Helsinki-NLP/opus-mt-tc-big-en-tr",
)
def turkish_to_english(text):
    """Translate Turkish *text* to English with the ``tr_to_en`` pipeline.

    Args:
        text: Turkish text to translate.

    Returns:
        The English translation, or ``""`` when *text* is empty, ``None`` or
        only whitespace (the translation model is not invoked in that case).
    """
    # Nothing to translate: skip the model call instead of feeding it an
    # empty input.
    if not text or not text.strip():
        return ""
    return tr_to_en(text)[0]["translation_text"]
def english_to_turkish(text):
    """Translate English *text* to Turkish with the ``en_to_tr`` pipeline.

    Args:
        text: English text to translate.

    Returns:
        The Turkish translation, or ``""`` when *text* is empty, ``None`` or
        only whitespace (the translation model is not invoked in that case).
    """
    # Nothing to translate: skip the model call instead of feeding it an
    # empty input.
    if not text or not text.strip():
        return ""
    return en_to_tr(text)[0]["translation_text"]
# 3. Prediction function
def predict_turkish(image, turkish_question):
    """Answer a Turkish question about an image, replying in Turkish.

    The question is translated to English, sent to the model together with
    the image, and the English answer is translated back to Turkish.

    Args:
        image: PIL image supplied by the Gradio image input; ``None`` when
            the user has not uploaded anything.
        turkish_question: The user's question, in Turkish.

    Returns:
        The model's answer translated to Turkish, or ``""`` when the model
        produced no text.

    Raises:
        gr.Error: If no image was uploaded or the question is blank.
    """
    # Function-scope imports keep this block self-contained; both names come
    # from the `llava` package the file already depends on.
    from llava.constants import IMAGE_TOKEN_INDEX
    from llava.mm_utils import tokenizer_image_token

    # Surface missing inputs as a readable UI error rather than an
    # AttributeError on `None.convert`.
    if image is None:
        raise gr.Error("Lütfen bir görüntü yükleyin.")
    if not turkish_question or not turkish_question.strip():
        raise gr.Error("Lütfen bir soru yazın.")

    english_question = turkish_to_english(turkish_question.strip())

    # Image format
    image = image.convert("RGB")
    pixel_values = image_processor.preprocess(image, return_tensors="pt")["pixel_values"]
    # Match the model's own dtype: the model is float16 on GPU but float32 on
    # CPU, so an unconditional .half() mismatches on CPU.
    image_tensor = pixel_values.to(device=device, dtype=model.dtype)

    # LLaVA prompt
    conv = conv_templates["llava-v1"].copy()
    conv.messages = []
    conv.append_message(conv.roles[0], DEFAULT_IMAGE_TOKEN + "\n" + english_question)
    conv.append_message(conv.roles[1], None)
    prompt = conv.get_prompt()

    # The image placeholder has to be encoded as IMAGE_TOKEN_INDEX so the
    # model can splice the image features in at that position; the plain
    # tokenizer would encode the placeholder as ordinary text.
    input_ids = (
        tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt")
        .unsqueeze(0)
        .to(device)
    )

    with torch.inference_mode():
        output_ids = model.generate(
            input_ids=input_ids,
            images=image_tensor,
            do_sample=False,
            max_new_tokens=512,
        )

    # Drop the echoed prompt only when it is actually present: some LLaVA
    # versions return prompt + answer, others return the answer alone.
    prompt_len = input_ids.shape[1]
    generated = output_ids[0]
    if generated.shape[0] >= prompt_len and torch.equal(generated[:prompt_len], input_ids[0]):
        generated = generated[prompt_len:]

    english_response = tokenizer.decode(generated, skip_special_tokens=True).strip()
    if not english_response:
        return ""
    return english_to_turkish(english_response)
# 4. Gradio interface
# Two inputs (a PIL image and a Turkish question) are passed to
# `predict_turkish`; its return value is shown in a single text box.
interface = gr.Interface(
    title="Türkçe LLaVA-Med Görsel Soru-Cevaplama",
    description="LLaVA-Med v1.5 (Mistral 7B) modelinin Türkçe destekli demo arayüzüdür. Görüntü yükleyin, Türkçe soru sorun, Türkçe cevap alın.",
    fn=predict_turkish,
    inputs=[
        gr.Image(label="Tıbbi Görüntü", type="pil"),
        gr.Textbox(
            placeholder="Örn: Bu görüntüde bir tümör var mı?",
            label="Türkçe Sorunuz",
        ),
    ],
    outputs=gr.Textbox(label="Model Cevabı"),
)
# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    interface.launch()