import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer, CLIPImageProcessor, pipeline
from llava.conversation import conv_templates
from llava.constants import DEFAULT_IMAGE_TOKEN

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# 1. Load LLaVA-Med
# Weights, tokenizer and image processor are all read from the current
# working directory ("."). Half precision is used on CUDA, full precision
# on CPU.
print("Model yükleniyor...")
model = AutoModelForCausalLM.from_pretrained(
    ".",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)
tokenizer = AutoTokenizer.from_pretrained(".")
image_processor = CLIPImageProcessor.from_pretrained(".")

# 2. Translation models (via pipeline)
# One pipeline per direction: Turkish -> English for the incoming question,
# English -> Turkish for the model's answer.
print("Çeviri yükleniyor...")
tr_to_en = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-tr-en",
)
en_to_tr = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-tc-big-en-tr",
)

def turkish_to_english(text):
    """Translate *text* from Turkish to English.

    Runs the module-level ``tr_to_en`` pipeline and returns the
    ``translation_text`` field of the first result it yields.
    """
    first_result = tr_to_en(text)[0]
    return first_result['translation_text']

def english_to_turkish(text):
    """Translate *text* from English to Turkish.

    Runs the module-level ``en_to_tr`` pipeline and returns the
    ``translation_text`` field of the first result it yields.
    """
    first_result = en_to_tr(text)[0]
    return first_result['translation_text']

# 3. Prediction function
def predict_turkish(image, turkish_question):
    """Answer a Turkish question about an image, replying in Turkish.

    The question is translated to English, combined with the image in a
    LLaVA conversation prompt, answered by the model with greedy decoding,
    and the English answer is translated back to Turkish.

    Args:
        image: PIL image from the Gradio image input, or ``None`` when the
            user did not upload anything.
        turkish_question: The user's question, in Turkish.

    Returns:
        The model's answer translated into Turkish.

    Raises:
        gr.Error: If no image was provided or the question is blank, so the
            UI shows a readable message instead of an internal traceback.
    """
    # Imported here so this function stands on its own; both names come from
    # the llava package that this file already imports at the top.
    from llava.constants import IMAGE_TOKEN_INDEX
    from llava.mm_utils import tokenizer_image_token

    if image is None:
        raise gr.Error("Lütfen bir görüntü yükleyin.")
    if not turkish_question or not turkish_question.strip():
        raise gr.Error("Lütfen bir soru yazın.")

    english_question = turkish_to_english(turkish_question)

    # Image format: force three channels, then cast to the dtype the model
    # was actually loaded with. An unconditional .half() would mismatch the
    # float32 weights used when CUDA is unavailable.
    image = image.convert("RGB")
    pixel_values = image_processor.preprocess(image, return_tensors="pt")["pixel_values"]
    image_tensor = pixel_values.to(device=device, dtype=model.dtype)

    # LLaVA prompt
    # NOTE(review): the UI description mentions a Mistral-based checkpoint;
    # confirm "llava-v1" is the conversation template it was tuned with.
    conv = conv_templates["llava-v1"].copy()
    conv.messages = []
    conv.append_message(conv.roles[0], DEFAULT_IMAGE_TOKEN + "\n" + english_question)
    conv.append_message(conv.roles[1], None)
    prompt = conv.get_prompt()

    # A plain tokenizer call would split the image placeholder into ordinary
    # text tokens, and the model would never receive the image features.
    # tokenizer_image_token inserts IMAGE_TOKEN_INDEX at the placeholder
    # position and returns a 1-D tensor, hence the added batch dimension.
    input_ids = tokenizer_image_token(
        prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
    ).unsqueeze(0).to(device)

    with torch.inference_mode():
        # The prompt ids are passed positionally so the call works both with
        # the stock Hugging Face generate() and with LLaVA builds that
        # override generate() and take the ids as their first parameter.
        output_ids = model.generate(
            input_ids,
            images=image_tensor,
            do_sample=False,
            max_new_tokens=512,
        )

    # Depending on the LLaVA release, generate() returns either
    # prompt + completion or the completion alone. Strip the prompt only when
    # it is really echoed, so the start of the answer is never cut off.
    generated = output_ids[0]
    prompt_len = input_ids.shape[1]
    if generated.shape[0] >= prompt_len and torch.equal(generated[:prompt_len], input_ids[0]):
        generated = generated[prompt_len:]

    english_response = tokenizer.decode(generated, skip_special_tokens=True).strip()
    return english_to_turkish(english_response)

# 4. Gradio interface
# Input widgets are listed in the positional order predict_turkish expects:
# the image first, then the Turkish question.
_image_input = gr.Image(type="pil", label="Tıbbi Görüntü")
_question_input = gr.Textbox(
    label="Türkçe Sorunuz",
    placeholder="Örn: Bu görüntüde bir tümör var mı?",
)
_answer_output = gr.Textbox(label="Model Cevabı")

interface = gr.Interface(
    fn=predict_turkish,
    inputs=[_image_input, _question_input],
    outputs=_answer_output,
    title="Türkçe LLaVA-Med Görsel Soru-Cevaplama",
    description="LLaVA-Med v1.5 (Mistral 7B) modelinin Türkçe destekli demo arayüzüdür. Görüntü yükleyin, Türkçe soru sorun, Türkçe cevap alın.",
)

# Start the web UI only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()