nezahatkorkmaz
/

turkish-llava-med-pipeline-v1.5-mistral-7b

text-generation

8-bit precision

Model card Files Files and versions Community

turkish-llava-med-pipeline-v1.5-mistral-7b / app.py

nezahatkorkmaz's picture

Update app.py

89c4b70 verified 3 months ago

2.53 kB

	import gradio as gr
	import torch
	from PIL import Image
	from transformers import AutoModelForCausalLM, AutoTokenizer, CLIPImageProcessor, pipeline
	from llava.conversation import conv_templates
	from llava.constants import DEFAULT_IMAGE_TOKEN

	# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# 1. Load LLaVA-Med: model weights, tokenizer and image processor are all
	#    read from the current directory. Half precision is requested only
	#    when running on CUDA; the CPU path stays in float32.
	print("Model yükleniyor...")
	model = AutoModelForCausalLM.from_pretrained(
	    ".",
	    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
	).to(device)
	tokenizer = AutoTokenizer.from_pretrained(".")
	image_processor = CLIPImageProcessor.from_pretrained(".")

	# 2. Translation models (created through the transformers pipeline helper):
	#    Turkish -> English for the question, English -> Turkish for the answer.
	print("Çeviri yükleniyor...")
	tr_to_en = pipeline(
	    "translation",
	    model="Helsinki-NLP/opus-mt-tr-en",
	)
	en_to_tr = pipeline(
	    "translation",
	    model="Helsinki-NLP/opus-mt-tc-big-en-tr",
	)

	def turkish_to_english(text: str) -> str:
	    """Translate Turkish text to English using the ``tr_to_en`` pipeline.

	    Args:
	        text: Turkish source text.

	    Returns:
	        The English translation, or an empty string when ``text`` is empty
	        or contains only whitespace (the translation model is not invoked
	        in that case).
	    """
	    # Nothing to translate: skip the model call instead of feeding it an
	    # empty prompt.
	    if not text.strip():
	        return ""
	    return tr_to_en(text)[0]["translation_text"]

	def english_to_turkish(text: str) -> str:
	    """Translate English text to Turkish using the ``en_to_tr`` pipeline.

	    Args:
	        text: English source text.

	    Returns:
	        The Turkish translation, or an empty string when ``text`` is empty
	        or contains only whitespace (the translation model is not invoked
	        in that case).
	    """
	    # Nothing to translate: skip the model call instead of feeding it an
	    # empty prompt.
	    if not text.strip():
	        return ""
	    return en_to_tr(text)[0]["translation_text"]

	# 3. Prediction function
	def predict_turkish(image, turkish_question):
	    """Answer a Turkish question about an image, in Turkish.

	    The question is translated to English, combined with the image
	    placeholder token in a "llava-v1" conversation prompt, and passed to
	    the model together with the preprocessed image. The generated English
	    answer is translated back to Turkish.

	    Args:
	        image: PIL image to ask about (the Gradio input is declared with
	            ``type="pil"``).
	        turkish_question: The question text, in Turkish.

	    Returns:
	        The model's answer translated to Turkish.

	    Raises:
	        gr.Error: If no image was supplied or the question is blank.
	    """
	    # Fail with a readable message instead of an AttributeError on
	    # image.convert() when the request carries no image.
	    if image is None:
	        raise gr.Error("Lütfen bir görüntü yükleyin.")
	    if not turkish_question or not turkish_question.strip():
	        raise gr.Error("Lütfen bir soru yazın.")

	    english_question = turkish_to_english(turkish_question)

	    # Image preprocessing. The model is loaded as float16 on CUDA but as
	    # float32 on CPU, so the pixel tensor follows the model's dtype rather
	    # than being forced to half precision.
	    image = image.convert("RGB")
	    pixel_values = image_processor.preprocess(image, return_tensors="pt")["pixel_values"]
	    image_tensor = pixel_values.to(device=device, dtype=model.dtype)

	    # Build the LLaVA prompt: one user turn (image placeholder + question)
	    # followed by an empty assistant turn for the model to complete.
	    conv = conv_templates["llava-v1"].copy()
	    conv.messages = []
	    conv.append_message(conv.roles[0], DEFAULT_IMAGE_TOKEN + "\n" + english_question)
	    conv.append_message(conv.roles[1], None)
	    prompt = conv.get_prompt()

	    # NOTE(review): the prompt, including DEFAULT_IMAGE_TOKEN, is tokenized
	    # as plain text here. LLaVA's own inference scripts appear to use
	    # llava.mm_utils.tokenizer_image_token to map the placeholder to the
	    # image token index - confirm the image features are actually injected
	    # on this path.
	    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

	    with torch.inference_mode():
	        output_ids = model.generate(
	            input_ids=input_ids,
	            images=image_tensor,
	            do_sample=False,
	            max_new_tokens=512,
	        )

	    # Decode only the positions after the prompt length.
	    # NOTE(review): assumes generate() returns prompt + completion; verify
	    # for the installed LLaVA version.
	    english_response = tokenizer.decode(
	        output_ids[0, input_ids.shape[1]:],
	        skip_special_tokens=True,
	    )
	    return english_to_turkish(english_response)

	# 4. Gradio interface: takes a PIL image and a Turkish question, and shows
	#    the string returned by predict_turkish in a text box.
	interface = gr.Interface(
	    title="Türkçe LLaVA-Med Görsel Soru-Cevaplama",
	    description="LLaVA-Med v1.5 (Mistral 7B) modelinin Türkçe destekli demo arayüzüdür. Görüntü yükleyin, Türkçe soru sorun, Türkçe cevap alın.",
	    fn=predict_turkish,
	    inputs=[
	        gr.Image(label="Tıbbi Görüntü", type="pil"),
	        gr.Textbox(
	            placeholder="Örn: Bu görüntüde bir tümör var mı?",
	            label="Türkçe Sorunuz",
	        ),
	    ],
	    outputs=gr.Textbox(label="Model Cevabı"),
	)

	# Launch the UI only when this file is executed as a script, not when it
	# is imported.
	if __name__ == "__main__":
	    interface.launch()