nezahatkorkmaz
/

turkish-llava-med-pipeline-v1.5-mistral-7b

@@ -1,96 +1,67 @@
 import gradio as gr
 import torch
 from PIL import Image
-from transformers import pipeline
-from transformers import CLIPVisionModel, CLIPImageProcessor
-from transformers import AutoTokenizer, AutoModelForCausalLM
-# 1. Çeviri modelleri
-print("Çeviri modelleri yükleniyor...")
 tr_to_en = pipeline("translation", model="Helsinki-NLP/opus-mt-tr-en")
 en_to_tr = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-big-en-tr")
 def turkish_to_english(text):
-    result = tr_to_en(text, max_length=512)
-    return result[0]['translation_text']
 def english_to_turkish(text):
-    result = en_to_tr(text, max_length=512)
-    return result[0]['translation_text']
-print("Çeviri modelleri hazır!")
-# 2. LLaVA-Med bileşenleri
-print("LLaVA-Med bileşenleri yükleniyor...")
-vision_model_path = "openai/clip-vit-large-patch14"
-vision_model = CLIPVisionModel.from_pretrained(vision_model_path)
-image_processor = CLIPImageProcessor.from_pretrained(vision_model_path)
-model_path = "microsoft/llava-med-v1.5-mistral-7b"
-tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModelForCausalLM.from_pretrained(
-    model_path,
-    torch_dtype=torch.float16,
-    load_in_8bit=True,
-    device_map="auto"
-)
-print("LLaVA-Med modeli yüklendi!")
-def predict_turkish(image, turkish_question):
-    try:
-        # Görüntüyü işle
-        image_inputs = image_processor(images=image, return_tensors="pt").to(model.device)
-        image_features = vision_model(**image_inputs).last_hidden_state
-        # Türkçe -> İngilizce çeviri
-        english_question = turkish_to_english(turkish_question)
-        # Prompt hazırla
-        prompt = f"Image description: [No text content in the image].\\n\\nQuestion: {english_question}\\n\\nAnswer:"
-        # Yanıt oluştur
-        inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=500,
-                do_sample=False
-            )
-        english_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # İngilizce -> Türkçe çeviri
-        turkish_response = english_to_turkish(english_response)
-        return turkish_response
-    except Exception as e:
-        # Hata durumunda yedek sistem
-        english_question = turkish_to_english(turkish_question)
-        # Basit anahtar kelime tabanlı yapay yanıtlar
-        if "symptom" in english_question.lower() or "semptom" in turkish_question.lower():
-            english_response = "Yes, the image shows signs of pulmonary edema with bilateral infiltrates. There are also indications of cardiomegaly. These findings are consistent with heart failure."
-        elif "diagnosis" in english_question.lower() or "tanı" in turkish_question.lower():
-            english_response = "The radiograph shows pulmonary edema with bilateral infiltrates, consistent with congestive heart failure. There's also evidence of cardiomegaly (enlarged heart)."
-        elif "normal" in english_question.lower() or "normal" in turkish_question.lower():
-            english_response = "No, this chest X-ray is not normal. It shows pulmonary edema with bilateral infiltrates and cardiomegaly, consistent with heart failure."
-        else:
-            english_response = "The chest X-ray shows pulmonary edema with bilateral infiltrates, particularly in the lower lung fields. There is also cardiomegaly (enlarged heart). These findings are consistent with congestive heart failure."
-        turkish_response = english_to_turkish(english_response)
-        return turkish_response
-# Gradio arayüzü oluştur
 interface = gr.Interface(
     fn=predict_turkish,
     inputs=[
         gr.Image(type="pil", label="Tıbbi Görüntü"),
-        gr.Textbox(label="Türkçe Sorunuz", placeholder="Örn: Bu görüntüde akciğerlerde bir anormallik görüyor musunuz?")
     ],
-    outputs=gr.Textbox(label="Cevap"),
-    title="Türkçe Tıbbi Görüntü Analiz Modeli",
-    description="Bu model, Microsoft'un LLaVA-Med modelini Türkçe kullanım için özelleştirilmiş şekilde kullanmanızı sağlar."
 )
-interface.launch()

 import gradio as gr
 import torch
 from PIL import Image
+from transformers import AutoModelForCausalLM, AutoTokenizer, CLIPImageProcessor, pipeline
+from llava.conversation import conv_templates
+from llava.constants import DEFAULT_IMAGE_TOKEN
+# Run on the GPU when CUDA is available, otherwise on the CPU.
+use_cuda = torch.cuda.is_available()
+device = "cuda" if use_cuda else "cpu"
+# 1. Load LLaVA-Med. Model weights, tokenizer and image processor are all
+#    read from the current directory (".").
+print("Model yükleniyor...")
+# float16 on the GPU, float32 on the CPU.
+model_dtype = torch.float16 if use_cuda else torch.float32
+model = AutoModelForCausalLM.from_pretrained(".", torch_dtype=model_dtype).to(device)
+tokenizer = AutoTokenizer.from_pretrained(".")
+image_processor = CLIPImageProcessor.from_pretrained(".")
+# 2. Translation models (created through the transformers `pipeline` helper).
+print("Çeviri yükleniyor...")
 tr_to_en = pipeline("translation", model="Helsinki-NLP/opus-mt-tr-en")
 en_to_tr = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-big-en-tr")
 def turkish_to_english(text):
+    """Run ``text`` through the ``tr_to_en`` pipeline and return the translated string."""
+    translations = tr_to_en(text)
+    # The pipeline result is indexed as a list of dicts; use the first entry.
+    first_result = translations[0]
+    return first_result['translation_text']
 def english_to_turkish(text):
+    """Run ``text`` through the ``en_to_tr`` pipeline and return the translated string."""
+    translations = en_to_tr(text)
+    # The pipeline result is indexed as a list of dicts; use the first entry.
+    first_result = translations[0]
+    return first_result['translation_text']
+# 3. Prediction function
+def predict_turkish(image, turkish_question):
+    """Answer a Turkish question about an image and return the answer in Turkish.
+
+    The question is translated to English, placed after the image token in a
+    LLaVA conversation prompt, answered with ``model.generate`` and the
+    decoded answer is translated back to Turkish.
+
+    Args:
+        image: PIL image coming from the Gradio image input, or ``None`` when
+            no image was supplied.
+        turkish_question: The user's question in Turkish.
+
+    Returns:
+        The answer translated to Turkish, or a short Turkish notice when the
+        image or the question is missing.
+    """
+    # Imported locally so this function stands on its own; both names come
+    # from the same ``llava`` package the file already imports from.
+    from llava.constants import IMAGE_TOKEN_INDEX
+    from llava.mm_utils import tokenizer_image_token
+
+    # Guard against a missing image or a blank question instead of failing
+    # with an AttributeError further down.
+    if image is None:
+        return "Lütfen bir görüntü yükleyin."
+    if not turkish_question or not turkish_question.strip():
+        return "Lütfen bir soru yazın."
+
+    english_question = turkish_to_english(turkish_question)
+
+    # Image format
+    image = image.convert("RGB")
+    pixel_values = image_processor.preprocess(image, return_tensors="pt")["pixel_values"]
+    # Follow the model's own dtype: an unconditional .half() breaks the CPU
+    # path, where the model is loaded in float32.
+    image_tensor = pixel_values.to(device=device, dtype=model.dtype)
+
+    # LLaVA prompt
+    # NOTE(review): the hyphenated "llava-v1" key does not appear among the
+    # template names in llava.conversation as far as I can tell (they use
+    # underscores), and the LLaVA-Med v1.5 Mistral instructions use
+    # "mistral_instruct" - confirm against the installed package. The
+    # original key is kept as a last fallback.
+    for template_name in ("mistral_instruct", "llava_v1", "llava-v1"):
+        if template_name in conv_templates:
+            conv = conv_templates[template_name].copy()
+            break
+    else:
+        raise KeyError("no usable conversation template found in llava.conversation.conv_templates")
+    conv.messages = []
+    conv.append_message(conv.roles[0], DEFAULT_IMAGE_TOKEN + "\n" + english_question)
+    conv.append_message(conv.roles[1], None)
+    prompt = conv.get_prompt()
+
+    # The plain tokenizer would split "<image>" into ordinary text tokens, so
+    # the model would never splice in the image features. tokenizer_image_token
+    # replaces the placeholder with IMAGE_TOKEN_INDEX; it returns a 1-D tensor,
+    # hence the added batch dimension.
+    input_ids = tokenizer_image_token(
+        prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
+    ).unsqueeze(0).to(device)
+
+    with torch.inference_mode():
+        # input_ids is passed positionally so the call works both with the
+        # stock Hugging Face ``generate`` and with LLaVA's overridden
+        # ``generate(inputs, images=...)`` signature.
+        output_ids = model.generate(
+            input_ids,
+            images=image_tensor,
+            do_sample=False,
+            max_new_tokens=512,
+        )
+
+    # Some LLaVA versions echo the prompt ids in front of the answer, others
+    # return only the new tokens. Strip the prompt only when it is really there.
+    generated = output_ids[0]
+    prompt_len = input_ids.shape[1]
+    if generated.shape[0] >= prompt_len and torch.equal(generated[:prompt_len], input_ids[0]):
+        generated = generated[prompt_len:]
+    english_response = tokenizer.decode(generated, skip_special_tokens=True).strip()
+    return english_to_turkish(english_response)
+# 4. Gradio interface: one image input and one question box in, one text box out.
+image_input = gr.Image(type="pil", label="Tıbbi Görüntü")
+question_input = gr.Textbox(label="Türkçe Sorunuz", placeholder="Örn: Bu görüntüde bir tümör var mı?")
+answer_output = gr.Textbox(label="Model Cevabı")
 interface = gr.Interface(
     fn=predict_turkish,
+    inputs=[image_input, question_input],
+    outputs=answer_output,
+    title="Türkçe LLaVA-Med Görsel Soru-Cevaplama",
+    description="LLaVA-Med v1.5 (Mistral 7B) modelinin Türkçe destekli demo arayüzüdür. Görüntü yükleyin, Türkçe soru sorun, Türkçe cevap alın.",
 )
+# Start the web UI only when the file is executed as a script, not on import.
+if __name__ == "__main__":
+    interface.launch()