from transformers import AutoModelForVision2Seq, AutoProcessor
from peft import PeftModel
from PIL import Image
import torch

# Load the 4-bit base model and attach the fine-tuned LoRA adapter
model = AutoModelForVision2Seq.from_pretrained(
    "unsloth/llava-1.5-7b-hf-bnb-4bit",
    device_map="auto",
    torch_dtype=torch.float16,
)
model = PeftModel.from_pretrained(model, "grohitraj/archive_classification")
model = model.half()  # cast the non-quantized (adapter) weights to fp16 to avoid a dtype mismatch
processor = AutoProcessor.from_pretrained("grohitraj/archive_classification")

# Test on a sample image; the <image> placeholder is required by the LLaVA processor
image = Image.open("example_from_2019ISIC_data.jpg")
prompt = "<image>\nDescribe about the image for male aged 54:"
inputs = processor(text=prompt, images=image, return_tensors="pt")
inputs = {
    k: v.to(model.device, torch.float16) if v.is_floating_point() else v.to(model.device)
    for k, v in inputs.items()
}  # move all tensors to the model's device; pixel_values must also be fp16

# Generate and decode the output
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=100)
print(processor.tokenizer.decode(outputs[0], skip_special_tokens=True))
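
# The decoded string above echoes the prompt before the model's answer. A
# minimal sketch (assuming `inputs` and `outputs` from the snippet above are
# still in scope) that decodes only the newly generated tokens:
prompt_len = inputs["input_ids"].shape[1]
answer = processor.tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
print(answer)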