Inference Fails with Type Mismatch Error: self and mat2 must have the same dtype, but got Half and Char
#68
Hi team,

I'm encountering an error during inference with the moondream2 model (vikhyatk/moondream2) on a Jetson Orin Nano. The model loads correctly, but running inference on images consistently fails with the following error:
```
Inference result: Error processing image: self and mat2 must have the same dtype, but got Half and Char
```
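For what it's worth, my understanding is that in PyTorch error messages `Half` means `torch.float16` and `Char` means `torch.int8`, so a float16 activation seems to be hitting a raw int8 weight matrix inside a matmul. A minimal illustrative snippet (my own reduction, not moondream code) that should trigger the same kind of mismatch:

```python
import torch

a = torch.randn(2, 4, dtype=torch.float16)               # "Half" activations
w = torch.randint(-128, 128, (4, 8), dtype=torch.int8)   # "Char" (int8) weights
torch.mm(a, w)  # raises a dtype-mismatch RuntimeError (Half vs Char)
```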
Environment:
- Device: Jetson Orin Nano
- OS: Ubuntu 22.04
- Python version: 3.10
- Transformers version: 4.48.3
- torch: Jetson-compatible builds from the NVIDIA wheels (torch-2.6.0-cp310-cp310-linux_aarch64.whl, torchaudio-2.6.0-cp310-cp310-linux_aarch64.whl, torchvision-0.21.0-cp310-cp310-linux_aarch64.whl)
- Quantization: `BitsAndBytesConfig(load_in_8bit=True)`
- Code to load the model:

```python
self.model = AutoModelForCausalLM.from_pretrained(
    "vikhyatk/moondream2",
    quantization_config=quantization_config,
    revision="2025-01-09",
    trust_remote_code=True,
    device_map={"": "cuda"}
)
```
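To confirm what the quantization actually did, I can enumerate parameter dtypes after loading (an illustrative check I put together, not something from the model card), to see which layers bitsandbytes stored as int8 ("Char") versus the ones left in float16 ("Half"):

```python
# Illustrative diagnostic: list a few parameters bitsandbytes kept in int8.
for name, param in model.named_parameters():
    if param.dtype == torch.int8:
        print(f"int8 weight: {name}")
```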
Reproduction:
When running this snippet:
```python
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from PIL import Image
import torch
import time
import gc
import os
import csv


class VLMInference:
    def __init__(self):
        """Initialize the VLMInference class without loading the model by default."""
        self.model = None  # Model is not loaded initially

    def load_model(self):
        """
        Load the Moondream model from pretrained weights and set it to eval mode.

        Returns:
            model: Loaded Moondream model on CUDA
        """
        if self.model is not None:
            print("Model is already loaded.")
            return self.model
        try:
            quantization_config = BitsAndBytesConfig(load_in_8bit=True)
            self.model = AutoModelForCausalLM.from_pretrained(
                "vikhyatk/moondream2",
                quantization_config=quantization_config,  # Use the config here
                revision="2025-01-09",
                trust_remote_code=True,
                device_map={"": "cuda"}
            )
            self.model.eval()  # Set model to evaluation mode after loading
            print("Model loaded successfully.")
            return self.model
        except Exception as e:
            raise RuntimeError(f"Failed to load model: {str(e)}")

    def unload_model(self):
        """Unload the model from memory and GPU."""
        if self.model is None:
            print("No model is currently loaded.")
            return
        try:
            del self.model  # Delete the model object
            self.model = None  # Set to None to indicate it's unloaded
            gc.collect()  # Collect garbage before releasing cached GPU memory
            torch.cuda.empty_cache()  # Clear GPU memory
            print("Model unloaded successfully.")
        except Exception as e:
            print(f"Error unloading model: {str(e)}")

    def run_moondream(self, image, prompt="how many trucks are there"):
        """
        Run inference on the provided image with the given prompt.

        Args:
            image: Input image (expected to be in a format compatible with the model)
            prompt: Text prompt for the model (default: "how many trucks are there")

        Returns:
            str: Model response or error message
        """
        if self.model is None:
            return "Error: Model is not loaded. Please call load_model() first."
        try:
            response = self.model.query(image, prompt)
            return response
        except Exception as e:
            return f"Error processing image: {str(e)}"

    def point(self, image, prompt):
        """
        Detect objects in the image based on the provided prompt and print the
        count and pixel coordinates.

        Args:
            image: Input image
            prompt: Object category to detect (e.g., "person" or "car")
        """
        if self.model is None:
            print("Error: Model is not loaded. Please call load_model() first.")
            return []
        try:
            points = self.model.point(image, prompt)["points"]
            print(f"Found {len(points)} {prompt}(s)")
            image_width, image_height = image.size  # Get image dimensions
            for i, point in enumerate(points):
                if isinstance(point, dict) and 'x' in point and 'y' in point:
                    # Convert normalized coordinates to pixel coordinates
                    x_pixel = int(point['x'] * image_width)
                    y_pixel = int(point['y'] * image_height)
                    print(f"{prompt} {i+1}: ({x_pixel}, {y_pixel})")
                else:
                    print(f"Unexpected point format for {prompt} {i+1}: {point}")
            return points
        except Exception as e:
            print(f"Error detecting {prompt}: {str(e)}")
            return []


def print_gpu_memory():
    """Print GPU memory usage."""
    print(f"Allocated Memory: {torch.cuda.memory_allocated() / 1e6} MB")
    print(f"Reserved Memory: {torch.cuda.memory_reserved() / 1e6} MB")


def process_images_in_folder(folder_path, prompt, output_csv="timing_results.csv"):
    """
    Process all images in a folder and log timing information, including the
    prompt, to a CSV file.

    Args:
        folder_path: Path to the folder containing images
        prompt: Text prompt to use for inference
        output_csv: Path to the output CSV file
    """
    vlm = VLMInference()
    # Supported image extensions
    image_extensions = (".jpg", ".jpeg", ".png")
    # Open CSV file for writing
    with open(output_csv, mode='w', newline='') as csv_file:
        fieldnames = ['image_name', 'load_time_s', 'inference_time_s', 'unload_time_s', 'prompt']
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        # Iterate over all files in the folder
        for filename in os.listdir(folder_path):
            if filename.lower().endswith(image_extensions):
                image_path = os.path.join(folder_path, filename)
                print(f"\nProcessing {filename}...")
                # Load image
                image = Image.open(image_path).resize((480, 480))
                # Measure load time
                start_time = time.time()
                vlm.load_model()
                load_time = time.time() - start_time
                # Measure inference time with the specified prompt
                start_time = time.time()
                result = vlm.run_moondream(image, prompt)
                inference_time = time.time() - start_time
                print(f"Inference result: {result}")
                # Measure unload time
                start_time = time.time()
                vlm.unload_model()
                unload_time = time.time() - start_time
                # Log to CSV
                writer.writerow({
                    'image_name': filename,
                    'load_time_s': load_time,
                    'inference_time_s': inference_time,
                    'unload_time_s': unload_time,
                    'prompt': prompt
                })
                # Print timing info
                print(f"Load time: {load_time:.3f}s")
                print(f"Inference time: {inference_time:.3f}s")
                print(f"Unload time: {unload_time:.3f}s")
                print_gpu_memory()
                # Optional: small delay between iterations
                time.sleep(1)


if __name__ == "__main__":
    try:
        # Specify the folder containing images
        image_folder = "data"
        # Define the prompt (change as needed)
        inference_prompt = "describe the image"
        # Process all images and log results
        process_images_in_folder(image_folder, prompt=inference_prompt, output_csv="int8.csv")
    except Exception as e:
        print(f"Error in main execution: {str(e)}")
```
The error appears for every image. Full output for one file:

```
Processing 100000046.jpg...
Model loaded successfully.
Inference result: Error processing image: self and mat2 must have the same dtype, but got Half and Char
Model unloaded successfully.
```
Request:
Could you please look into:
- Why is the dtype mismatch happening?
- Does moondream2 support 8-bit or half-precision inference on the Jetson Orin Nano?
- Is there a known workaround (e.g., forcing float32, or a proper dtype conversion for the image input)? A sketch of what I mean follows this list.
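For the last point, this is a minimal sketch of what I mean by forcing a single dtype: dropping the bitsandbytes quantization and loading in fp16, assuming the unquantized model fits in the Orin Nano's memory (I have not verified this path):

```python
from transformers import AutoModelForCausalLM
import torch

# Sketch: load without 8-bit quantization so weights and activations share one dtype.
model = AutoModelForCausalLM.from_pretrained(
    "vikhyatk/moondream2",
    revision="2025-01-09",
    trust_remote_code=True,
    torch_dtype=torch.float16,  # or torch.float32 if fp16 kernels also misbehave
    device_map={"": "cuda"},
)
```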
Thank you for your amazing work on this model. Looking forward to your help in resolving this issue.
Best regards,
Abdul Manaf PV