|
import onnxruntime as ort
|
|
import torch
|
|
import json
|
|
import numpy as np
|
|
from PIL import Image
|
|
import torchvision.transforms as transforms
|
|
import os
|
|
import time
|
|
|
|
def preprocess_image(image_path: str, image_size: int = 512):
    """Load an image and letterbox it onto a square canvas for inference.

    The image is converted to RGB, scaled so that its longer side equals
    ``image_size`` while keeping the aspect ratio, centred on a black
    ``image_size`` x ``image_size`` canvas, and passed through
    ``transforms.ToTensor()``.

    Args:
        image_path: Path of the image file to load.
        image_size: Side length, in pixels, of the square output canvas.

    Returns:
        The tensor produced by ``transforms.ToTensor()`` for the padded image.

    Raises:
        ValueError: If ``image_path`` does not exist.
        RuntimeError: If the image cannot be opened or processed. The
            underlying error is attached as ``__cause__``.
    """
    if not os.path.exists(image_path):
        raise ValueError(f"Image not found at path: {image_path}")

    to_tensor = transforms.ToTensor()

    try:
        with Image.open(image_path) as img:
            # Normalise every non-RGB mode (not only RGBA / P) before
            # resizing, so that the resampling filter and the paste below
            # always operate on three-channel RGB data.
            if img.mode != 'RGB':
                img = img.convert('RGB')

            width, height = img.size
            aspect_ratio = width / height

            # Fit the longer side to image_size. The shorter side is
            # truncated to an int and clamped to at least one pixel, since
            # an extreme aspect ratio would otherwise produce a zero-sized
            # resize target.
            if aspect_ratio > 1:
                new_width = image_size
                new_height = max(1, int(new_width / aspect_ratio))
            else:
                new_height = image_size
                new_width = max(1, int(new_height * aspect_ratio))

            img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

            # Centre the resized image on a black square canvas.
            new_image = Image.new('RGB', (image_size, image_size), (0, 0, 0))
            paste_x = (image_size - new_width) // 2
            paste_y = (image_size - new_height) // 2
            new_image.paste(img, (paste_x, paste_y))

            return to_tensor(new_image)
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise RuntimeError(f"Error processing {image_path}: {str(e)}") from e
|
|
|
|
def test_onnx_model(model_path: str, metadata_path: str, image_path: str, threshold: float = 0.325):
    """Run a single image through an ONNX model and print the predicted tags.

    Steps: load the tag metadata, create an ONNX Runtime session (CUDA first,
    CPU as a fallback), preprocess the image with ``preprocess_image``, run
    inference, apply a sigmoid to the output, keep the tags whose probability
    reaches ``threshold``, and print them grouped by category.

    Args:
        model_path: Path of the ONNX model file.
        metadata_path: Path of a JSON file containing the ``idx_to_tag`` and
            ``tag_to_category`` mappings.
        image_path: Path of the image to tag.
        threshold: Minimum probability for a tag to be reported.

    Returns:
        A dict mapping each category name to a list of ``(tag, probability)``
        tuples, sorted by descending probability.
    """
    # JSON is UTF-8 by specification; do not rely on the platform's default
    # encoding, which can fail on non-ASCII tag names.
    with open(metadata_path, 'r', encoding='utf-8') as f:
        metadata = json.load(f)

    print(f"Loading ONNX model from {model_path}")
    try:
        session = ort.InferenceSession(
            model_path,
            providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
        )
        print(f"Using providers: {session.get_providers()}")
    except Exception as e:
        # Best-effort fallback: retry with the CPU provider only. If the
        # failure was not CUDA-related, the retry raises the real error.
        print(f"CUDA not available, using CPU: {e}")
        session = ort.InferenceSession(
            model_path,
            providers=['CPUExecutionProvider'],
        )
        print(f"Using providers: {session.get_providers()}")

    print(f"Processing image: {image_path}")
    img_tensor = preprocess_image(image_path)
    # Add a leading batch dimension and hand ONNX Runtime a NumPy array.
    img_numpy = img_tensor.unsqueeze(0).numpy()

    input_name = session.get_inputs()[0].name
    print(f"Input name: {input_name}")

    print("Running inference...")
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments.
    start_time = time.perf_counter()
    outputs = session.run(None, {input_name: img_numpy})
    inference_time = time.perf_counter() - start_time
    print(f"Inference completed in {inference_time:.4f} seconds")

    # Use the second output when the model provides one, otherwise the first,
    # and turn its logits into probabilities with a sigmoid.
    logits = outputs[1] if len(outputs) > 1 else outputs[0]
    refined_probs = 1.0 / (1.0 + np.exp(-logits))

    # Indices, within the first (only) batch item, that reach the threshold.
    indices = np.nonzero(refined_probs[0] >= threshold)[0]

    tags_by_category = {}
    for idx in indices:
        # Metadata keys are strings because they come from JSON.
        tag_name = metadata['idx_to_tag'].get(str(idx), f"unknown-{idx}")
        category = metadata['tag_to_category'].get(tag_name, "general")
        prob = float(refined_probs[0, idx])
        tags_by_category.setdefault(category, []).append((tag_name, prob))

    # Most confident tags first within each category.
    for tags in tags_by_category.values():
        tags.sort(key=lambda item: item[1], reverse=True)

    print("\nPredicted tags:")
    for category in sorted(tags_by_category):
        print(f"\n{category.capitalize()}:")
        for tag, prob in tags_by_category[category]:
            print(f" {tag}: {prob:.3f}")

    return tags_by_category
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the sample inference only when this file is executed as a script,
    # so that importing the module does not load a model as a side effect.
    test_onnx_model('model_initial.onnx', 'model_initial_metadata.json', 'test_image.jpg')