# for Zero GPU Spaces compatibility
import spaces
@spaces.GPU
def dummy_gpu():
    pass
import gradio as gr
import numpy as np
import cv2
import torch
import onnxruntime as ort
from optimum.onnxruntime import ORTModel
from ultralytics import YOLO
import os
from typing import Tuple, List
import subprocess
def install_cuda_toolkit():
    print("Installing CUDA Toolkit.")
    #CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
    CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run"
    CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
    subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE])
    subprocess.call(["chmod", "+x", CUDA_TOOLKIT_FILE])
    subprocess.call([CUDA_TOOLKIT_FILE, "--silent", "--toolkit"])
    os.environ["CUDA_HOME"] = "/usr/local/cuda"
    os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"])
    os.environ["LD_LIBRARY_PATH"] = "%s/lib:%s" % (
        os.environ["CUDA_HOME"],
        "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"],
    )
    # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
    os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
#install_cuda_toolkit()
# Configuration - UPDATE THESE VALUES
MODEL_PT_PATH = "model.pt" # Your trained PyTorch model
MODEL_ONNX_PATH = "model.onnx" # Output ONNX model name
INPUT_SIZE = 640 # Must match training size
CLASS_NAMES = ["class0", "class1"] # Your actual class names
CONF_THRESHOLD = 0.5 # Confidence threshold
IOU_THRESHOLD = 0.45 # NMS IoU threshold
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
session_options = ort.SessionOptions()
session_options.log_severity_level = 0  # 0 = VERBOSE; surfaces provider/CUDA loading errors in the logs
def convert_pt_to_onnx():
    """Convert the PyTorch model to ONNX format if it does not exist yet"""
    print(f'Converting model on {"cuda" if torch.cuda.is_available() else "cpu"}')
    if not os.path.exists(MODEL_ONNX_PATH):
        print("Converting PyTorch model to ONNX...")
        try:
            # Load trained YOLO model
            model = YOLO(MODEL_PT_PATH)
            # Export to ONNX with correct parameters
            exported_path = model.export(
                format="onnx",
                imgsz=INPUT_SIZE,
                opset=12,
                simplify=True,
                dynamic=False,
                half=False  # Disable FP16 for maximum compatibility
            )
            # Ultralytics names the export after the weights file (e.g. "model.onnx"),
            # so rename it only if it differs from the configured path
            if exported_path and exported_path != MODEL_ONNX_PATH and os.path.exists(exported_path):
                os.rename(exported_path, MODEL_ONNX_PATH)
            print("ONNX conversion successful!")
        except Exception as e:
            raise RuntimeError(f"ONNX conversion failed: {str(e)}")
def load_onnx_model() -> ort.InferenceSession:
    """Initialize the ONNX Runtime session"""
    print(f'Loading model on {"cuda" if torch.cuda.is_available() else "cpu"}')
    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if DEVICE != "cpu" else ['CPUExecutionProvider']
    try:
        # Plain onnxruntime fallback:
        #return ort.InferenceSession(MODEL_ONNX_PATH, sess_options=session_options, providers=providers)
        return ORTModel.load_model(MODEL_ONNX_PATH, provider=providers[0], session_options=session_options)
    except Exception as e:
        raise RuntimeError(f"Failed to load ONNX model: {str(e)}")
# Initialize model
convert_pt_to_onnx()
ort_session = load_onnx_model()
print("Available Providers: ", ort_session.get_providers())
#assert "CUDAExecutionProvider" in ort_session.get_providers()
def letterbox_image(image: np.ndarray) -> Tuple[np.ndarray, float, Tuple[int, int]]:
    """
    Preprocess image using YOLO's letterboxing method
    Returns:
        - Processed image tensor
        - Scale ratio (original to processed)
        - Padding dimensions (width, height)
    """
    # Get original dimensions
    h, w = image.shape[:2]
    # Calculate scale and new dimensions
    scale = min(INPUT_SIZE / h, INPUT_SIZE / w)
    new_h, new_w = int(h * scale), int(w * scale)
    # Resize with antialiasing
    resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)
    # Create canvas with 114-gray background
    canvas = np.full((INPUT_SIZE, INPUT_SIZE, 3), 114, dtype=np.uint8)
    # Calculate padding offsets
    pad_w = (INPUT_SIZE - new_w) // 2
    pad_h = (INPUT_SIZE - new_h) // 2
    # Paste resized image onto canvas
    canvas[pad_h:pad_h + new_h, pad_w:pad_w + new_w] = resized
    # Convert to float32 and normalize
    processed = canvas.astype(np.float32) / 255.0
    # Transpose to CHW format, add batch dimension and make contiguous for ONNX Runtime
    processed = np.ascontiguousarray(processed.transpose(2, 0, 1)[None, ...])
    return processed, scale, (pad_w, pad_h)
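# Worked example of the letterbox contract (comment-only sketch): a 1280x720 frame scales by
# min(640/720, 640/1280) = 0.5 to 640x360, leaving (640 - 360) // 2 = 140 px of gray padding on
# the top and bottom, so letterbox_image returns a (1, 3, 640, 640) float32 tensor, scale 0.5
# and padding (0, 140); process_detections later inverts exactly this mapping.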
def process_detections(
    outputs: List[np.ndarray],
    scale: float,
    padding: Tuple[int, int],
    orig_shape: Tuple[int, int]
) -> Tuple[List[List[int]], List[float], List[int]]:
    """
    Process raw model outputs into usable detections
    Returns:
        - List of bounding boxes [x1, y1, x2, y2]
        - List of confidence scores
        - List of class IDs
    """
    # Raw YOLOv8 output is (1, 4 + num_classes, num_anchors); squeeze the batch
    # dimension and transpose so each row is [cx, cy, w, h, class scores...]
    predictions = np.squeeze(outputs[0]).T
    # Filter by confidence threshold
    scores = np.max(predictions[:, 4:], axis=1)
    valid = scores > CONF_THRESHOLD
    predictions = predictions[valid]
    scores = scores[valid]
    if predictions.shape[0] == 0:
        return [], [], []
    # Extract boxes and classes
    boxes = predictions[:, :4]
    class_ids = np.argmax(predictions[:, 4:], axis=1)
    # Convert from center to corner coordinates
    boxes[:, [0, 1]] = boxes[:, [0, 1]] - boxes[:, [2, 3]] / 2  # xy top-left
    boxes[:, [2, 3]] = boxes[:, [0, 1]] + boxes[:, [2, 3]]      # xy bottom-right
    # Adjust for letterbox padding and scale
    pad_w, pad_h = padding
    boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_w) / scale
    boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_h) / scale
    # Clip coordinates to image dimensions
    boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, orig_shape[1])
    boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, orig_shape[0])
    # Convert to integer coordinates
    boxes = boxes.round().astype(int)
    # Apply NMS (cv2.dnn.NMSBoxes expects [x, y, width, height] boxes)
    nms_boxes = [[int(x1), int(y1), int(x2 - x1), int(y2 - y1)] for x1, y1, x2, y2 in boxes]
    indices = cv2.dnn.NMSBoxes(
        nms_boxes,
        scores.tolist(),
        CONF_THRESHOLD,
        IOU_THRESHOLD
    )
    if len(indices) == 0:
        return [], [], []
    # NMSBoxes returns (N, 1)-shaped arrays on older OpenCV builds; flatten before indexing
    indices = np.array(indices).flatten()
    # Return filtered results
    return boxes[indices], scores[indices], class_ids[indices]
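# Optional pure-NumPy NMS (a sketch, not used by the pipeline above): equivalent greedy IoU
# suppression over [x1, y1, x2, y2] boxes, handy if cv2.dnn.NMSBoxes is unavailable.
def _nms_numpy(boxes: np.ndarray, scores: np.ndarray, iou_threshold: float) -> List[int]:
    order = scores.argsort()[::-1]  # indices sorted by descending confidence
    keep: List[int] = []
    while order.size > 0:
        i = order[0]
        keep.append(int(i))
        rest = order[1:]
        # Intersection of the kept box with all remaining boxes
        xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])
        yy1 = np.maximum(boxes[i, 1], boxes[rest, 1])
        xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
        yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])
        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        area_rest = (boxes[rest, 2] - boxes[rest, 0]) * (boxes[rest, 3] - boxes[rest, 1])
        iou = inter / (area_i + area_rest - inter + 1e-7)
        # Drop boxes that overlap the kept box too strongly
        order = rest[iou <= iou_threshold]
    return keep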
def draw_detections(
    image: np.ndarray,
    boxes: List[List[int]],
    scores: List[float],
    class_ids: List[int]
) -> np.ndarray:
    """Draw bounding boxes and labels on image"""
    output = image.copy()
    for box, score, class_id in zip(boxes, scores, class_ids):
        x1, y1, x2, y2 = box
        # Draw bounding box
        color = (0, 255, 0)  # Green
        cv2.rectangle(output, (x1, y1), (x2, y2), color, 2)
        # Create label
        label = f"{CLASS_NAMES[class_id]}: {score:.2f}"
        # Get text size
        (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        # Draw text background
        cv2.rectangle(
            output,
            (x1, y1 - th - 4),
            (x1 + tw, y1),
            color,
            -1
        )
        # Draw text
        cv2.putText(
            output,
            label,
            (x1, y1 - 4),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 0, 0),
            1,
            cv2.LINE_AA
        )
    return output
def inference_frame(frame: np.ndarray) -> np.ndarray:
    """Full processing pipeline for a single frame"""
    # Preprocess
    input_tensor, scale, padding = letterbox_image(frame)
    # Inference
    outputs = ort_session.run(
        None,
        {ort_session.get_inputs()[0].name: input_tensor}
    )
    # Post-process
    boxes, scores, class_ids = process_detections(
        outputs,
        scale,
        padding,
        frame.shape[:2]
    )
    # Draw results
    if len(boxes) > 0:
        frame = draw_detections(frame, boxes, scores, class_ids)
    return frame
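# Offline sanity check (a sketch; "test.jpg" is a hypothetical local image, not shipped with
# this Space). Gradio streams RGB frames while cv2.imread returns BGR, hence the conversions:
#   frame = cv2.cvtColor(cv2.imread("test.jpg"), cv2.COLOR_BGR2RGB)
#   annotated = inference_frame(frame)
#   cv2.imwrite("test_out.jpg", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))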
# Gradio interface setup
with gr.Blocks() as app:
    gr.Markdown("# Real-Time YOLOv8 Object Detection")
    with gr.Row():
        webcam = gr.Image(
            sources=["webcam"],
            streaming=True,
            label="Webcam Input"
        )
        output = gr.Image(
            label="Detections",
            interactive=False
        )
    webcam.stream(
        fn=inference_frame,
        inputs=webcam,
        outputs=output,
        show_progress="hidden"
    )

if __name__ == "__main__":
    app.launch(show_error=True)
# https://discuss.huggingface.co/t/failed-to-create-cudaexecutionprovider/26501
# https://stackoverflow.com/questions/75267445/why-does-onnxruntime-fail-to-create-cudaexecutionprovider-in-linuxubuntu-20
# https://github.com/microsoft/onnxruntime/issues/4292
# https://github.com/ultralytics/ultralytics/issues/664