updating handler for cpu-only inference

handler.py  CHANGED  (+19 -6)

@@ -2,6 +2,7 @@ import base64
 import io
 import json
 import logging
+import os
 import time
 from pathlib import Path
 from typing import Any

@@ -109,7 +110,17 @@ class EndpointHandler:
         if not mapping_file.exists():
             raise FileNotFoundError(f"Mapping file not found: {mapping_file}")
 
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        # Robust device selection: prefer CPU unless CUDA is truly usable
+        force_cpu = os.environ.get("FORCE_CPU", "0") in {"1", "true", "TRUE", "yes", "on"}
+        if not force_cpu and torch.cuda.is_available():
+            try:
+                # Probe that CUDA can actually be used (driver present)
+                torch.zeros(1).to("cuda")
+                self.device = "cuda"
+            except Exception:
+                self.device = "cpu"
+        else:
+            self.device = "cpu"
         self.model = load_model(str(weights_file), self.device)
         self.transform = transforms.Compose(
             [

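The explicit allocation probe is the load-bearing part of this hunk: `torch.cuda.is_available()` only checks that a CUDA build of torch can see a device, and it can still pass on containers where a driver/runtime mismatch makes any real allocation fail. A standalone sketch of the same logic (the `select_device` name is illustrative, not part of the handler):

```python
import os

import torch


def select_device() -> str:
    # Illustrative helper mirroring the handler's new device selection.
    # Explicit override first: FORCE_CPU pins inference to CPU.
    if os.environ.get("FORCE_CPU", "0") in {"1", "true", "TRUE", "yes", "on"}:
        return "cpu"
    if torch.cuda.is_available():
        try:
            # A tiny real allocation catches CUDA builds with no usable driver.
            torch.zeros(1).to("cuda")
            return "cuda"
        except Exception:
            return "cpu"
    return "cpu"
```

With this in place, setting `FORCE_CPU=1` (or `true`, `yes`, `on`) in the runtime environment forces the CPU path even on images that report CUDA.
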
@@ -159,11 +170,13 @@ class EndpointHandler:
 
         inference_start_time = time.time()
         with torch.inference_mode():
-            # Preprocess image on CPU
-            image_tensor = self.transform(image).unsqueeze(0)
-
-            # Pin memory for faster transfer to the device
-            image_tensor = image_tensor.pin_memory().to(self.device, non_blocking=True)
+            # Preprocess image on CPU
+            image_tensor = self.transform(image).unsqueeze(0)
+            # Pin memory and use non_blocking transfer only when using CUDA
+            if self.device == "cuda":
+                image_tensor = image_tensor.pin_memory().to(self.device, non_blocking=True)
+            else:
+                image_tensor = image_tensor.to(self.device)
 
             # Run model on GPU
             probs = self.model(image_tensor)[0]  # Get probs for the single image

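The branch around `pin_memory()` is what actually unbreaks CPU-only inference: pinning allocates page-locked host memory through the CUDA runtime, so it generally raises on machines without CUDA, and `non_blocking=True` only buys anything for host-to-GPU copies anyway. A minimal sketch of the pattern (`to_device` is an illustrative name):

```python
import torch


def to_device(t: torch.Tensor, device: str) -> torch.Tensor:
    # Illustrative helper showing the conditional-transfer pattern.
    if device == "cuda":
        # Page-locked memory enables an asynchronous host-to-GPU copy.
        return t.pin_memory().to(device, non_blocking=True)
    # On CPU the tensor already lives in the right place; plain .to() suffices.
    return t.to(device)
```
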
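A quick local smoke test of the fallback path, assuming the usual `EndpointHandler(path)` constructor of Hugging Face custom handlers and that the weights and mapping file are present under the repository root:

```python
import os

os.environ["FORCE_CPU"] = "1"  # must be set before the handler initializes

from handler import EndpointHandler

handler = EndpointHandler(path=".")
assert handler.device == "cpu"  # the override wins even if CUDA is visible
```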