trojblue committed
Commit 618b677 · 1 Parent(s): bc07199

updating handler for cpu-only inference

Files changed (1): handler.py (+19 -6)
handler.py CHANGED
@@ -2,6 +2,7 @@ import base64
 import io
 import json
 import logging
+import os
 import time
 from pathlib import Path
 from typing import Any
@@ -109,7 +110,17 @@ class EndpointHandler:
         if not mapping_file.exists():
             raise FileNotFoundError(f"Mapping file not found: {mapping_file}")
 
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        # Robust device selection: prefer CPU unless CUDA is truly usable
+        force_cpu = os.environ.get("FORCE_CPU", "0") in {"1", "true", "TRUE", "yes", "on"}
+        if not force_cpu and torch.cuda.is_available():
+            try:
+                # Probe that CUDA can actually be used (driver present)
+                torch.zeros(1).to("cuda")
+                self.device = "cuda"
+            except Exception:
+                self.device = "cpu"
+        else:
+            self.device = "cpu"
         self.model = load_model(str(weights_file), self.device)
         self.transform = transforms.Compose(
             [
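
The probe covers the case where torch.cuda.is_available() reports a GPU but actually allocating on it fails (for example, a driver/runtime mismatch on the host). Extracted as a standalone sketch, with the hypothetical helper name pick_device:

import os
import torch

def pick_device() -> str:
    # Honor an explicit CPU override, mirroring the handler's FORCE_CPU env var
    force_cpu = os.environ.get("FORCE_CPU", "0") in {"1", "true", "TRUE", "yes", "on"}
    if not force_cpu and torch.cuda.is_available():
        try:
            torch.zeros(1).to("cuda")  # raises if the CUDA runtime is not actually usable
            return "cuda"
        except Exception:
            return "cpu"
    return "cpu"

device = pick_device()  # "cuda" on a healthy GPU host, "cpu" everywhere else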
@@ -159,11 +170,13 @@ class EndpointHandler:
 
         inference_start_time = time.time()
         with torch.inference_mode():
-            # Preprocess image on CPU, then pin memory for faster async transfer
-            image_tensor = self.transform(image).unsqueeze(0).pin_memory()
-
-            # Asynchronously move image to GPU
-            image_tensor = image_tensor.to(self.device, non_blocking=True)
+            # Preprocess image on CPU
+            image_tensor = self.transform(image).unsqueeze(0)
+            # Pin memory and use non_blocking transfer only when using CUDA
+            if self.device == "cuda":
+                image_tensor = image_tensor.pin_memory().to(self.device, non_blocking=True)
+            else:
+                image_tensor = image_tensor.to(self.device)
 
             # Run model on GPU
             probs = self.model(image_tensor)[0]  # Get probs for the single image
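
The transfer guard matters because Tensor.pin_memory() requires a working CUDA runtime and raises a RuntimeError on CPU-only hosts, which is presumably what broke CPU inference before this change. The same logic as a hypothetical free-standing helper:

import torch

def to_device(tensor: torch.Tensor, device: str) -> torch.Tensor:
    # Pinned memory + a non_blocking copy only helps (and only works) for CPU -> CUDA
    if device == "cuda":
        return tensor.pin_memory().to(device, non_blocking=True)
    return tensor.to(device)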
 
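To exercise the CPU path end to end, a caller can opt out of CUDA before constructing the handler. A usage sketch, assuming the standard Inference Endpoints contract of EndpointHandler(path) plus a dict payload; the payload schema itself is not shown in this diff:

import os
os.environ["FORCE_CPU"] = "1"  # any value in {"1", "true", "TRUE", "yes", "on"} forces CPU

from handler import EndpointHandler

handler = EndpointHandler(path=".")  # path argument assumed from the standard contract
# result = handler({"inputs": ...})  # payload keys omitted; see the rest of handler.py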