trojblue committed
Commit 618b677 · 1 Parent(s): bc07199

updating handler for cpu-only inference

Files changed (1): handler.py (+19 -6)
handler.py CHANGED
@@ -2,6 +2,7 @@ import base64
 import io
 import json
 import logging
+import os
 import time
 from pathlib import Path
 from typing import Any
@@ -109,7 +110,17 @@ class EndpointHandler:
         if not mapping_file.exists():
             raise FileNotFoundError(f"Mapping file not found: {mapping_file}")
 
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        # Robust device selection: prefer CPU unless CUDA is truly usable
+        force_cpu = os.environ.get("FORCE_CPU", "0") in {"1", "true", "TRUE", "yes", "on"}
+        if not force_cpu and torch.cuda.is_available():
+            try:
+                # Probe that CUDA can actually be used (driver present)
+                torch.zeros(1).to("cuda")
+                self.device = "cuda"
+            except Exception:
+                self.device = "cpu"
+        else:
+            self.device = "cpu"
         self.model = load_model(str(weights_file), self.device)
         self.transform = transforms.Compose(
             [
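
The probe covers the case where torch.cuda.is_available() reports a GPU but actually allocating on it fails (for example, a driver/runtime mismatch on the host). Extracted as a standalone sketch, with the hypothetical helper name pick_device:

import os
import torch

def pick_device() -> str:
    # Honor an explicit CPU override, mirroring the handler's FORCE_CPU env var
    force_cpu = os.environ.get("FORCE_CPU", "0") in {"1", "true", "TRUE", "yes", "on"}
    if not force_cpu and torch.cuda.is_available():
        try:
            torch.zeros(1).to("cuda")  # raises if the CUDA runtime is not actually usable
            return "cuda"
        except Exception:
            return "cpu"
    return "cpu"

device = pick_device()  # "cuda" on a healthy GPU host, "cpu" everywhere else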
@@ -159,11 +170,13 @@ class EndpointHandler:
 
         inference_start_time = time.time()
         with torch.inference_mode():
-            # Preprocess image on CPU, then pin memory for faster async transfer
-            image_tensor = self.transform(image).unsqueeze(0).pin_memory()
-
-            # Asynchronously move image to GPU
-            image_tensor = image_tensor.to(self.device, non_blocking=True)
+            # Preprocess image on CPU
+            image_tensor = self.transform(image).unsqueeze(0)
+            # Pin memory and use non_blocking transfer only when using CUDA
+            if self.device == "cuda":
+                image_tensor = image_tensor.pin_memory().to(self.device, non_blocking=True)
+            else:
+                image_tensor = image_tensor.to(self.device)
 
             # Run model on GPU
             probs = self.model(image_tensor)[0]  # Get probs for the single image
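
The transfer guard matters because Tensor.pin_memory() requires a working CUDA runtime and raises a RuntimeError on CPU-only hosts, which is presumably what broke CPU inference before this change. The same logic as a hypothetical free-standing helper:

import torch

def to_device(tensor: torch.Tensor, device: str) -> torch.Tensor:
    # Pinned memory + a non_blocking copy only helps (and only works) for CPU -> CUDA
    if device == "cuda":
        return tensor.pin_memory().to(device, non_blocking=True)
    return tensor.to(device)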
 
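To exercise the CPU path end to end, a caller can opt out of CUDA before constructing the handler. A usage sketch, assuming the standard Inference Endpoints contract of EndpointHandler(path) plus a dict payload; the payload schema itself is not shown in this diff:

import os
os.environ["FORCE_CPU"] = "1"  # any value in {"1", "true", "TRUE", "yes", "on"} forces CPU

from handler import EndpointHandler

handler = EndpointHandler(path=".")  # path argument assumed from the standard contract
# result = handler({"inputs": ...})  # payload keys omitted; see the rest of handler.py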