Spaces:

phiph
/

DA-2-WebGPU

Running

App Files Files Community

Philipp S committed on Nov 27, 2025

Commit

2dd8e33

1 Parent(s): 3a0fd02

Add WebGPU demo files

Browse files

Files changed (3) hide show

README.md +9 -7
index.html +38 -18
script.js +138 -0

README.md CHANGED Viewed

@@ -1,12 +1,14 @@
 ---
-title: DA 2 WebGPU
-emoji: 📈
-colorFrom: red
-colorTo: indigo
 sdk: static
 pinned: false
-license: apache-2.0
-short_description: DA-2-WebGPU
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: DA-2 WebGPU Demo
+emoji: 🌍
+colorFrom: blue
+colorTo: purple
 sdk: static
 pinned: false
 ---
+# DA-2 WebGPU Demo
+This is a client-side WebGPU demo for [DA-2: Depth Anything in Any Direction](https://huggingface.co/phiph/DA-2-WebGPU).
+It runs entirely in your browser using ONNX Runtime Web.

index.html CHANGED Viewed

@@ -1,19 +1,39 @@
-<!doctype html>
-<html>
-	<head>
-		<meta charset="utf-8" />
-		<meta name="viewport" content="width=device-width" />
-		<title>My static Space</title>
-		<link rel="stylesheet" href="style.css" />
-	</head>
-	<body>
-		<div class="card">
-			<h1>Welcome to your static Space!</h1>
-			<p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
-			<p>
-				Also don't forget to check the
-				<a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
-			</p>
-		</div>
-	</body>
 </html>

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>DA-2 WebGPU Demo</title>
+    <style>
+        body { font-family: sans-serif; padding: 20px; max-width: 1200px; margin: 0 auto; }
+        canvas { max-width: 100%; border: 1px solid #ccc; margin-top: 10px; display: block; }
+        #controls { margin-bottom: 20px; padding: 10px; background: #f0f0f0; border-radius: 5px; }
+        .container { display: flex; flex-wrap: wrap; gap: 20px; }
+        .view { flex: 1; min-width: 300px; }
+        #status { margin-left: 10px; font-weight: bold; }
+    </style>
+</head>
+<body>
+    <h1>DA-2 Depth Estimation (WebGPU)</h1>
+    <p>Upload a 360° panorama image to estimate depth.</p>
+    <div id="controls">
+        <input type="file" id="imageInput" accept="image/*">
+        <button id="runBtn" disabled>Run Inference</button>
+        <span id="status">Initializing...</span>
+    </div>
+    <div class="container">
+        <div class="view">
+            <h3>Input Image</h3>
+            <canvas id="inputCanvas"></canvas>
+        </div>
+        <div class="view">
+            <h3>Depth Map</h3>
+            <canvas id="outputCanvas"></canvas>
+        </div>
+    </div>
+    <script type="module" src="script.js"></script>
+</body>
 </html>

script.js ADDED Viewed

	@@ -0,0 +1,138 @@

+import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';
+// Skip local model checks since we are fetching from HF Hub
+env.allowLocalModels = false;
+const MODEL_ID = 'phiph/DA-2-WebGPU';
+const INPUT_WIDTH = 1092;
+const INPUT_HEIGHT = 546;
+let depth_estimator = null;
+const statusElement = document.getElementById('status');
+const runBtn = document.getElementById('runBtn');
+const imageInput = document.getElementById('imageInput');
+const inputCanvas = document.getElementById('inputCanvas');
+const outputCanvas = document.getElementById('outputCanvas');
+const inputCtx = inputCanvas.getContext('2d');
+const outputCtx = outputCanvas.getContext('2d');
+// Initialize Transformers.js Pipeline
+async function init() {
+    try {
+        statusElement.textContent = 'Loading model... (this may take a while)';
+        // Initialize the pipeline
+        depth_estimator = await pipeline('depth-estimation', MODEL_ID, {
+            device: 'webgpu',
+            dtype: 'fp32', // Important: Model is FP32
+        });
+        statusElement.textContent = 'Model loaded. Ready.';
+        runBtn.disabled = false;
+    } catch (e) {
+        console.error(e);
+        statusElement.textContent = 'Error loading model: ' + e.message;
+        // Fallback to wasm if webgpu fails
+        try {
+            statusElement.textContent = 'WebGPU failed, trying WASM...';
+            depth_estimator = await pipeline('depth-estimation', MODEL_ID, {
+                device: 'wasm',
+                dtype: 'fp32'
+            });
+            statusElement.textContent = 'Model loaded (WASM). Ready.';
+            runBtn.disabled = false;
+        } catch (e2) {
+            statusElement.textContent = 'Error loading model (WASM): ' + e2.message;
+        }
+    }
+}
+imageInput.addEventListener('change', (e) => {
+    const file = e.target.files[0];
+    if (!file) return;
+    const img = new Image();
+    img.onload = () => {
+        inputCanvas.width = INPUT_WIDTH;
+        inputCanvas.height = INPUT_HEIGHT;
+        inputCtx.drawImage(img, 0, 0, INPUT_WIDTH, INPUT_HEIGHT);
+        // Clear output
+        outputCanvas.width = INPUT_WIDTH;
+        outputCanvas.height = INPUT_HEIGHT;
+        outputCtx.clearRect(0, 0, INPUT_WIDTH, INPUT_HEIGHT);
+    };
+    img.src = URL.createObjectURL(file);
+});
+runBtn.addEventListener('click', async () => {
+    if (!depth_estimator) return;
+    statusElement.textContent = 'Running inference...';
+    runBtn.disabled = true;
+    try {
+        // Get the image source from the canvas (or the file URL directly)
+        // Using the canvas data ensures we are passing what the user sees
+        const url = inputCanvas.toDataURL();
+        // Run inference
+        // The pipeline handles preprocessing (resize, rescale) automatically
+        const output = await depth_estimator(url);
+        // output.depth is the raw tensor
+        // output.mask is the visualized depth map (Image object) if available,
+        // but for custom models it might just return the tensor.
+        // Let's check what we got
+        if (output.depth) {
+             // Visualize the raw tensor manually to be safe
+             visualize(output.depth.data, INPUT_WIDTH, INPUT_HEIGHT);
+        } else {
+             // Fallback if structure is different
+             console.log("Output structure:", output);
+             statusElement.textContent = 'Done (Check console for output structure).';
+        }
+        statusElement.textContent = 'Done.';
+    } catch (e) {
+        console.error(e);
+        statusElement.textContent = 'Error running inference: ' + e.message;
+    } finally {
+        runBtn.disabled = false;
+    }
+});
+function visualize(data, width, height) {
+    // Find min and max for normalization
+    let min = Infinity;
+    let max = -Infinity;
+    for (let i = 0; i < data.length; i++) {
+        if (data[i] < min) min = data[i];
+        if (data[i] > max) max = data[i];
+    }
+    const range = max - min;
+    const imageData = outputCtx.createImageData(width, height);
+    for (let i = 0; i < data.length; i++) {
+        // Normalize to 0-1
+        const val = (data[i] - min) / (range || 1);
+        // Simple heatmap (Magma-like or just grayscale)
+        // Inverted depth usually looks better (closer is brighter)
+        // But here it's distance, so closer is smaller value.
+        // If we map min (close) to 255 (white) and max (far) to 0 (black)
+        const pixelVal = Math.floor((1 - val) * 255);
+        imageData.data[i * 4] = pixelVal; // R
+        imageData.data[i * 4 + 1] = pixelVal; // G
+        imageData.data[i * 4 + 2] = pixelVal; // B
+        imageData.data[i * 4 + 3] = 255; // Alpha
+    }
+    outputCtx.putImageData(imageData, 0, 0);
+}
+// Start loading the model as soon as the module is evaluated. init() reports model
+// load failures itself; this catch only surfaces unexpected rejections instead of
+// leaving the promise floating.
+init().catch((err) => console.error(err));