Spaces:
Running
Running
Upload 36 files
Browse files
src/components/MultiSourceCaptioningView.tsx
CHANGED
|
@@ -33,12 +33,13 @@ export default function MultiSourceCaptioningView() {
|
|
| 33 |
const [debugOutput, setDebugOutput] = useState<string>("");
|
| 34 |
const [canvasDims, setCanvasDims] = useState<{w:number,h:number}|null>(null);
|
| 35 |
const [videoDims, setVideoDims] = useState<{w:number,h:number}|null>(null);
|
|
|
|
| 36 |
|
| 37 |
const videoRef = useRef<HTMLVideoElement | null>(null);
|
| 38 |
const canvasRef = useRef<HTMLCanvasElement | null>(null);
|
| 39 |
const imageRef = useRef<HTMLImageElement | null>(null);
|
| 40 |
const webcamStreamRef = useRef<MediaStream | null>(null);
|
| 41 |
-
const { isLoaded, runInference } = useVLMContext();
|
| 42 |
|
| 43 |
// Webcam setup and teardown (unchanged)
|
| 44 |
useEffect(() => {
|
|
@@ -91,6 +92,7 @@ export default function MultiSourceCaptioningView() {
|
|
| 91 |
try {
|
| 92 |
setProcessing(true);
|
| 93 |
setError(null);
|
|
|
|
| 94 |
const fakeVideo = {
|
| 95 |
videoWidth: canvas.width,
|
| 96 |
videoHeight: canvas.height,
|
|
@@ -98,11 +100,14 @@ export default function MultiSourceCaptioningView() {
|
|
| 98 |
} as unknown as HTMLVideoElement;
|
| 99 |
const result = await runInference(fakeVideo, prompt);
|
| 100 |
setDebugOutput(result);
|
|
|
|
| 101 |
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
|
| 102 |
const boxes = extractJsonFromMarkdown(result) || [];
|
|
|
|
| 103 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
| 104 |
} catch (e) {
|
| 105 |
setError(e instanceof Error ? e.message : String(e));
|
|
|
|
| 106 |
} finally {
|
| 107 |
setProcessing(false);
|
| 108 |
}
|
|
@@ -132,6 +137,7 @@ export default function MultiSourceCaptioningView() {
|
|
| 132 |
try {
|
| 133 |
setProcessing(true);
|
| 134 |
setError(null);
|
|
|
|
| 135 |
const fakeVideo = {
|
| 136 |
videoWidth: canvas.width,
|
| 137 |
videoHeight: canvas.height,
|
|
@@ -139,11 +145,14 @@ export default function MultiSourceCaptioningView() {
|
|
| 139 |
} as unknown as HTMLVideoElement;
|
| 140 |
const result = await runInference(fakeVideo, prompt);
|
| 141 |
setDebugOutput(result);
|
|
|
|
| 142 |
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
|
| 143 |
const boxes = extractJsonFromMarkdown(result) || [];
|
|
|
|
| 144 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
| 145 |
} catch (e) {
|
| 146 |
setError(e instanceof Error ? e.message : String(e));
|
|
|
|
| 147 |
} finally {
|
| 148 |
setProcessing(false);
|
| 149 |
}
|
|
@@ -171,6 +180,7 @@ export default function MultiSourceCaptioningView() {
|
|
| 171 |
try {
|
| 172 |
setProcessing(true);
|
| 173 |
setError(null);
|
|
|
|
| 174 |
const fakeVideo = {
|
| 175 |
videoWidth: canvas.width,
|
| 176 |
videoHeight: canvas.height,
|
|
@@ -178,12 +188,15 @@ export default function MultiSourceCaptioningView() {
|
|
| 178 |
} as unknown as HTMLVideoElement;
|
| 179 |
const result = await runInference(fakeVideo, prompt);
|
| 180 |
setDebugOutput(result);
|
|
|
|
| 181 |
ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
|
| 182 |
const boxes = extractJsonFromMarkdown(result) || [];
|
|
|
|
| 183 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
| 184 |
setImageProcessed(true);
|
| 185 |
} catch (e) {
|
| 186 |
setError(e instanceof Error ? e.message : String(e));
|
|
|
|
| 187 |
} finally {
|
| 188 |
setProcessing(false);
|
| 189 |
}
|
|
@@ -206,6 +219,7 @@ export default function MultiSourceCaptioningView() {
|
|
| 206 |
try {
|
| 207 |
setProcessing(true);
|
| 208 |
setError(null);
|
|
|
|
| 209 |
const fakeVideo = {
|
| 210 |
videoWidth: canvas.width,
|
| 211 |
videoHeight: canvas.height,
|
|
@@ -213,11 +227,14 @@ export default function MultiSourceCaptioningView() {
|
|
| 213 |
} as unknown as HTMLVideoElement;
|
| 214 |
const result = await runInference(fakeVideo, prompt);
|
| 215 |
setDebugOutput(result);
|
|
|
|
| 216 |
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
|
| 217 |
const boxes = extractJsonFromMarkdown(result) || [];
|
|
|
|
| 218 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
| 219 |
} catch (e) {
|
| 220 |
setError(e instanceof Error ? e.message : String(e));
|
|
|
|
| 221 |
} finally {
|
| 222 |
setProcessing(false);
|
| 223 |
}
|
|
@@ -247,6 +264,7 @@ export default function MultiSourceCaptioningView() {
|
|
| 247 |
try {
|
| 248 |
setProcessing(true);
|
| 249 |
setError(null);
|
|
|
|
| 250 |
const fakeVideo = {
|
| 251 |
videoWidth: canvas.width,
|
| 252 |
videoHeight: canvas.height,
|
|
@@ -254,11 +272,14 @@ export default function MultiSourceCaptioningView() {
|
|
| 254 |
} as unknown as HTMLVideoElement;
|
| 255 |
const result = await runInference(fakeVideo, prompt);
|
| 256 |
setDebugOutput(result);
|
|
|
|
| 257 |
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
|
| 258 |
const boxes = extractJsonFromMarkdown(result) || [];
|
|
|
|
| 259 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
| 260 |
} catch (e) {
|
| 261 |
setError(e instanceof Error ? e.message : String(e));
|
|
|
|
| 262 |
} finally {
|
| 263 |
setProcessing(false);
|
| 264 |
}
|
|
@@ -314,6 +335,10 @@ export default function MultiSourceCaptioningView() {
|
|
| 314 |
|
| 315 |
return (
|
| 316 |
<div className="absolute inset-0 text-white">
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
<div className="flex flex-col items-center justify-center h-full w-full">
|
| 318 |
{/* Mode Selector */}
|
| 319 |
<div className="mb-6">
|
|
|
|
| 33 |
const [debugOutput, setDebugOutput] = useState<string>("");
|
| 34 |
const [canvasDims, setCanvasDims] = useState<{w:number,h:number}|null>(null);
|
| 35 |
const [videoDims, setVideoDims] = useState<{w:number,h:number}|null>(null);
|
| 36 |
+
const [inferenceStatus, setInferenceStatus] = useState<string>("");
|
| 37 |
|
| 38 |
const videoRef = useRef<HTMLVideoElement | null>(null);
|
| 39 |
const canvasRef = useRef<HTMLCanvasElement | null>(null);
|
| 40 |
const imageRef = useRef<HTMLImageElement | null>(null);
|
| 41 |
const webcamStreamRef = useRef<MediaStream | null>(null);
|
| 42 |
+
const { isLoaded, isLoading, error: modelError, runInference } = useVLMContext();
|
| 43 |
|
| 44 |
// Webcam setup and teardown (unchanged)
|
| 45 |
useEffect(() => {
|
|
|
|
| 92 |
try {
|
| 93 |
setProcessing(true);
|
| 94 |
setError(null);
|
| 95 |
+
setInferenceStatus("Running inference...");
|
| 96 |
const fakeVideo = {
|
| 97 |
videoWidth: canvas.width,
|
| 98 |
videoHeight: canvas.height,
|
|
|
|
| 100 |
} as unknown as HTMLVideoElement;
|
| 101 |
const result = await runInference(fakeVideo, prompt);
|
| 102 |
setDebugOutput(result);
|
| 103 |
+
setInferenceStatus("Inference complete.");
|
| 104 |
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
|
| 105 |
const boxes = extractJsonFromMarkdown(result) || [];
|
| 106 |
+
if (boxes.length === 0) setInferenceStatus("No boxes detected or model output invalid.");
|
| 107 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
| 108 |
} catch (e) {
|
| 109 |
setError(e instanceof Error ? e.message : String(e));
|
| 110 |
+
setInferenceStatus("Inference error: " + (e instanceof Error ? e.message : String(e)));
|
| 111 |
} finally {
|
| 112 |
setProcessing(false);
|
| 113 |
}
|
|
|
|
| 137 |
try {
|
| 138 |
setProcessing(true);
|
| 139 |
setError(null);
|
| 140 |
+
setInferenceStatus("Running inference...");
|
| 141 |
const fakeVideo = {
|
| 142 |
videoWidth: canvas.width,
|
| 143 |
videoHeight: canvas.height,
|
|
|
|
| 145 |
} as unknown as HTMLVideoElement;
|
| 146 |
const result = await runInference(fakeVideo, prompt);
|
| 147 |
setDebugOutput(result);
|
| 148 |
+
setInferenceStatus("Inference complete.");
|
| 149 |
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
|
| 150 |
const boxes = extractJsonFromMarkdown(result) || [];
|
| 151 |
+
if (boxes.length === 0) setInferenceStatus("No boxes detected or model output invalid.");
|
| 152 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
| 153 |
} catch (e) {
|
| 154 |
setError(e instanceof Error ? e.message : String(e));
|
| 155 |
+
setInferenceStatus("Inference error: " + (e instanceof Error ? e.message : String(e)));
|
| 156 |
} finally {
|
| 157 |
setProcessing(false);
|
| 158 |
}
|
|
|
|
| 180 |
try {
|
| 181 |
setProcessing(true);
|
| 182 |
setError(null);
|
| 183 |
+
setInferenceStatus("Running inference...");
|
| 184 |
const fakeVideo = {
|
| 185 |
videoWidth: canvas.width,
|
| 186 |
videoHeight: canvas.height,
|
|
|
|
| 188 |
} as unknown as HTMLVideoElement;
|
| 189 |
const result = await runInference(fakeVideo, prompt);
|
| 190 |
setDebugOutput(result);
|
| 191 |
+
setInferenceStatus("Inference complete.");
|
| 192 |
ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
|
| 193 |
const boxes = extractJsonFromMarkdown(result) || [];
|
| 194 |
+
if (boxes.length === 0) setInferenceStatus("No boxes detected or model output invalid.");
|
| 195 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
| 196 |
setImageProcessed(true);
|
| 197 |
} catch (e) {
|
| 198 |
setError(e instanceof Error ? e.message : String(e));
|
| 199 |
+
setInferenceStatus("Inference error: " + (e instanceof Error ? e.message : String(e)));
|
| 200 |
} finally {
|
| 201 |
setProcessing(false);
|
| 202 |
}
|
|
|
|
| 219 |
try {
|
| 220 |
setProcessing(true);
|
| 221 |
setError(null);
|
| 222 |
+
setInferenceStatus("Running inference...");
|
| 223 |
const fakeVideo = {
|
| 224 |
videoWidth: canvas.width,
|
| 225 |
videoHeight: canvas.height,
|
|
|
|
| 227 |
} as unknown as HTMLVideoElement;
|
| 228 |
const result = await runInference(fakeVideo, prompt);
|
| 229 |
setDebugOutput(result);
|
| 230 |
+
setInferenceStatus("Inference complete.");
|
| 231 |
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
|
| 232 |
const boxes = extractJsonFromMarkdown(result) || [];
|
| 233 |
+
if (boxes.length === 0) setInferenceStatus("No boxes detected or model output invalid.");
|
| 234 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
| 235 |
} catch (e) {
|
| 236 |
setError(e instanceof Error ? e.message : String(e));
|
| 237 |
+
setInferenceStatus("Inference error: " + (e instanceof Error ? e.message : String(e)));
|
| 238 |
} finally {
|
| 239 |
setProcessing(false);
|
| 240 |
}
|
|
|
|
| 264 |
try {
|
| 265 |
setProcessing(true);
|
| 266 |
setError(null);
|
| 267 |
+
setInferenceStatus("Running inference...");
|
| 268 |
const fakeVideo = {
|
| 269 |
videoWidth: canvas.width,
|
| 270 |
videoHeight: canvas.height,
|
|
|
|
| 272 |
} as unknown as HTMLVideoElement;
|
| 273 |
const result = await runInference(fakeVideo, prompt);
|
| 274 |
setDebugOutput(result);
|
| 275 |
+
setInferenceStatus("Inference complete.");
|
| 276 |
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
|
| 277 |
const boxes = extractJsonFromMarkdown(result) || [];
|
| 278 |
+
if (boxes.length === 0) setInferenceStatus("No boxes detected or model output invalid.");
|
| 279 |
drawBoundingBoxesOnCanvas(ctx, boxes);
|
| 280 |
} catch (e) {
|
| 281 |
setError(e instanceof Error ? e.message : String(e));
|
| 282 |
+
setInferenceStatus("Inference error: " + (e instanceof Error ? e.message : String(e)));
|
| 283 |
} finally {
|
| 284 |
setProcessing(false);
|
| 285 |
}
|
|
|
|
| 335 |
|
| 336 |
return (
|
| 337 |
<div className="absolute inset-0 text-white">
|
| 338 |
+
<div className="fixed top-0 left-0 w-full bg-gray-900 text-white text-center py-2 z-50">
|
| 339 |
+
{isLoading ? "Loading model..." : isLoaded ? "Model loaded" : modelError ? `Model error: ${modelError}` : "Model not loaded"}
|
| 340 |
+
</div>
|
| 341 |
+
<div className="text-center text-sm text-blue-300 mt-2">{inferenceStatus}</div>
|
| 342 |
<div className="flex flex-col items-center justify-center h-full w-full">
|
| 343 |
{/* Mode Selector */}
|
| 344 |
<div className="mb-6">
|