Upload 36 files

Files changed:
- src/context/VLMContext.tsx (+16 -4)
- src/types/vlm.ts (+1 -1)
src/context/VLMContext.tsx
CHANGED

@@ -67,7 +67,7 @@ export const VLMProvider: React.FC<React.PropsWithChildren> = ({ children }) =>
   );

   const runInference = useCallback(
-    async (
+    async (media: HTMLVideoElement | HTMLImageElement, instruction: string, onTextUpdate?: (text: string) => void): Promise<string> => {
      if (inferenceLock.current) {
        console.log("Inference already running, skipping frame");
        return ""; // Return empty string to signal a skip
@@ -83,13 +83,25 @@ export const VLMProvider: React.FC<React.PropsWithChildren> = ({ children }) =>
      }
      const canvas = canvasRef.current;

-
-
+      // Support both video and image
+      let width = 0;
+      let height = 0;
+      if (media instanceof HTMLVideoElement) {
+        width = media.videoWidth;
+        height = media.videoHeight;
+      } else if (media instanceof HTMLImageElement) {
+        width = media.naturalWidth;
+        height = media.naturalHeight;
+      } else {
+        throw new Error("Unsupported media type");
+      }
+      canvas.width = width;
+      canvas.height = height;

      const ctx = canvas.getContext("2d", { willReadFrequently: true });
      if (!ctx) throw new Error("Could not get canvas context");

-      ctx.drawImage(
+      ctx.drawImage(media, 0, 0, width, height);

      const frame = ctx.getImageData(0, 0, canvas.width, canvas.height);
      const rawImg = new RawImage(frame.data, frame.width, frame.height, 4);
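In the new hunk, the canvas is sized from the media element's intrinsic dimensions before drawing: videoWidth/videoHeight for a video element, naturalWidth/naturalHeight for an image element. A minimal standalone sketch of that branch, using a hypothetical helper name that does not appear in the commit:

// Illustrative only; the commit inlines this logic inside runInference.
function getIntrinsicSize(media: HTMLVideoElement | HTMLImageElement): { width: number; height: number } {
  if (media instanceof HTMLVideoElement) {
    // Videos report the decoded frame size, independent of CSS layout size.
    return { width: media.videoWidth, height: media.videoHeight };
  }
  // Images report the decoded bitmap size via naturalWidth/naturalHeight.
  return { width: media.naturalWidth, height: media.naturalHeight };
}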
src/types/vlm.ts
CHANGED

@@ -4,7 +4,7 @@ export type VLMContextValue = {
  error: string | null;
  loadModel: (onProgress?: (msg: string) => void) => Promise<void>;
  runInference: (
-
+    media: HTMLVideoElement | HTMLImageElement,
    instruction: string,
    onTextUpdate?: (text: string) => void,
  ) => Promise<string>;
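With the widened signature in src/types/vlm.ts, callers can pass either a live video frame or a static image. A minimal usage sketch, assuming a consumer that already holds the context's runInference function; the wrapper name, prompt text, and import path are illustrative, not part of the commit:

import type { VLMContextValue } from "./types/vlm";

// Hypothetical wrapper showing the updated call shape.
async function captionMedia(
  runInference: VLMContextValue["runInference"],
  media: HTMLVideoElement | HTMLImageElement,
): Promise<string> {
  // Partial text streams through the optional callback; the final string is
  // returned, or "" when the frame is skipped because inference is already running.
  return runInference(media, "Describe what you see.", (partial) => {
    console.log(partial);
  });
}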