Spaces:
Sleeping
Sleeping
import React, { useState, useCallback, useRef } from 'react'; | |
import { VlaData, TaskSegment, Interaction } from './types'; | |
import { generateOverallGoal, generateTasksAndInteractions } from './services/backendService'; | |
import { extractFramesFromVideo } from './utils/videoProcessor'; | |
import { VideoUploader } from './components/VideoUploader'; | |
import { VideoPlayer } from './components/VideoPlayer'; | |
import { ResultsDisplay } from './components/ResultsDisplay'; | |
import { WandSparkles } from './components/Icons'; | |
/**
 * Point to be highlighted on the video, or `null` when nothing is highlighted.
 * `isEditing` marks a "sticky" highlight that hover-driven updates must not replace.
 */
type HighlightPoint = {
  x: number;
  y: number;
  isEditing: boolean;
} | null;
/**
 * Callback that receives the coordinates picked on the video,
 * or `null` when no coordinate picking is in progress.
 */
type CoordinatePickerCallback =
  | ((coords: { x: number; y: number }) => void)
  | null;
/**
 * Root component of the VLA Data Generator.
 *
 * Holds the uploaded video, drives the three-step generation pipeline
 * (frame extraction, overall goal, tasks and interactions) and wires the
 * video player and the results panel together.
 */
export default function App(): React.ReactNode {
  const [videoFile, setVideoFile] = useState<File | null>(null);
  const [videoSrc, setVideoSrc] = useState<string | null>(null);
  const [videoDuration, setVideoDuration] = useState<number>(0);
  const [vlaData, setVlaData] = useState<VlaData | null>(null);
  const [isLoading, setIsLoading] = useState<boolean>(false);
  const [loadingMessage, setLoadingMessage] = useState<string>('');
  const [error, setError] = useState<string | null>(null);
  const [totalFrames, setTotalFrames] = useState<number>(0);
  const [highlightPoint, setHighlightPoint] = useState<HighlightPoint>(null);
  const [coordinatePicker, setCoordinatePicker] = useState<CoordinatePickerCallback>(null);
  // NOTE(review): within this component the flag is only ever reset to false;
  // nothing here sets it to true. Confirm whether a setter call is missing.
  const [usedFallback, setUsedFallback] = useState<boolean>(false);
  const videoRef = useRef<HTMLVideoElement>(null);
  // Re-entrancy guard: a ref (not state) so the check is synchronous and
  // does not depend on a re-render having happened.
  const isGeneratingRef = useRef(false);

  /**
   * Accepts a newly selected file, resets all per-video state and probes the
   * video's duration through a detached <video> element.
   */
  const handleVideoUpload = useCallback((file: File) => {
    if (!file.type.startsWith('video/')) {
      setError('Please upload a valid video file.');
      return;
    }

    setVideoFile(file);
    // Release the object URL of the previously selected video, if any.
    if (videoSrc) {
      URL.revokeObjectURL(videoSrc);
    }
    const url = URL.createObjectURL(file);
    setVideoSrc(url);
    setVlaData(null);
    setError(null);
    setUsedFallback(false);
    setVideoDuration(0);
    setTotalFrames(0);

    // Only metadata is needed to learn the duration; the element is never
    // attached to the document.
    const videoElement = document.createElement('video');
    videoElement.preload = 'metadata';
    videoElement.src = url;
    videoElement.onloadedmetadata = () => {
      setVideoDuration(videoElement.duration);
    };
    videoElement.onerror = () => {
      setError("Could not read video metadata to get duration.");
    };
  }, [videoSrc]);

  /**
   * Runs the generation pipeline on the current video:
   * 1. extract frames, 2. determine the overall goal from three keyframes,
   * 3. generate task segments and interactions.
   * Any failure clears partial results and surfaces an error message.
   */
  const handleGenerate = useCallback(async () => {
    if (!videoFile || isGeneratingRef.current) {
      if (!videoFile) setError('No video file selected.');
      return;
    }

    isGeneratingRef.current = true;
    setIsLoading(true);
    setError(null);
    setVlaData(null);
    setUsedFallback(false); // Reset on each generation

    try {
      const FRAMES_PER_SECOND = 2; // Extract 2 frames per second
      const MAX_FRAMES_TOTAL = 360; // Cap at 360 frames (e.g., 3 minutes at 2fps) to manage memory/performance

      let calculatedFrames = Math.ceil(videoDuration * FRAMES_PER_SECOND);
      if (calculatedFrames > MAX_FRAMES_TOTAL) {
        calculatedFrames = MAX_FRAMES_TOTAL;
      }
      if (calculatedFrames === 0 && videoDuration > 0) {
        calculatedFrames = 1; // ensure at least one frame for very short videos
      }
      setTotalFrames(calculatedFrames);

      // Step 1: Extract frames
      setLoadingMessage(`Step 1/3: Extracting ${calculatedFrames} frames from video...`);
      const frames = await extractFramesFromVideo(videoFile, calculatedFrames);
      if (frames.length === 0) {
        throw new Error("Could not extract any frames from the video. The file might be corrupted or in an unsupported format.");
      }

      // Step 2: Generate Overall Goal from the first, middle and last frame
      setLoadingMessage('Step 2/3: Determining overall goal...');
      const keyframes = [frames[0], frames[Math.floor(frames.length / 2)], frames[frames.length - 1]];
      const overallGoal = await generateOverallGoal(keyframes, videoDuration);

      // Show the goal right away while the tasks are still being generated.
      const initialVlaData: VlaData = { overallGoal, tasks: [] };
      setVlaData(initialVlaData);

      // Step 3: Generate Task Segments and Interactions in one go
      setLoadingMessage('Step 3/3: Analyzing tasks and interactions...');
      const completedVlaData = await generateTasksAndInteractions(
        frames,
        overallGoal,
        videoDuration,
        // Use the value computed above: the `totalFrames` state captured by
        // this closure is stale (it was reset to 0 on upload and the
        // setTotalFrames call above has not been rendered yet).
        calculatedFrames,
        (current, total) => {
          // Progress callback - you could update loading message here
          console.log(`Progress: ${current}/${total}`);
        }
      );
      setVlaData(completedVlaData);
    } catch (err) {
      console.error(err);
      const errorMessage = err instanceof Error ? err.message : 'An unknown error occurred.';
      setError(`Failed to process video. ${errorMessage}`);
      setVlaData(null); // Clear partial data on major failure
    } finally {
      setIsLoading(false);
      setLoadingMessage('');
      isGeneratingRef.current = false;
    }
  }, [videoFile, videoDuration]);

  /**
   * Downloads the current results as pretty-printed JSON named after the
   * video file (extension stripped, `_vla_data.json` appended).
   */
  const handleDownload = useCallback(() => {
    if (!vlaData || !videoFile) return;

    const dataStr = JSON.stringify(vlaData, null, 2);
    const dataBlob = new Blob([dataStr], { type: 'application/json' });
    const dataUrl = URL.createObjectURL(dataBlob);

    const link = document.createElement('a');
    link.href = dataUrl;
    // Falls back to the full name when there is no extension (or the name
    // starts with the only dot), because the substring is then empty.
    const baseName = videoFile.name.substring(0, videoFile.name.lastIndexOf('.')) || videoFile.name;
    link.download = `${baseName}_vla_data.json`;

    // A temporary anchor is attached, clicked and removed to trigger the download.
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
    URL.revokeObjectURL(dataUrl);
  }, [vlaData, videoFile]);

  /** Moves the video player to the given time, if the player is mounted. */
  const handleSeekToTime = useCallback((time: number) => {
    if (videoRef.current) {
      videoRef.current.currentTime = time;
    }
  }, []);

  /**
   * Immutably replaces one interaction of the task with the given id.
   * Tasks with a different id are returned unchanged.
   */
  const handleUpdateInteraction = useCallback((taskId: number, interactionIndex: number, updatedInteraction: Interaction) => {
    setVlaData(currentData => {
      if (!currentData) return null;

      const newTasks = currentData.tasks.map(task => {
        if (task.id !== taskId) {
          return task;
        }
        const newInteractions = [...task.interactions];
        newInteractions[interactionIndex] = updatedInteraction;
        return { ...task, interactions: newInteractions };
      });

      return { ...currentData, tasks: newTasks };
    });
  }, []);

  /** Forwards a click on the video to the active coordinate picker, if any. */
  const handleVideoClick = useCallback((coords: { x: number; y: number }) => {
    if (coordinatePicker) {
      coordinatePicker(coords);
    }
  }, [coordinatePicker]);

  /**
   * Updates the highlighted point, except that a non-editing update never
   * replaces a highlight that is currently marked as editing.
   */
  const handleHighlightPoint = useCallback((point: HighlightPoint) => {
    // Prevent hover from overriding a sticky editing highlight.
    // A functional update reads the latest state, so the callback needs no
    // dependency on `highlightPoint` and keeps a stable identity.
    // NOTE(review): this rule also rejects `null` while an editing highlight
    // is active, so a sticky highlight cannot be cleared through this
    // handler - confirm that is intended.
    setHighlightPoint(current => (current?.isEditing && !point?.isEditing ? current : point));
  }, []);

  /** Installs (or clears, with `null`) the coordinate picker callback. */
  const handleSetCoordinatePicker = useCallback((callback: CoordinatePickerCallback) => {
    // Wrapped in a function so React stores the callback itself instead of
    // invoking it as a state updater.
    setCoordinatePicker(() => callback);
  }, []);

  return (
    <div className="min-h-screen bg-slate-900 text-slate-200 font-sans">
      <main className="grid grid-cols-1 lg:grid-cols-2 gap-6 p-4 md:p-8 max-w-screen-2xl mx-auto">
        {/* Left Column: Video and Controls */}
        <div className="flex flex-col gap-6 lg:h-[calc(100vh-4rem)]">
          <header>
            <h1 className="text-3xl md:text-4xl font-bold text-white tracking-tight">VLA Data Generator</h1>
            <p className="text-slate-400 mt-2">Upload a screen recording to automatically generate structured data about user actions.</p>
          </header>
          <div
            className={`bg-slate-800/50 rounded-2xl p-2 aspect-video flex-grow flex items-center justify-center ${coordinatePicker ? 'cursor-crosshair' : ''}`}
          >
            {videoSrc ? (
              <VideoPlayer
                src={videoSrc}
                ref={videoRef}
                highlightPoint={highlightPoint}
                onVideoClick={handleVideoClick}
              />
            ) : (
              <VideoUploader onVideoSelect={handleVideoUpload} />
            )}
          </div>
          {videoFile && (
            <button
              onClick={handleGenerate}
              disabled={isLoading || videoDuration === 0}
              className="w-full flex items-center justify-center gap-3 bg-indigo-600 hover:bg-indigo-500 disabled:bg-indigo-800 disabled:text-slate-400 disabled:cursor-not-allowed text-white font-bold py-3 px-4 rounded-xl transition-all duration-300 text-lg shadow-lg shadow-indigo-900/50"
            >
              <WandSparkles className="w-6 h-6" />
              {isLoading ? 'Generating...' : (videoDuration === 0 ? 'Reading Video...' : 'Generate Action Data')}
            </button>
          )}
        </div>
        {/* Right Column: Results */}
        <div className="bg-slate-800 rounded-2xl lg:h-[calc(100vh-4rem)] flex flex-col">
          <ResultsDisplay
            vlaData={vlaData}
            isLoading={isLoading}
            loadingMessage={loadingMessage}
            error={error}
            hasVideo={!!videoFile}
            videoDuration={videoDuration}
            totalFrames={totalFrames}
            usedFallback={usedFallback}
            onDownload={handleDownload}
            onSeekToTime={handleSeekToTime}
            onUpdateInteraction={handleUpdateInteraction}
            onHighlightPoint={handleHighlightPoint}
            onSetCoordinatePicker={handleSetCoordinatePicker}
          />
        </div>
      </main>
    </div>
  );
}