|  | <!DOCTYPE html> | 
					
						
						|  | <html lang="en"> | 
					
						
						<head>
						  <meta charset="UTF-8">
						  <meta name="viewport" content="width=device-width, initial-scale=1.0">
						  <title>Computer Agent Evaluation Viewer</title>
						  <style>
						    /*
						     * Stylesheet for the evaluation viewer.
						     * Rule order is significant: several selectors share the same
						     * specificity (e.g. `.hidden` vs `.image-controls`), so later rules win.
						     */

						    /* ---- Page shell ---- */
						    body {
						      margin: 0;
						      padding: 20px;
						      font-family: Arial, sans-serif;
						      background-color: #f5f5f5;
						    }

						    .container {
						      max-width: 1200px;
						      margin: 0 auto;
						      padding: 20px;
						      border-radius: 8px;
						      background-color: #fff;
						      box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
						    }

						    h1, h2, h3 {
						      color: #333;
						    }

						    /* ---- Form controls ---- */
						    select, input, button {
						      margin: 5px 0;
						      padding: 8px 12px;
						      border: 1px solid #ddd;
						      border-radius: 4px;
						    }

						    /* Declared after the shared rule so `border: none` overrides it. */
						    button {
						      border: none;
						      color: #fff;
						      background-color: #4a6cf7;
						      cursor: pointer;
						    }

						    button:hover {
						      background-color: #3a5ce5;
						    }

						    /* Declared after :hover so a disabled button stays grey when hovered. */
						    button:disabled {
						      background-color: #ccc;
						      cursor: not-allowed;
						    }

						    /* ---- Two-column selector row ---- */
						    .row {
						      display: flex;
						      margin-bottom: 20px;
						    }

						    .col {
						      flex: 1;
						      padding: 0 10px;
						    }

						    /* ---- Screenshot viewer ---- */
						    .image-viewer {
						      position: relative;
						      width: 100%;
						      max-height: 500px;
						      margin-bottom: 10px;
						      border: 1px solid #ddd;
						      border-radius: 4px;
						      overflow: hidden;
						    }

						    .image-viewer img {
						      display: block;
						      max-width: 100%;
						      max-height: 450px;
						      margin: 0 auto;
						    }

						    .image-controls {
						      display: flex;
						      align-items: center;
						      justify-content: space-between;
						      margin-top: 10px;
						    }

						    .nav-buttons {
						      display: flex;
						      gap: 10px;
						    }

						    /* ---- Agent trace steps ---- */
						    .step {
						      margin-bottom: 10px;
						      border: 1px solid #ddd;
						      border-radius: 4px;
						      overflow: hidden;
						    }

						    .step-header {
						      display: flex;
						      justify-content: space-between;
						      padding: 10px;
						      font-weight: bold;
						      background-color: #f0f0f0;
						      cursor: pointer;
						    }

						    .step-content {
						      max-height: 300px;
						      padding: 15px;
						      overflow-y: auto;
						      font-family: monospace;
						      white-space: pre-wrap;
						      background-color: #f9f9f9;
						    }

						    /* ---- Utilities and status colours ---- */
						    .hidden {
						      display: none;
						    }

						    .status-success {
						      font-weight: bold;
						      color: #22c55e;
						    }

						    .status-failure {
						      font-weight: bold;
						      color: #ef4444;
						    }

						    /* ---- Tabs ---- */
						    .tabs {
						      display: flex;
						      margin-bottom: 20px;
						      border-bottom: 1px solid #ddd;
						    }

						    .tab {
						      padding: 10px 20px;
						      border-bottom: 2px solid transparent;
						      cursor: pointer;
						    }

						    .tab.active {
						      font-weight: bold;
						      border-bottom-color: #4a6cf7;
						    }

						    /* Panels are hidden unless they carry `.active`. */
						    .tab-content {
						      display: none;
						    }

						    .tab-content.active {
						      display: block;
						    }

						    /* ---- Text blocks and messages ---- */
						    pre {
						      padding: 10px;
						      border-radius: 4px;
						      overflow-x: auto;
						      white-space: pre-wrap;
						      background-color: #f0f0f0;
						    }

						    .error-message {
						      margin: 10px 0;
						      padding: 10px;
						      border-radius: 4px;
						      color: #b91c1c;
						      background-color: #fee2e2;
						    }

						    /* ---- Loading spinner ---- */
						    .loading {
						      display: inline-block;
						      width: 20px;
						      height: 20px;
						      margin-left: 10px;
						      /* `border` must precede `border-top` so the top edge keeps its own colour. */
						      border: 2px solid #f3f3f3;
						      border-top: 2px solid #3498db;
						      border-radius: 50%;
						      animation: spin 1s linear infinite;
						    }

						    @keyframes spin {
						      from { transform: rotate(0deg); }
						      to { transform: rotate(360deg); }
						    }
						  </style>
						</head>
					
						
						|  | <body> | 
					
						
						<!--
						  Viewer layout: evaluation loader, example/run selectors, and the run
						  details panel (task, status, and the Screenshots / Agent Trace / Raw JSON
						  tabs). Element ids are looked up by the inline script, so they must stay
						  unchanged.
						-->
						<div class="container">
						  <h1>Computer Agent Evaluation Viewer</h1>

						  <!-- Evaluation loader -->
						  <div style="margin-bottom: 20px; padding: 15px; background-color: #f0f0f0; border-radius: 8px;">
						    <h2>Load Evaluation Data</h2>
						    <div style="display: flex; gap: 10px; margin-top: 10px;">
						      <!-- The placeholder is only a hint; aria-label provides the accessible name. -->
						      <input type="text" id="base-path" placeholder="Base directory path (leave empty for default)"
						             style="flex-grow: 1; padding: 8px; border: 1px solid #ddd; border-radius: 4px;"
						             aria-label="Base directory path">
						      <button type="button" id="refresh-evals-btn">Refresh</button>
						    </div>
						    <div style="margin-top: 10px;">
						      <label for="eval-select">Select Evaluation:</label>
						      <select id="eval-select" style="min-width: 300px;"></select>
						    </div>
						    <!-- Progress and error text is written here by the script; announce it politely. -->
						    <div id="load-status" style="margin-top: 10px; font-style: italic;" role="status"></div>
						  </div>

						  <!-- Example and run selectors -->
						  <div class="row">
						    <div class="col">
						      <label for="example-select">Select Example:</label>
						      <select id="example-select">
						        <option value="">-- Select Example --</option>
						      </select>
						    </div>
						    <div class="col">
						      <label for="run-select">Select Run:</label>
						      <select id="run-select" disabled>
						        <option value="">-- Select Run --</option>
						      </select>
						    </div>
						  </div>

						  <!-- Run details: hidden until a run is selected -->
						  <div id="run-details" class="hidden">
						    <div>
						      <h2>Task</h2>
						      <pre id="task-text"></pre>
						    </div>

						    <div>
						      <h2>Run Status</h2>
						      <div id="status-display"></div>
						    </div>

						    <div class="tabs">
						      <div class="tab active" data-tab="screenshots">Screenshots</div>
						      <div class="tab" data-tab="agent-trace">Agent Trace</div>
						      <div class="tab" data-tab="raw-json">Raw JSON</div>
						    </div>

						    <div id="screenshots-tab" class="tab-content active">
						      <div id="no-images" class="hidden">
						        <p>No screenshots available for this run.</p>
						      </div>
						      <div id="image-container" class="image-viewer hidden">
						        <!-- No src attribute: an empty src is an invalid URL; the script assigns the real path. -->
						        <img id="current-image" alt="Screenshot">
						        <p id="image-caption" class="text-center"></p>
						      </div>
						      <div class="image-controls hidden" id="image-controls">
						        <div class="nav-buttons">
						          <button type="button" id="prev-image">Previous</button>
						          <span id="image-counter">0 / 0</span>
						          <button type="button" id="next-image">Next</button>
						        </div>
						        <input type="range" id="image-slider" min="0" max="0" value="0" style="width: 100%"
						               aria-label="Screenshot position">
						      </div>
						    </div>

						    <div id="agent-trace-tab" class="tab-content">
						      <div id="agent-steps"></div>
						    </div>

						    <div id="raw-json-tab" class="tab-content">
						      <div id="json-loading-indicator" class="hidden">
						        <p>Loading metadata... <span class="loading"></span></p>
						      </div>
						      <div id="json-error" class="error-message hidden" role="alert"></div>
						      <pre id="raw-json"></pre>
						    </div>
						  </div>
						</div>
					
						
						|  |  | 
					
						
						|  | <script> | 
					
						
						|  |  | 
					
						
						// Mutable state shared by every handler in this script.
						const appState = {
						  // Current selection. `basePath` is the trimmed text of the base-path
						  // input and is sent as the `path` query parameter on API calls.
						  basePath: "",
						  evalId: null,
						  currentExampleId: null,
						  currentRunId: null,

						  // Screenshot viewer: the list being shown and the position within it.
						  currentImages: [],
						  currentImageIndex: 0,

						  // Responses fetched so far. `examples` holds the latest examples
						  // response; `runs` is keyed by example id; `metadata` and `screenshots`
						  // are keyed by example id, then run id.
						  loadedData: {
						    examples: {},
						    runs: {},
						    metadata: {},
						    screenshots: {},
						  },
						};
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						// Handles to the page elements the script reads or updates, resolved once.

						// Evaluation loader
						const basePathInput = document.querySelector('#base-path');
						const refreshEvalsBtn = document.querySelector('#refresh-evals-btn');
						const evalSelect = document.querySelector('#eval-select');
						const loadStatusDisplay = document.querySelector('#load-status');

						// Example / run selectors and the details panel
						const exampleSelect = document.querySelector('#example-select');
						const runSelect = document.querySelector('#run-select');
						const runDetails = document.querySelector('#run-details');
						const taskText = document.querySelector('#task-text');
						const statusDisplay = document.querySelector('#status-display');

						// Screenshot viewer
						const imageContainer = document.querySelector('#image-container');
						const noImages = document.querySelector('#no-images');
						const imageControls = document.querySelector('#image-controls');
						const currentImage = document.querySelector('#current-image');
						const imageCaption = document.querySelector('#image-caption');
						const imageCounter = document.querySelector('#image-counter');
						const imageSlider = document.querySelector('#image-slider');
						const prevImage = document.querySelector('#prev-image');
						const nextImage = document.querySelector('#next-image');

						// Agent trace and raw JSON tabs
						const agentSteps = document.querySelector('#agent-steps');
						const rawJson = document.querySelector('#raw-json');
						const jsonLoadingIndicator = document.querySelector('#json-loading-indicator');
						const jsonError = document.querySelector('#json-error');

						// The Refresh button reloads the evaluation list.
						refreshEvalsBtn.addEventListener('click', loadEvaluations);
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						/**
						 * Fetches the evaluation ids for the base path typed into the form and
						 * rebuilds the evaluation dropdown from them.
						 *
						 * The ids are put in descending string order BEFORE the options are
						 * rendered, so the list order matches the auto-selected first entry
						 * (presumably ids are timestamp-like, making this newest-first; confirm).
						 * A `change` event is always dispatched afterwards: with zero evaluations
						 * the select is left on its placeholder, so the change handler clears the
						 * example/run controls instead of leaving stale entries behind.
						 *
						 * Failures are logged and written to the status line; nothing is thrown.
						 * The Refresh button is disabled while the request is in flight.
						 *
						 * @returns {Promise<void>}
						 */
						async function loadEvaluations() {
						  appState.basePath = basePathInput.value.trim();
						  loadStatusDisplay.textContent = 'Loading evaluations...';
						  refreshEvalsBtn.disabled = true;

						  try {
						    const response = await fetch(`/api/evals?path=${encodeURIComponent(appState.basePath)}`);
						    if (!response.ok) {
						      // The error body is not guaranteed to be JSON; fall back to the generic message.
						      const errorData = await response.json().catch(() => null);
						      throw new Error(errorData?.error || 'Failed to load evaluations');
						    }

						    const payload = await response.json();
						    if (!Array.isArray(payload)) {
						      throw new Error('Failed to load evaluations');
						    }

						    // Sort a copy so the rendered order and the default selection agree.
						    const evals = [...payload].sort().reverse();

						    evalSelect.innerHTML = '<option value="">-- Select Evaluation --</option>';
						    evals.forEach(evalId => {
						      const option = document.createElement('option');
						      option.value = evalId;
						      option.textContent = evalId;
						      evalSelect.appendChild(option);
						    });

						    loadStatusDisplay.textContent = `Loaded ${evals.length} evaluations`;

						    // Select the first entry (or the placeholder when the list is empty)
						    // and let the change handler load or reset the dependent controls.
						    evalSelect.value = evals.length > 0 ? evals[0] : '';
						    evalSelect.dispatchEvent(new Event('change'));
						  } catch (err) {
						    console.error('Error loading evaluations:', err);
						    loadStatusDisplay.textContent = `Error: ${err.message}`;
						  } finally {
						    refreshEvalsBtn.disabled = false;
						  }
						}
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						/*
						 * When the evaluation changes: reset the example/run controls, then fetch
						 * that evaluation's examples and populate the example dropdown.
						 *
						 * The dependent controls are cleared BEFORE the request so that a failed
						 * fetch cannot leave the previous evaluation's examples selectable under
						 * the new evaluation id. The id is percent-encoded because it is used as a
						 * URL path segment. On success the first example is auto-selected and a
						 * `change` event is dispatched on the example dropdown.
						 */
						evalSelect.addEventListener('change', async () => {
						  const evalId = evalSelect.value;
						  appState.evalId = evalId;

						  exampleSelect.innerHTML = '<option value="">-- Select Example --</option>';
						  exampleSelect.disabled = true;
						  runSelect.innerHTML = '<option value="">-- Select Run --</option>';
						  runSelect.disabled = true;
						  runDetails.classList.add('hidden');

						  if (!evalId) {
						    return;
						  }

						  try {
						    loadStatusDisplay.textContent = 'Loading examples...';
						    evalSelect.disabled = true;

						    const response = await fetch(
						      `/api/eval/${encodeURIComponent(evalId)}/examples?path=${encodeURIComponent(appState.basePath)}`
						    );
						    if (!response.ok) {
						      // The error body is not guaranteed to be JSON; fall back to the generic message.
						      const errorData = await response.json().catch(() => null);
						      throw new Error(errorData?.error || 'Failed to load examples');
						    }

						    // The response is used as an object mapping example id -> task text.
						    const examples = await response.json();
						    appState.loadedData.examples = examples;

						    const exampleIds = Object.keys(examples);
						    exampleIds.forEach(exampleId => {
						      const option = document.createElement('option');
						      option.value = exampleId;
						      option.textContent = exampleId;
						      option.title = examples[exampleId]; // task text as hover tooltip
						      exampleSelect.appendChild(option);
						    });

						    exampleSelect.disabled = false;
						    loadStatusDisplay.textContent = `Loaded ${exampleIds.length} examples`;

						    if (exampleIds.length > 0) {
						      exampleSelect.value = exampleIds[0];
						      exampleSelect.dispatchEvent(new Event('change'));
						    }
						  } catch (err) {
						    console.error('Error loading examples:', err);
						    loadStatusDisplay.textContent = `Error: ${err.message}`;
						  } finally {
						    evalSelect.disabled = false;
						  }
						});
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						/*
						 * When the example changes: reset the run dropdown and details panel,
						 * then fetch the example's runs and populate the run dropdown.
						 *
						 * The run controls are reset BEFORE the request so that a failed fetch
						 * cannot leave the previous example's run details on screen. Ids are
						 * percent-encoded because they are used as URL path segments. Runs are
						 * sorted by id with numeric-aware comparison; ids are converted with
						 * String() so a numeric id does not make the comparison throw. On
						 * success the first run is auto-selected and a `change` event is
						 * dispatched on the run dropdown.
						 */
						exampleSelect.addEventListener('change', async () => {
						  const exampleId = exampleSelect.value;
						  appState.currentExampleId = exampleId;

						  runSelect.innerHTML = '<option value="">-- Select Run --</option>';
						  runSelect.disabled = true;
						  runDetails.classList.add('hidden');

						  if (!exampleId) {
						    return;
						  }

						  try {
						    loadStatusDisplay.textContent = 'Loading runs...';
						    exampleSelect.disabled = true;

						    const response = await fetch(
						      `/api/eval/${encodeURIComponent(appState.evalId)}/example/${encodeURIComponent(exampleId)}/runs?path=${encodeURIComponent(appState.basePath)}`
						    );
						    if (!response.ok) {
						      // The error body is not guaranteed to be JSON; fall back to the generic message.
						      const errorData = await response.json().catch(() => null);
						      throw new Error(errorData?.error || 'Failed to load runs');
						    }

						    // Each run is read as an object with `id` and `status` fields.
						    const runs = await response.json();
						    appState.loadedData.runs[exampleId] = runs;

						    runs.sort((a, b) => String(a.id).localeCompare(String(b.id), undefined, {numeric: true}));

						    runs.forEach(run => {
						      const option = document.createElement('option');
						      option.value = run.id;
						      option.textContent = `${run.id} (${run.status})`;
						      option.dataset.status = run.status;
						      runSelect.appendChild(option);
						    });

						    runSelect.disabled = false;
						    loadStatusDisplay.textContent = `Loaded ${runs.length} runs`;

						    if (runs.length > 0) {
						      runSelect.value = runs[0].id;
						      runSelect.dispatchEvent(new Event('change'));
						    }
						  } catch (err) {
						    console.error('Error loading runs:', err);
						    loadStatusDisplay.textContent = `Error: ${err.message}`;
						  } finally {
						    exampleSelect.disabled = false;
						  }
						});
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						// When the run changes: record it, start loading its data if both an
						// example and a run are selected, and show or hide the details panel.
						runSelect.addEventListener('change', () => {
						  const selectedRun = runSelect.value;
						  appState.currentRunId = selectedRun;

						  const selectedExample = appState.currentExampleId;
						  const hasSelection = Boolean(selectedRun && selectedExample);

						  if (hasSelection) {
						    // Not awaited: loadRunData reports its own progress and errors.
						    loadRunData(selectedExample, selectedRun);
						  }
						  runDetails.classList.toggle('hidden', !hasSelection);
						});
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						/**
						 * Fetches the metadata and screenshot list for one run, caches both in
						 * `appState.loadedData`, and renders the details panel: task text, run
						 * status, screenshots, agent trace and raw JSON. Finishes by switching to
						 * the Screenshots tab.
						 *
						 * The status section is built from DOM nodes with `textContent` rather
						 * than an HTML string, because `metadata.error_message` comes from run
						 * output and must not be interpreted as markup. A failed or malformed
						 * screenshots response is treated as "no screenshots" so the trace and
						 * JSON still render. Ids are percent-encoded because they are used as URL
						 * path segments.
						 *
						 * The run dropdown is disabled while loading. Failures are logged and
						 * shown in the status line and the Raw JSON error box; nothing is thrown.
						 *
						 * @param {string} exampleId Example the run belongs to.
						 * @param {string} runId Run to load.
						 * @returns {Promise<void>}
						 */
						async function loadRunData(exampleId, runId) {
						  loadStatusDisplay.textContent = 'Loading run data...';
						  runSelect.disabled = true;
						  jsonLoadingIndicator.classList.remove('hidden');
						  jsonError.classList.add('hidden');

						  try {
						    const runUrl = `/api/eval/${encodeURIComponent(appState.evalId)}/example/${encodeURIComponent(exampleId)}/run/${encodeURIComponent(runId)}`;
						    const query = `?path=${encodeURIComponent(appState.basePath)}`;

						    // --- Metadata ---
						    const metadataResponse = await fetch(`${runUrl}/metadata${query}`);
						    let metadata = null;

						    if (metadataResponse.ok) {
						      metadata = await metadataResponse.json();
						    } else {
						      // The error body is not guaranteed to be JSON.
						      const errorData = await metadataResponse.json().catch(() => null);
						      console.error('Error loading metadata:', errorData);
						      jsonError.textContent = `Error loading metadata: ${errorData?.error || 'Unknown error'}`;
						      jsonError.classList.remove('hidden');
						    }

						    appState.loadedData.metadata[exampleId] = appState.loadedData.metadata[exampleId] || {};
						    appState.loadedData.metadata[exampleId][runId] = metadata;

						    // --- Task text ---
						    const task = appState.loadedData.examples[exampleId];
						    taskText.textContent = task || "No task available";

						    // --- Run status (DOM nodes only; no HTML string interpolation) ---
						    statusDisplay.textContent = '';
						    const statusLine = document.createElement('p');
						    statusDisplay.appendChild(statusLine);

						    if (!metadata) {
						      statusLine.textContent = 'Status information not available';
						    } else {
						      const completed = metadata.status === 'completed';
						      const badge = document.createElement('span');
						      badge.className = completed ? 'status-success' : 'status-failure';
						      badge.textContent = completed ? '✓ Completed successfully' : '✗ Failed';
						      statusLine.appendChild(badge);

						      if (!completed && metadata.error_message) {
						        const errorLine = document.createElement('p');
						        errorLine.textContent = `Error: ${metadata.error_message}`;
						        statusDisplay.appendChild(errorLine);
						      }
						    }

						    // --- Screenshots ---
						    const screenshotsResponse = await fetch(`${runUrl}/screenshots${query}`);
						    let screenshots = [];
						    let screenshotsFailed = false;

						    if (screenshotsResponse.ok) {
						      const payload = await screenshotsResponse.json();
						      if (Array.isArray(payload)) {
						        screenshots = payload;
						      } else {
						        screenshotsFailed = true;
						        console.error('Unexpected screenshots payload:', payload);
						      }
						    } else {
						      screenshotsFailed = true;
						      console.error('Error loading screenshots: HTTP', screenshotsResponse.status);
						    }

						    appState.loadedData.screenshots[exampleId] = appState.loadedData.screenshots[exampleId] || {};
						    appState.loadedData.screenshots[exampleId][runId] = screenshots;

						    loadScreenshots(exampleId, runId);

						    // --- Agent trace and raw JSON ---
						    renderAgentTrace(metadata);
						    rawJson.textContent = metadata ? JSON.stringify(metadata, null, 2) : "No metadata available";

						    // Always land on the Screenshots tab after loading a run.
						    document.querySelector('.tab[data-tab="screenshots"]').click();

						    loadStatusDisplay.textContent = screenshotsFailed
						      ? 'Run data loaded (screenshots unavailable)'
						      : 'Run data loaded successfully';
						  } catch (err) {
						    console.error('Error loading run data:', err);
						    loadStatusDisplay.textContent = `Error: ${err.message}`;
						    jsonError.textContent = `Error loading data: ${err.message}`;
						    jsonError.classList.remove('hidden');
						  } finally {
						    jsonLoadingIndicator.classList.add('hidden');
						    runSelect.disabled = false;
						  }
						}
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						/**
						 * Points the screenshot viewer at the cached screenshot list for a run
						 * and shows either the viewer or the "no screenshots" notice.
						 *
						 * Anything other than an array in the cache (missing entry, or an error
						 * object stored from a failed request) is treated as an empty list;
						 * otherwise the viewer would be shown and `updateImageDisplay` would
						 * throw on the first image lookup.
						 *
						 * @param {string} exampleId Example the run belongs to.
						 * @param {string} runId Run whose screenshots should be displayed.
						 * @returns {void}
						 */
						function loadScreenshots(exampleId, runId) {
						  const cached = appState.loadedData.screenshots[exampleId]?.[runId];
						  appState.currentImages = Array.isArray(cached) ? cached : [];

						  const hasImages = appState.currentImages.length > 0;
						  noImages.classList.toggle('hidden', hasImages);
						  imageContainer.classList.toggle('hidden', !hasImages);
						  imageControls.classList.toggle('hidden', !hasImages);

						  if (!hasImages) {
						    return;
						  }

						  // The slider spans the image indices; start at the first image.
						  imageSlider.min = 0;
						  imageSlider.max = appState.currentImages.length - 1;
						  imageSlider.value = 0;

						  appState.currentImageIndex = 0;
						  updateImageDisplay();
						}
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						/**
						 * Renders the image at `appState.currentImageIndex`: sets the image
						 * source and caption, updates the "n / total" counter and the slider
						 * position, and disables Previous/Next at the ends of the list.
						 * Does nothing when the image list is empty.
						 *
						 * @returns {void}
						 */
						function updateImageDisplay() {
						  const { currentImages: frames, currentImageIndex: position } = appState;
						  const total = frames.length;
						  if (total === 0) {
						    return;
						  }

						  // Each entry is read for its `path` (image URL) and `name` (caption).
						  const { path, name } = frames[position];
						  currentImage.src = path;
						  imageCaption.textContent = name;
						  imageCounter.textContent = `${position + 1} / ${total}`;
						  imageSlider.value = position;

						  const lastPosition = total - 1;
						  prevImage.disabled = position === 0;
						  nextImage.disabled = position === lastPosition;
						}
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						// Previous: step back one image unless already at the first.
						prevImage.addEventListener('click', () => {
						  if (appState.currentImageIndex <= 0) {
						    return;
						  }
						  appState.currentImageIndex -= 1;
						  updateImageDisplay();
						});

						// Next: step forward one image unless already at the last.
						nextImage.addEventListener('click', () => {
						  const lastIndex = appState.currentImages.length - 1;
						  if (appState.currentImageIndex >= lastIndex) {
						    return;
						  }
						  appState.currentImageIndex += 1;
						  updateImageDisplay();
						});

						// Slider: jump straight to the chosen index while dragging.
						imageSlider.addEventListener('input', () => {
						  const chosenIndex = Number.parseInt(imageSlider.value, 10);
						  appState.currentImageIndex = chosenIndex;
						  updateImageDisplay();
						});
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						/*
						 * Tab switching. Clicking a tab marks it (and only it) `active` and shows
						 * the panel whose id is `<data-tab>-tab`.
						 *
						 * The tabs are plain <div>s, so this block also makes them operable
						 * without a mouse: it assigns the ARIA tab roles and state, keeps a
						 * roving tabindex (only the selected tab is in the Tab order), and
						 * handles Enter/Space (activate), ArrowLeft/ArrowRight (previous/next,
						 * wrapping) and Home/End (first/last).
						 * Keyboard activation goes through click(), so the script's programmatic
						 * `.click()` calls and keyboard use share one code path.
						 */
						document.querySelectorAll('.tab').forEach((tab, index, tabs) => {
						  const isInitiallyActive = tab.classList.contains('active');
						  const panel = document.getElementById(`${tab.getAttribute('data-tab')}-tab`);

						  tab.setAttribute('role', 'tab');
						  tab.setAttribute('aria-selected', String(isInitiallyActive));
						  tab.tabIndex = isInitiallyActive ? 0 : -1;
						  if (tab.parentElement) {
						    // role="tab" is only valid inside a tablist.
						    tab.parentElement.setAttribute('role', 'tablist');
						  }
						  if (panel) {
						    tab.setAttribute('aria-controls', panel.id);
						    panel.setAttribute('role', 'tabpanel');
						  }

						  tab.addEventListener('click', () => {
						    // Move the `active` marker and ARIA state to the clicked tab.
						    tabs.forEach(other => {
						      const selected = other === tab;
						      other.classList.toggle('active', selected);
						      other.setAttribute('aria-selected', String(selected));
						      other.tabIndex = selected ? 0 : -1;
						    });

						    // Show only the panel that belongs to the clicked tab.
						    const tabId = tab.getAttribute('data-tab');
						    document.querySelectorAll('.tab-content').forEach(content => {
						      content.classList.remove('active');
						    });
						    document.getElementById(`${tabId}-tab`).classList.add('active');
						  });

						  tab.addEventListener('keydown', event => {
						    let targetIndex;
						    switch (event.key) {
						      case 'Enter':
						      case ' ':
						        targetIndex = index;
						        break;
						      case 'ArrowRight':
						        targetIndex = (index + 1) % tabs.length;
						        break;
						      case 'ArrowLeft':
						        targetIndex = (index - 1 + tabs.length) % tabs.length;
						        break;
						      case 'Home':
						        targetIndex = 0;
						        break;
						      case 'End':
						        targetIndex = tabs.length - 1;
						        break;
						      default:
						        return; // leave every other key to the browser
						    }

						    event.preventDefault(); // e.g. stop Space from scrolling the page
						    tabs[targetIndex].focus();
						    tabs[targetIndex].click();
						  });
						});
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						/**
						 * Render the agent's step-by-step trace into the `agentSteps` container.
						 *
						 * Every entry of `metadata.summary` becomes a `.step` element made of a
						 * clickable `.step-header` (title + arrow) and a `.step-content` body that
						 * the header click shows or hides. When there is no usable summary, a
						 * placeholder paragraph is shown instead.
						 *
						 * Field labels are real <strong> elements and all trace values are inserted
						 * as text nodes, so labels render bold while text coming from the trace is
						 * never parsed as HTML.
						 *
						 * @param {Object} metadata - Evaluation metadata; only `metadata.summary`
						 *   (expected to be an array of step objects) is read.
						 */
						function renderAgentTrace(metadata) {
						  agentSteps.innerHTML = '';

						  const steps = metadata && metadata.summary;
						  // A missing, empty or non-array summary has nothing to iterate over.
						  if (!Array.isArray(steps) || steps.length === 0) {
						    agentSteps.innerHTML = '<p>No agent trace data available</p>';
						    return;
						  }

						  // Strings and primitives are shown as-is; objects are shown as JSON
						  // because their default string form is just "[object Object]".
						  const toDisplayText = (value) => {
						    if (value !== null && typeof value === 'object') {
						      try {
						        const json = JSON.stringify(value, null, 2);
						        if (json !== undefined) {
						          return json;
						        }
						      } catch (err) {
						        // Not serialisable (e.g. circular); fall through to String().
						      }
						    }
						    return String(value);
						  };

						  steps.forEach((step, index) => {
						    const stepDiv = document.createElement('div');
						    stepDiv.className = 'step';

						    // ----- Header: title on the left, collapse arrow on the right -----
						    const headerDiv = document.createElement('div');
						    headerDiv.className = 'step-header';

						    let headerText = `Step ${index}`;
						    if (index === 0 && step.task) {
						      headerText = 'Task';
						    } else if (step.model_output_message) {
						      headerText = 'Planning';
						    } else if (step.tool_calls) {
						      headerText = `Action ${index}`;
						    } else if (step.error) {
						      headerText = 'Error';
						    }

						    const titleSpan = document.createElement('span');
						    titleSpan.textContent = headerText;
						    const arrowSpan = document.createElement('span');
						    arrowSpan.textContent = '▲';
						    headerDiv.appendChild(titleSpan);
						    headerDiv.appendChild(arrowSpan);
						    stepDiv.appendChild(headerDiv);

						    // ----- Body: expanded by default -----
						    const contentDiv = document.createElement('div');
						    contentDiv.className = 'step-content';
						    contentDiv.style.display = 'block';

						    // Line breaks are emitted as "\n" inside text nodes.
						    // NOTE(review): this presumably relies on `.step-content` preserving
						    // whitespace in CSS - confirm against the stylesheet.
						    const appendText = (text) => {
						      contentDiv.appendChild(document.createTextNode(text));
						    };
						    const appendLabel = (label) => {
						      const strong = document.createElement('strong');
						      strong.textContent = label;
						      contentDiv.appendChild(strong);
						    };

						    // The task statement is only shown on the first entry.
						    if (index === 0 && step.task) {
						      appendText(`${toDisplayText(step.task)}\n\n`);
						    }

						    if (step.model_output_message && step.model_output_message.content) {
						      appendLabel('Model Output:');
						      appendText(`\n${toDisplayText(step.model_output_message.content)}\n\n`);

						      // The plan is only shown alongside a model output message.
						      if (step.plan) {
						        appendLabel('Plan:');
						        appendText(`\n${toDisplayText(step.plan)}\n\n`);
						      }
						    }

						    if (step.tool_calls && step.tool_calls.length > 0) {
						      step.tool_calls.forEach((toolCall) => {
						        if (!toolCall.function) {
						          return;
						        }
						        appendLabel('Tool Call:');
						        appendText(` ${toDisplayText(toolCall.function.name)}\n`);
						        if (toolCall.function.arguments) {
						          appendLabel('Arguments:');
						          appendText(`\n${toDisplayText(toolCall.function.arguments)}\n\n`);
						        }
						      });
						    }

						    if (step.model_output) {
						      appendLabel('Model Reasoning:');
						      appendText(`\n${toDisplayText(step.model_output)}\n\n`);
						    }

						    if (step.observations) {
						      appendLabel('Observations:');
						      appendText(`\n${toDisplayText(step.observations)}\n\n`);
						    }

						    if (step.action_output) {
						      appendLabel('Action Output:');
						      appendText(`\n${toDisplayText(step.action_output)}\n\n`);
						    }

						    if (step.error) {
						      appendLabel('Error Type:');
						      appendText(` ${toDisplayText(step.error.type || 'Unknown')}\n`);
						      if (step.error.message) {
						        appendLabel('Error Message:');
						        appendText(` ${toDisplayText(step.error.message)}\n`);
						      }
						    }

						    // None of the known fields were present on this step.
						    if (!contentDiv.hasChildNodes()) {
						      contentDiv.textContent = 'No content available for this step';
						    }
						    stepDiv.appendChild(contentDiv);

						    // Clicking the header toggles the body and flips the arrow.
						    headerDiv.addEventListener('click', () => {
						      const isHidden = contentDiv.style.display === 'none';
						      contentDiv.style.display = isHidden ? 'block' : 'none';
						      arrowSpan.textContent = isHidden ? '▲' : '▼';
						    });

						    agentSteps.appendChild(stepDiv);
						  });
						}
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						// Keyboard navigation: Left/Right arrows step through the screenshots
						// while the screenshots tab is the active one.
						document.addEventListener('keydown', (e) => {
						  if (!appState.currentImages || appState.currentImages.length === 0) return;

						  // Leave arrow keys alone while a form control has focus: they already
						  // mean something there (caret movement, option selection, slider step).
						  // For the image slider in particular, handling the key here as well
						  // would move the index twice, because the slider's own `input`
						  // listener also updates it.
						  const target = e.target;
						  if (
						    target instanceof HTMLElement &&
						    (target.isContentEditable || target.matches('input, select, textarea'))
						  ) {
						    return;
						  }

						  // Only react while the screenshots tab is showing; a missing tab
						  // element is treated the same as an inactive one.
						  const screenshotsTab = document.getElementById('screenshots-tab');
						  if (!screenshotsTab || !screenshotsTab.classList.contains('active')) return;

						  const lastIndex = appState.currentImages.length - 1;
						  if (e.key === 'ArrowLeft' && appState.currentImageIndex > 0) {
						    appState.currentImageIndex--;
						    updateImageDisplay();
						  } else if (e.key === 'ArrowRight' && appState.currentImageIndex < lastIndex) {
						    appState.currentImageIndex++;
						    updateImageDisplay();
						  }
						});

						// Start loading the evaluations once the document has been parsed.
						document.addEventListener('DOMContentLoaded', loadEvaluations);
					
						
						|  | </script> | 
					
						
						|  | </body> | 
					
						
						|  | </html> |