Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
"""
|
2 |
-
|
3 |
-
|
4 |
"""
|
5 |
|
6 |
import asyncio
|
@@ -31,7 +31,7 @@ logger = logging.getLogger(__name__)
|
|
31 |
app = FastAPI(
|
32 |
title="NovaEval by Noveum.ai",
|
33 |
description="Advanced AI Model Evaluation Platform with Hugging Face Models",
|
34 |
-
version="
|
35 |
)
|
36 |
|
37 |
app.add_middleware(
|
@@ -508,13 +508,16 @@ async def get_homepage():
|
|
508 |
transform: translateY(-2px);
|
509 |
box-shadow: 0 10px 25px rgba(0,0,0,0.1);
|
510 |
}
|
511 |
-
.
|
512 |
-
|
513 |
-
|
514 |
}
|
515 |
-
.
|
516 |
-
|
517 |
-
|
|
|
|
|
|
|
518 |
}
|
519 |
.progress-bar {
|
520 |
transition: width 0.5s ease;
|
@@ -526,122 +529,125 @@ async def get_homepage():
|
|
526 |
from { opacity: 0; transform: translateX(-10px); }
|
527 |
to { opacity: 1; transform: translateX(0); }
|
528 |
}
|
529 |
-
.
|
530 |
-
|
|
|
|
|
|
|
|
|
531 |
}
|
532 |
</style>
|
533 |
</head>
|
534 |
<body class="bg-gray-50 min-h-screen">
|
535 |
<!-- Header -->
|
536 |
-
<header class="gradient-bg text-white py-
|
537 |
<div class="container mx-auto px-4">
|
538 |
<div class="flex items-center justify-between">
|
539 |
<div class="flex items-center space-x-3">
|
540 |
-
<div class="w-
|
541 |
-
<i data-lucide="zap" class="w-
|
542 |
</div>
|
543 |
<div>
|
544 |
-
<h1 class="text-
|
545 |
-
<p class="text-purple-100 text-
|
546 |
</div>
|
547 |
</div>
|
548 |
<div class="text-right">
|
549 |
-
<p class="text-purple-100 text-sm">Advanced AI Model Evaluation
|
550 |
-
<p class="text-purple-200 text-xs">Powered by Hugging Face Models</p>
|
551 |
</div>
|
552 |
</div>
|
553 |
</div>
|
554 |
</header>
|
555 |
|
556 |
-
<div class="container mx-auto px-4 py-
|
557 |
-
<!-- Main
|
558 |
-
<div class="grid grid-cols-1 lg:grid-cols-
|
559 |
-
<!-- Left Panel -
|
560 |
-
<div class="lg:col-span-
|
561 |
-
<!--
|
562 |
-
<div class="
|
563 |
-
|
564 |
-
|
565 |
-
<
|
566 |
-
|
567 |
-
|
568 |
-
|
569 |
-
<div class="mb-4">
|
570 |
-
<div class="relative">
|
571 |
-
<input type="text" id="modelSearch" placeholder="Search models..."
|
572 |
-
class="w-full pl-10 pr-4 py-2 border border-gray-300 rounded-lg focus:ring-2 focus:ring-purple-500 focus:border-transparent">
|
573 |
-
<i data-lucide="search" class="w-5 h-5 text-gray-400 absolute left-3 top-2.5"></i>
|
574 |
</div>
|
575 |
-
|
576 |
-
|
577 |
-
|
578 |
-
|
579 |
-
|
580 |
-
<button onclick="filterModels('
|
581 |
-
<button onclick="filterModels('
|
582 |
-
|
583 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
584 |
</div>
|
585 |
</div>
|
586 |
-
|
587 |
-
<!-- Model Grid -->
|
588 |
-
<div id="modelGrid" class="grid grid-cols-1 md:grid-cols-2 gap-4 max-h-96 overflow-y-auto">
|
589 |
-
<!-- Models will be populated by JavaScript -->
|
590 |
-
</div>
|
591 |
-
|
592 |
-
<div class="mt-4 text-sm text-gray-600">
|
593 |
-
<span id="selectedModelsCount">0</span> models selected
|
594 |
-
</div>
|
595 |
-
</div>
|
596 |
|
597 |
-
|
598 |
-
|
599 |
-
|
600 |
-
|
601 |
-
|
602 |
-
|
603 |
-
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
<button onclick="filterDatasets('
|
608 |
-
<button onclick="filterDatasets('
|
609 |
-
<button onclick="filterDatasets('
|
610 |
-
<button onclick="filterDatasets('
|
611 |
-
<button onclick="filterDatasets('
|
612 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
613 |
</div>
|
614 |
</div>
|
615 |
-
|
616 |
-
<!-- Dataset Grid -->
|
617 |
-
<div id="datasetGrid" class="space-y-3 max-h-64 overflow-y-auto">
|
618 |
-
<!-- Datasets will be populated by JavaScript -->
|
619 |
-
</div>
|
620 |
-
</div>
|
621 |
|
622 |
-
|
623 |
-
|
624 |
-
|
625 |
-
|
626 |
-
|
627 |
-
|
628 |
-
|
629 |
-
|
|
|
|
|
|
|
|
|
630 |
<!-- Metrics Selection -->
|
631 |
-
<div>
|
632 |
-
<label class="block text-sm font-medium text-gray-700 mb-
|
633 |
-
<div id="metricsGrid" class="space-y-
|
634 |
<!-- Metrics will be populated by JavaScript -->
|
635 |
</div>
|
636 |
</div>
|
637 |
|
638 |
<!-- Parameters -->
|
639 |
-
<div class="space-y-
|
640 |
<div>
|
641 |
-
<label class="block text-
|
642 |
<input type="range" id="sampleSize" min="10" max="1000" value="50"
|
643 |
-
class="w-full h-
|
644 |
-
<div class="flex justify-between text-xs text-gray-500
|
645 |
<span>10</span>
|
646 |
<span id="sampleSizeValue">50</span>
|
647 |
<span>1000</span>
|
@@ -649,52 +655,51 @@ async def get_homepage():
|
|
649 |
</div>
|
650 |
|
651 |
<div>
|
652 |
-
<label class="block text-
|
653 |
<input type="range" id="temperature" min="0" max="2" step="0.1" value="0.7"
|
654 |
-
class="w-full h-
|
655 |
-
<div class="flex justify-between text-xs text-gray-500
|
656 |
<span>0.0</span>
|
657 |
<span id="temperatureValue">0.7</span>
|
658 |
<span>2.0</span>
|
659 |
</div>
|
660 |
</div>
|
661 |
-
|
662 |
-
<div>
|
663 |
-
<label class="block text-sm font-medium text-gray-700 mb-2">Max Tokens</label>
|
664 |
-
<input type="range" id="maxTokens" min="128" max="2048" step="128" value="512"
|
665 |
-
class="w-full h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer">
|
666 |
-
<div class="flex justify-between text-xs text-gray-500 mt-1">
|
667 |
-
<span>128</span>
|
668 |
-
<span id="maxTokensValue">512</span>
|
669 |
-
<span>2048</span>
|
670 |
-
</div>
|
671 |
-
</div>
|
672 |
</div>
|
673 |
-
|
674 |
-
|
675 |
-
<!-- Start Evaluation Button -->
|
676 |
-
<div class="mt-6">
|
677 |
<button onclick="startEvaluation()" id="startBtn"
|
678 |
-
class="w-full gradient-bg text-white py-
|
679 |
-
<i data-lucide="play" class="w-
|
680 |
Start Evaluation
|
681 |
</button>
|
682 |
</div>
|
683 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
684 |
</div>
|
685 |
|
686 |
-
<!-- Right Panel - Progress &
|
687 |
<div class="space-y-6">
|
688 |
<!-- Progress -->
|
689 |
-
<div class="bg-white rounded-xl shadow-lg p-
|
690 |
-
<div class="flex items-center space-x-
|
691 |
-
<i data-lucide="activity" class="w-
|
692 |
-
<h2 class="text-
|
693 |
</div>
|
694 |
|
695 |
<div id="progressSection" class="hidden">
|
696 |
-
<div class="mb-
|
697 |
-
<div class="flex justify-between text-
|
698 |
<span id="currentStep">Initializing...</span>
|
699 |
<span id="progressPercent">0%</span>
|
700 |
</div>
|
@@ -704,35 +709,23 @@ async def get_homepage():
|
|
704 |
</div>
|
705 |
</div>
|
706 |
|
707 |
-
<div id="idleMessage" class="text-center text-gray-500 py-
|
708 |
-
<i data-lucide="clock" class="w-
|
709 |
-
<p>Ready to start
|
710 |
</div>
|
711 |
</div>
|
712 |
|
713 |
<!-- Live Logs -->
|
714 |
-
<div class="bg-white rounded-xl shadow-lg p-
|
715 |
-
<div class="flex items-center space-x-
|
716 |
-
<i data-lucide="terminal" class="w-
|
717 |
-
<h2 class="text-
|
718 |
</div>
|
719 |
|
720 |
-
<div id="logsContainer" class="bg-gray-900 text-green-400 p-
|
721 |
<div class="text-gray-500">Waiting for evaluation to start...</div>
|
722 |
</div>
|
723 |
</div>
|
724 |
-
|
725 |
-
<!-- Results -->
|
726 |
-
<div id="resultsSection" class="bg-white rounded-xl shadow-lg p-6 card-hover hidden">
|
727 |
-
<div class="flex items-center space-x-3 mb-4">
|
728 |
-
<i data-lucide="bar-chart" class="w-6 h-6 text-purple-600"></i>
|
729 |
-
<h2 class="text-xl font-semibold text-gray-800">Results</h2>
|
730 |
-
</div>
|
731 |
-
|
732 |
-
<div id="resultsContent">
|
733 |
-
<!-- Results will be populated by JavaScript -->
|
734 |
-
</div>
|
735 |
-
</div>
|
736 |
</div>
|
737 |
</div>
|
738 |
</div>
|
@@ -769,17 +762,6 @@ async def get_homepage():
|
|
769 |
document.getElementById('temperature').addEventListener('input', function() {
|
770 |
document.getElementById('temperatureValue').textContent = this.value;
|
771 |
});
|
772 |
-
|
773 |
-
// Max tokens slider
|
774 |
-
document.getElementById('maxTokens').addEventListener('input', function() {
|
775 |
-
document.getElementById('maxTokensValue').textContent = this.value;
|
776 |
-
});
|
777 |
-
|
778 |
-
// Model search
|
779 |
-
document.getElementById('modelSearch').addEventListener('input', function() {
|
780 |
-
const searchTerm = this.value.toLowerCase();
|
781 |
-
filterModelsBySearch(searchTerm);
|
782 |
-
});
|
783 |
}
|
784 |
|
785 |
function renderModels() {
|
@@ -796,50 +778,92 @@ async def get_homepage():
|
|
796 |
|
797 |
function createModelCard(model, category) {
|
798 |
const div = document.createElement('div');
|
799 |
-
div.className = `model-card p-
|
800 |
div.dataset.category = category;
|
801 |
div.dataset.modelId = model.id;
|
802 |
|
803 |
div.innerHTML = `
|
804 |
-
<div class="flex items-start justify-between mb-
|
805 |
<div class="flex-1">
|
806 |
<h3 class="font-semibold text-gray-800 text-sm">${model.name}</h3>
|
807 |
-
<p class="text-xs text-gray-500">${model.provider}
|
808 |
</div>
|
809 |
<div class="text-xs bg-gray-100 px-2 py-1 rounded">${model.size}</div>
|
810 |
</div>
|
811 |
-
<p class="text-xs text-gray-600 mb-2">${model.description}</p>
|
812 |
<div class="flex flex-wrap gap-1">
|
813 |
-
${model.capabilities.map(cap => `<span class="text-xs bg-purple-100 text-purple-700 px-
|
814 |
</div>
|
815 |
`;
|
816 |
|
817 |
-
div.addEventListener('click', () => toggleModelSelection(model.id, div));
|
818 |
return div;
|
819 |
}
|
820 |
|
821 |
-
function toggleModelSelection(modelId, element) {
|
822 |
if (selectedModels.includes(modelId)) {
|
823 |
selectedModels = selectedModels.filter(id => id !== modelId);
|
824 |
-
element.classList.remove('
|
825 |
} else {
|
826 |
selectedModels.push(modelId);
|
827 |
-
element.classList.add('
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
828 |
}
|
|
|
829 |
updateSelectedModelsCount();
|
830 |
}
|
831 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
832 |
function updateSelectedModelsCount() {
|
833 |
-
document.getElementById('selectedModelsCount').textContent = selectedModels.length
|
834 |
}
|
835 |
|
836 |
function filterModels(category) {
|
837 |
// Update filter buttons
|
838 |
document.querySelectorAll('[id^="filter-"]').forEach(btn => {
|
839 |
-
btn.className = btn.className.replace('
|
840 |
});
|
841 |
document.getElementById(`filter-${category}`).className =
|
842 |
-
document.getElementById(`filter-${category}`).className.replace('
|
843 |
|
844 |
// Filter model cards
|
845 |
document.querySelectorAll('.model-card').forEach(card => {
|
@@ -851,19 +875,6 @@ async def get_homepage():
|
|
851 |
});
|
852 |
}
|
853 |
|
854 |
-
function filterModelsBySearch(searchTerm) {
|
855 |
-
document.querySelectorAll('.model-card').forEach(card => {
|
856 |
-
const modelName = card.querySelector('h3').textContent.toLowerCase();
|
857 |
-
const modelProvider = card.querySelector('p').textContent.toLowerCase();
|
858 |
-
|
859 |
-
if (modelName.includes(searchTerm) || modelProvider.includes(searchTerm)) {
|
860 |
-
card.style.display = 'block';
|
861 |
-
} else {
|
862 |
-
card.style.display = 'none';
|
863 |
-
}
|
864 |
-
});
|
865 |
-
}
|
866 |
-
|
867 |
function renderDatasets() {
|
868 |
const grid = document.getElementById('datasetGrid');
|
869 |
grid.innerHTML = '';
|
@@ -878,46 +889,70 @@ async def get_homepage():
|
|
878 |
|
879 |
function createDatasetCard(dataset, category) {
|
880 |
const div = document.createElement('div');
|
881 |
-
div.className = `dataset-card p-
|
882 |
div.dataset.category = category;
|
883 |
div.dataset.datasetId = dataset.id;
|
884 |
|
885 |
div.innerHTML = `
|
886 |
-
<div class="flex items-start justify-between mb-
|
887 |
<div class="flex-1">
|
888 |
<h3 class="font-semibold text-gray-800 text-sm">${dataset.name}</h3>
|
889 |
-
<p class="text-xs text-gray-600">${dataset.description}</p>
|
890 |
</div>
|
891 |
-
<div class="text-xs bg-gray-100 px-
|
892 |
</div>
|
893 |
-
<div class="flex justify-between items-center">
|
894 |
-
<span class="text-xs bg-blue-100 text-blue-700 px-
|
895 |
<span class="text-xs text-gray-500">${dataset.difficulty}</span>
|
896 |
</div>
|
897 |
`;
|
898 |
|
899 |
-
div.addEventListener('click', () => selectDataset(dataset.id, div));
|
900 |
return div;
|
901 |
}
|
902 |
|
903 |
-
function selectDataset(datasetId, element) {
|
904 |
// Remove previous selection
|
905 |
document.querySelectorAll('.dataset-card').forEach(card => {
|
906 |
-
card.classList.remove('
|
907 |
});
|
908 |
|
909 |
// Add selection to clicked element
|
910 |
-
element.classList.add('
|
911 |
selectedDataset = datasetId;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
912 |
}
|
913 |
|
914 |
function filterDatasets(category) {
|
915 |
// Update filter buttons
|
916 |
document.querySelectorAll('[id^="dataset-filter-"]').forEach(btn => {
|
917 |
-
btn.className = btn.className.replace('
|
918 |
});
|
919 |
document.getElementById(`dataset-filter-${category}`).className =
|
920 |
-
document.getElementById(`dataset-filter-${category}`).className.replace('
|
921 |
|
922 |
// Filter dataset cards
|
923 |
document.querySelectorAll('.dataset-card').forEach(card => {
|
@@ -939,7 +974,7 @@ async def get_homepage():
|
|
939 |
|
940 |
div.innerHTML = `
|
941 |
<input type="checkbox" id="metric-${metric.id}" class="rounded text-purple-600 focus:ring-purple-500">
|
942 |
-
<label for="metric-${metric.id}" class="text-
|
943 |
`;
|
944 |
|
945 |
const checkbox = div.querySelector('input');
|
@@ -949,12 +984,47 @@ async def get_homepage():
|
|
949 |
} else {
|
950 |
selectedMetrics = selectedMetrics.filter(id => id !== metric.id);
|
951 |
}
|
|
|
952 |
});
|
953 |
|
954 |
grid.appendChild(div);
|
955 |
});
|
956 |
}
|
957 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
958 |
function startEvaluation() {
|
959 |
// Validation
|
960 |
if (selectedModels.length === 0) {
|
@@ -979,7 +1049,7 @@ async def get_homepage():
|
|
979 |
metrics: selectedMetrics,
|
980 |
sample_size: parseInt(document.getElementById('sampleSize').value),
|
981 |
temperature: parseFloat(document.getElementById('temperature').value),
|
982 |
-
max_tokens:
|
983 |
top_p: 0.9
|
984 |
};
|
985 |
|
@@ -1091,27 +1161,27 @@ async def get_homepage():
|
|
1091 |
}
|
1092 |
|
1093 |
function showResults(results) {
|
1094 |
-
const
|
1095 |
const content = document.getElementById('resultsContent');
|
1096 |
|
1097 |
-
let html = '<div class="
|
1098 |
|
1099 |
Object.keys(results).forEach(modelId => {
|
1100 |
-
const modelName = modelId
|
1101 |
const modelResults = results[modelId];
|
1102 |
|
1103 |
html += `
|
1104 |
-
<div class="border rounded-lg p-4">
|
1105 |
<h3 class="font-semibold text-gray-800 mb-3">${modelName}</h3>
|
1106 |
-
<div class="
|
1107 |
`;
|
1108 |
|
1109 |
Object.keys(modelResults).forEach(metric => {
|
1110 |
const value = modelResults[metric];
|
1111 |
html += `
|
1112 |
-
<div class="
|
1113 |
-
<
|
1114 |
-
<
|
1115 |
</div>
|
1116 |
`;
|
1117 |
});
|
@@ -1121,20 +1191,20 @@ async def get_homepage():
|
|
1121 |
|
1122 |
html += '</div>';
|
1123 |
content.innerHTML = html;
|
1124 |
-
|
1125 |
}
|
1126 |
|
1127 |
function disableStartButton() {
|
1128 |
const btn = document.getElementById('startBtn');
|
1129 |
btn.disabled = true;
|
1130 |
-
btn.innerHTML = '<i data-lucide="loader" class="w-
|
1131 |
lucide.createIcons();
|
1132 |
}
|
1133 |
|
1134 |
function enableStartButton() {
|
1135 |
const btn = document.getElementById('startBtn');
|
1136 |
btn.disabled = false;
|
1137 |
-
btn.innerHTML = '<i data-lucide="play" class="w-
|
1138 |
lucide.createIcons();
|
1139 |
}
|
1140 |
</script>
|
|
|
1 |
"""
|
2 |
+
Improved NovaEval Space by Noveum.ai
|
3 |
+
Advanced AI Model Evaluation Platform with Enhanced UI
|
4 |
"""
|
5 |
|
6 |
import asyncio
|
|
|
31 |
app = FastAPI(
|
32 |
title="NovaEval by Noveum.ai",
|
33 |
description="Advanced AI Model Evaluation Platform with Hugging Face Models",
|
34 |
+
version="3.0.0"
|
35 |
)
|
36 |
|
37 |
app.add_middleware(
|
|
|
508 |
transform: translateY(-2px);
|
509 |
box-shadow: 0 10px 25px rgba(0,0,0,0.1);
|
510 |
}
|
511 |
+
.tag-selected {
|
512 |
+
background: linear-gradient(45deg, #667eea, #764ba2);
|
513 |
+
color: white;
|
514 |
}
|
515 |
+
.tag-unselected {
|
516 |
+
background: #f3f4f6;
|
517 |
+
color: #374151;
|
518 |
+
}
|
519 |
+
.tag-unselected:hover {
|
520 |
+
background: #e5e7eb;
|
521 |
}
|
522 |
.progress-bar {
|
523 |
transition: width 0.5s ease;
|
|
|
529 |
from { opacity: 0; transform: translateX(-10px); }
|
530 |
to { opacity: 1; transform: translateX(0); }
|
531 |
}
|
532 |
+
.compact-card {
|
533 |
+
min-height: 120px;
|
534 |
+
}
|
535 |
+
.selection-panel {
|
536 |
+
max-height: 400px;
|
537 |
+
overflow-y: auto;
|
538 |
}
|
539 |
</style>
|
540 |
</head>
|
541 |
<body class="bg-gray-50 min-h-screen">
|
542 |
<!-- Header -->
|
543 |
+
<header class="gradient-bg text-white py-4 shadow-lg">
|
544 |
<div class="container mx-auto px-4">
|
545 |
<div class="flex items-center justify-between">
|
546 |
<div class="flex items-center space-x-3">
|
547 |
+
<div class="w-8 h-8 bg-white rounded-lg flex items-center justify-center">
|
548 |
+
<i data-lucide="zap" class="w-5 h-5 text-purple-600"></i>
|
549 |
</div>
|
550 |
<div>
|
551 |
+
<h1 class="text-xl font-bold">NovaEval</h1>
|
552 |
+
<p class="text-purple-100 text-xs">by <a href="https://noveum.ai" target="_blank" class="underline hover:text-white">Noveum.ai</a></p>
|
553 |
</div>
|
554 |
</div>
|
555 |
<div class="text-right">
|
556 |
+
<p class="text-purple-100 text-sm">Advanced AI Model Evaluation</p>
|
|
|
557 |
</div>
|
558 |
</div>
|
559 |
</div>
|
560 |
</header>
|
561 |
|
562 |
+
<div class="container mx-auto px-4 py-6">
|
563 |
+
<!-- Main Grid Layout -->
|
564 |
+
<div class="grid grid-cols-1 lg:grid-cols-4 gap-6">
|
565 |
+
<!-- Left Panel - Selection (3 columns) -->
|
566 |
+
<div class="lg:col-span-3 space-y-6">
|
567 |
+
<!-- Selection Row -->
|
568 |
+
<div class="grid grid-cols-1 md:grid-cols-3 gap-6">
|
569 |
+
<!-- Models Selection -->
|
570 |
+
<div class="bg-white rounded-xl shadow-lg p-4 card-hover">
|
571 |
+
<div class="flex items-center space-x-2 mb-4">
|
572 |
+
<i data-lucide="cpu" class="w-5 h-5 text-purple-600"></i>
|
573 |
+
<h2 class="text-lg font-semibold text-gray-800">Models</h2>
|
574 |
+
<span id="selectedModelsCount" class="text-sm text-gray-500">(0)</span>
|
|
|
|
|
|
|
|
|
|
|
575 |
</div>
|
576 |
+
|
577 |
+
<!-- Model Size Filters -->
|
578 |
+
<div class="flex flex-wrap gap-1 mb-3">
|
579 |
+
<button onclick="filterModels('all')" class="px-2 py-1 text-xs rounded-full tag-selected transition-all" id="filter-all">All</button>
|
580 |
+
<button onclick="filterModels('small')" class="px-2 py-1 text-xs rounded-full tag-unselected transition-all" id="filter-small">Small</button>
|
581 |
+
<button onclick="filterModels('medium')" class="px-2 py-1 text-xs rounded-full tag-unselected transition-all" id="filter-medium">Medium</button>
|
582 |
+
<button onclick="filterModels('large')" class="px-2 py-1 text-xs rounded-full tag-unselected transition-all" id="filter-large">Large</button>
|
583 |
+
</div>
|
584 |
+
|
585 |
+
<!-- Selected Models Tags -->
|
586 |
+
<div id="selectedModelsTags" class="mb-3 min-h-[24px]">
|
587 |
+
<!-- Selected model tags will appear here -->
|
588 |
+
</div>
|
589 |
+
|
590 |
+
<!-- Model Selection Panel -->
|
591 |
+
<div id="modelGrid" class="selection-panel space-y-2">
|
592 |
+
<!-- Models will be populated by JavaScript -->
|
593 |
</div>
|
594 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
595 |
|
596 |
+
<!-- Dataset Selection -->
|
597 |
+
<div class="bg-white rounded-xl shadow-lg p-4 card-hover">
|
598 |
+
<div class="flex items-center space-x-2 mb-4">
|
599 |
+
<i data-lucide="database" class="w-5 h-5 text-purple-600"></i>
|
600 |
+
<h2 class="text-lg font-semibold text-gray-800">Dataset</h2>
|
601 |
+
</div>
|
602 |
+
|
603 |
+
<!-- Dataset Category Filters -->
|
604 |
+
<div class="flex flex-wrap gap-1 mb-3">
|
605 |
+
<button onclick="filterDatasets('all')" class="px-2 py-1 text-xs rounded-full tag-selected transition-all" id="dataset-filter-all">All</button>
|
606 |
+
<button onclick="filterDatasets('reasoning')" class="px-2 py-1 text-xs rounded-full tag-unselected transition-all" id="dataset-filter-reasoning">Reasoning</button>
|
607 |
+
<button onclick="filterDatasets('knowledge')" class="px-2 py-1 text-xs rounded-full tag-unselected transition-all" id="dataset-filter-knowledge">Knowledge</button>
|
608 |
+
<button onclick="filterDatasets('math')" class="px-2 py-1 text-xs rounded-full tag-unselected transition-all" id="dataset-filter-math">Math</button>
|
609 |
+
<button onclick="filterDatasets('code')" class="px-2 py-1 text-xs rounded-full tag-unselected transition-all" id="dataset-filter-code">Code</button>
|
610 |
+
<button onclick="filterDatasets('language')" class="px-2 py-1 text-xs rounded-full tag-unselected transition-all" id="dataset-filter-language">Language</button>
|
611 |
+
</div>
|
612 |
+
|
613 |
+
<!-- Selected Dataset Tag -->
|
614 |
+
<div id="selectedDatasetTag" class="mb-3 min-h-[24px]">
|
615 |
+
<!-- Selected dataset tag will appear here -->
|
616 |
+
</div>
|
617 |
+
|
618 |
+
<!-- Dataset Selection Panel -->
|
619 |
+
<div id="datasetGrid" class="selection-panel space-y-2">
|
620 |
+
<!-- Datasets will be populated by JavaScript -->
|
621 |
</div>
|
622 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
623 |
|
624 |
+
<!-- Metrics & Config -->
|
625 |
+
<div class="bg-white rounded-xl shadow-lg p-4 card-hover">
|
626 |
+
<div class="flex items-center space-x-2 mb-4">
|
627 |
+
<i data-lucide="settings" class="w-5 h-5 text-purple-600"></i>
|
628 |
+
<h2 class="text-lg font-semibold text-gray-800">Config</h2>
|
629 |
+
</div>
|
630 |
+
|
631 |
+
<!-- Selected Metrics Tags -->
|
632 |
+
<div id="selectedMetricsTags" class="mb-3 min-h-[24px]">
|
633 |
+
<!-- Selected metrics tags will appear here -->
|
634 |
+
</div>
|
635 |
+
|
636 |
<!-- Metrics Selection -->
|
637 |
+
<div class="mb-4">
|
638 |
+
<label class="block text-sm font-medium text-gray-700 mb-2">Metrics</label>
|
639 |
+
<div id="metricsGrid" class="space-y-1">
|
640 |
<!-- Metrics will be populated by JavaScript -->
|
641 |
</div>
|
642 |
</div>
|
643 |
|
644 |
<!-- Parameters -->
|
645 |
+
<div class="space-y-3">
|
646 |
<div>
|
647 |
+
<label class="block text-xs font-medium text-gray-700 mb-1">Sample Size</label>
|
648 |
<input type="range" id="sampleSize" min="10" max="1000" value="50"
|
649 |
+
class="w-full h-1 bg-gray-200 rounded-lg appearance-none cursor-pointer">
|
650 |
+
<div class="flex justify-between text-xs text-gray-500">
|
651 |
<span>10</span>
|
652 |
<span id="sampleSizeValue">50</span>
|
653 |
<span>1000</span>
|
|
|
655 |
</div>
|
656 |
|
657 |
<div>
|
658 |
+
<label class="block text-xs font-medium text-gray-700 mb-1">Temperature</label>
|
659 |
<input type="range" id="temperature" min="0" max="2" step="0.1" value="0.7"
|
660 |
+
class="w-full h-1 bg-gray-200 rounded-lg appearance-none cursor-pointer">
|
661 |
+
<div class="flex justify-between text-xs text-gray-500">
|
662 |
<span>0.0</span>
|
663 |
<span id="temperatureValue">0.7</span>
|
664 |
<span>2.0</span>
|
665 |
</div>
|
666 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
667 |
</div>
|
668 |
+
|
669 |
+
<!-- Start Button -->
|
|
|
|
|
670 |
<button onclick="startEvaluation()" id="startBtn"
|
671 |
+
class="w-full gradient-bg text-white py-2 px-4 rounded-lg font-semibold hover:opacity-90 transition-opacity disabled:opacity-50 disabled:cursor-not-allowed mt-4 text-sm">
|
672 |
+
<i data-lucide="play" class="w-4 h-4 inline mr-1"></i>
|
673 |
Start Evaluation
|
674 |
</button>
|
675 |
</div>
|
676 |
</div>
|
677 |
+
|
678 |
+
<!-- Results Panel -->
|
679 |
+
<div id="resultsPanel" class="bg-white rounded-xl shadow-lg p-6 card-hover hidden">
|
680 |
+
<div class="flex items-center space-x-3 mb-4">
|
681 |
+
<i data-lucide="bar-chart" class="w-6 h-6 text-purple-600"></i>
|
682 |
+
<h2 class="text-xl font-semibold text-gray-800">Evaluation Results</h2>
|
683 |
+
</div>
|
684 |
+
|
685 |
+
<div id="resultsContent">
|
686 |
+
<!-- Results will be populated by JavaScript -->
|
687 |
+
</div>
|
688 |
+
</div>
|
689 |
</div>
|
690 |
|
691 |
+
<!-- Right Panel - Progress & Logs (1 column) -->
|
692 |
<div class="space-y-6">
|
693 |
<!-- Progress -->
|
694 |
+
<div class="bg-white rounded-xl shadow-lg p-4 card-hover">
|
695 |
+
<div class="flex items-center space-x-2 mb-3">
|
696 |
+
<i data-lucide="activity" class="w-5 h-5 text-purple-600"></i>
|
697 |
+
<h2 class="text-lg font-semibold text-gray-800">Progress</h2>
|
698 |
</div>
|
699 |
|
700 |
<div id="progressSection" class="hidden">
|
701 |
+
<div class="mb-3">
|
702 |
+
<div class="flex justify-between text-xs text-gray-600 mb-1">
|
703 |
<span id="currentStep">Initializing...</span>
|
704 |
<span id="progressPercent">0%</span>
|
705 |
</div>
|
|
|
709 |
</div>
|
710 |
</div>
|
711 |
|
712 |
+
<div id="idleMessage" class="text-center text-gray-500 py-4">
|
713 |
+
<i data-lucide="clock" class="w-8 h-8 mx-auto mb-2 text-gray-300"></i>
|
714 |
+
<p class="text-sm">Ready to start</p>
|
715 |
</div>
|
716 |
</div>
|
717 |
|
718 |
<!-- Live Logs -->
|
719 |
+
<div class="bg-white rounded-xl shadow-lg p-4 card-hover">
|
720 |
+
<div class="flex items-center space-x-2 mb-3">
|
721 |
+
<i data-lucide="terminal" class="w-5 h-5 text-purple-600"></i>
|
722 |
+
<h2 class="text-lg font-semibold text-gray-800">Live Logs</h2>
|
723 |
</div>
|
724 |
|
725 |
+
<div id="logsContainer" class="bg-gray-900 text-green-400 p-3 rounded-lg h-64 overflow-y-auto font-mono text-xs">
|
726 |
<div class="text-gray-500">Waiting for evaluation to start...</div>
|
727 |
</div>
|
728 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
729 |
</div>
|
730 |
</div>
|
731 |
</div>
|
|
|
762 |
document.getElementById('temperature').addEventListener('input', function() {
|
763 |
document.getElementById('temperatureValue').textContent = this.value;
|
764 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
765 |
}
|
766 |
|
767 |
function renderModels() {
|
|
|
778 |
|
779 |
function createModelCard(model, category) {
    // Build the clickable card element for one model.
    //   model    - object providing id, name, provider, size, description
    //              and a capabilities array (all read below)
    //   category - stored on the element as data-category
    // Returns the detached <div>; the caller is responsible for inserting it.
    const card = document.createElement('div');
    card.className = 'model-card p-2 border rounded-lg cursor-pointer hover:shadow-md transition-all compact-card';
    card.dataset.category = category;
    card.dataset.modelId = model.id;

    // Only the first two capabilities are rendered as badges.
    const badges = model.capabilities
        .slice(0, 2)
        .map(cap => `<span class="text-xs bg-purple-100 text-purple-700 px-1 py-0.5 rounded">${cap}</span>`)
        .join('');

    card.innerHTML = `
        <div class="flex items-start justify-between mb-1">
            <div class="flex-1">
                <h3 class="font-semibold text-gray-800 text-sm">${model.name}</h3>
                <p class="text-xs text-gray-500">${model.provider}</p>
            </div>
            <div class="text-xs bg-gray-100 px-2 py-1 rounded">${model.size}</div>
        </div>
        <p class="text-xs text-gray-600 mb-2 line-clamp-2">${model.description}</p>
        <div class="flex flex-wrap gap-1">
            ${badges}
        </div>
    `;

    // Clicking anywhere on the card toggles the model in the selection.
    card.addEventListener('click', () => toggleModelSelection(model.id, model.name, card));
    return card;
}
|
802 |
|
803 |
+
function toggleModelSelection(modelId, modelName, element) {
    // Add or remove `modelId` from the global `selectedModels` list and mirror
    // the new state on the card `element` via its highlight classes.
    // `modelName` is part of the signature but is not read here.
    const highlight = ['ring-2', 'ring-purple-500', 'bg-purple-50'];
    const alreadySelected = selectedModels.includes(modelId);

    if (alreadySelected) {
        selectedModels = selectedModels.filter(id => id !== modelId);
        element.classList.remove(...highlight);
    } else {
        selectedModels.push(modelId);
        element.classList.add(...highlight);
    }

    // Keep the tag strip and the "(n)" counter in sync with the selection.
    updateSelectedModelsTags();
    updateSelectedModelsCount();
}
|
814 |
+
|
815 |
+
function updateSelectedModelsTags() {
    // Re-render the strip of removable tags, one per entry in the global
    // `selectedModels` list, inside the #selectedModelsTags container.
    //
    // The tags are built with DOM APIs instead of an HTML template string:
    //   - the display name goes in as a text node, so characters such as
    //     "<" or "&" in a name are shown literally rather than parsed as markup;
    //   - the remove handler is a closure over `modelId`, so ids containing
    //     quotes cannot break (or inject into) an inline onclick attribute.
    const container = document.getElementById('selectedModelsTags');
    container.innerHTML = '';

    selectedModels.forEach(modelId => {
        const tag = document.createElement('span');
        tag.className = 'inline-flex items-center px-2 py-1 text-xs bg-purple-100 text-purple-800 rounded-full mr-1 mb-1';
        tag.appendChild(document.createTextNode(getModelName(modelId)));

        const removeBtn = document.createElement('button');
        removeBtn.className = 'ml-1 text-purple-600 hover:text-purple-800';
        removeBtn.addEventListener('click', () => removeModel(modelId));

        // Placeholder element that lucide.createIcons() replaces with the "x" icon.
        const icon = document.createElement('i');
        icon.setAttribute('data-lucide', 'x');
        icon.className = 'w-3 h-3';

        removeBtn.appendChild(icon);
        tag.appendChild(removeBtn);
        container.appendChild(tag);
    });

    // Render the freshly inserted icon placeholders.
    lucide.createIcons();
}
|
833 |
+
|
834 |
+
function removeModel(modelId) {
    // Drop `modelId` from the global `selectedModels` list, clear the
    // highlight on its card (if one is currently in the document) and refresh
    // the tag strip and counter.
    selectedModels = selectedModels.filter(id => id !== modelId);

    // Compare data-model-id values directly instead of interpolating the id
    // into a CSS selector: an id containing a double quote would make the
    // selector invalid and querySelector would throw, leaving the tags and
    // counter out of date with the already-modified selection.
    const modelCard = Array.from(document.querySelectorAll('[data-model-id]'))
        .find(card => card.dataset.modelId === modelId);
    if (modelCard) {
        modelCard.classList.remove('ring-2', 'ring-purple-500', 'bg-purple-50');
    }

    updateSelectedModelsTags();
    updateSelectedModelsCount();
}
|
844 |
|
845 |
+
function getModelName(modelId) {
    // Resolve a model id to its display name by scanning every category of
    // the global `models` catalogue. If the id is not found, fall back to the
    // last "/"-separated segment of the id itself.
    for (const group of Object.values(models)) {
        const hit = Array.from(group).find(entry => entry.id === modelId);
        if (hit) {
            return hit.name;
        }
    }
    const segments = modelId.split('/');
    return segments.pop();
}
|
855 |
+
|
856 |
function updateSelectedModelsCount() {
    // Write the number of selected models, wrapped in parentheses, into the
    // #selectedModelsCount element.
    const counter = document.getElementById('selectedModelsCount');
    const total = selectedModels.length;
    counter.textContent = `(${total})`;
}
|
859 |
|
860 |
function filterModels(category) {
|
861 |
// Update filter buttons
|
862 |
document.querySelectorAll('[id^="filter-"]').forEach(btn => {
|
863 |
+
btn.className = btn.className.replace('tag-selected', 'tag-unselected');
|
864 |
});
|
865 |
document.getElementById(`filter-${category}`).className =
|
866 |
+
document.getElementById(`filter-${category}`).className.replace('tag-unselected', 'tag-selected');
|
867 |
|
868 |
// Filter model cards
|
869 |
document.querySelectorAll('.model-card').forEach(card => {
|
|
|
875 |
});
|
876 |
}
|
877 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
878 |
function renderDatasets() {
|
879 |
const grid = document.getElementById('datasetGrid');
|
880 |
grid.innerHTML = '';
|
|
|
889 |
|
890 |
function createDatasetCard(dataset, category) {
    // Build the clickable card element for one dataset.
    //   dataset  - object providing id, name, description, samples,
    //              task_type and difficulty (all read below)
    //   category - stored on the element as data-category
    // Returns the detached <div>; the caller is responsible for inserting it.
    const card = document.createElement('div');
    card.className = 'dataset-card p-2 border rounded-lg cursor-pointer hover:shadow-md transition-all compact-card';
    card.dataset.category = category;
    card.dataset.datasetId = dataset.id;

    // Locale-formatted sample count shown in the card's corner badge.
    const sampleCount = dataset.samples.toLocaleString();

    card.innerHTML = `
        <div class="flex items-start justify-between mb-1">
            <div class="flex-1">
                <h3 class="font-semibold text-gray-800 text-sm">${dataset.name}</h3>
                <p class="text-xs text-gray-600 line-clamp-2">${dataset.description}</p>
            </div>
            <div class="text-xs bg-gray-100 px-1 py-0.5 rounded">${sampleCount}</div>
        </div>
        <div class="flex justify-between items-center mt-2">
            <span class="text-xs bg-blue-100 text-blue-700 px-1 py-0.5 rounded">${dataset.task_type}</span>
            <span class="text-xs text-gray-500">${dataset.difficulty}</span>
        </div>
    `;

    // Clicking the card makes this dataset the single active selection.
    card.addEventListener('click', () => selectDataset(dataset.id, dataset.name, card));
    return card;
}
|
913 |
|
914 |
+
function selectDataset(datasetId, datasetName, element) {
    // Make `datasetId` the single selected dataset: clear the highlight from
    // every dataset card, highlight `element`, record the id in the global
    // `selectedDataset`, and show `datasetName` in the selected-dataset tag.
    const highlight = ['ring-2', 'ring-purple-500', 'bg-purple-50'];

    // Selection is exclusive, so un-highlight all cards first.
    for (const card of document.querySelectorAll('.dataset-card')) {
        card.classList.remove(...highlight);
    }

    element.classList.add(...highlight);
    selectedDataset = datasetId;

    updateSelectedDatasetTag(datasetName);
}
|
927 |
+
|
928 |
+
function updateSelectedDatasetTag(datasetName) {
    // Replace the contents of #selectedDatasetTag with a single removable tag
    // showing `datasetName`.
    //
    // The tag is assembled with DOM APIs rather than an HTML template string
    // so the name is inserted as text: markup characters in a dataset name
    // are displayed literally instead of being parsed as HTML.
    const container = document.getElementById('selectedDatasetTag');
    container.innerHTML = '';

    const tag = document.createElement('span');
    tag.className = 'inline-flex items-center px-2 py-1 text-xs bg-blue-100 text-blue-800 rounded-full';
    tag.appendChild(document.createTextNode(datasetName));

    const removeBtn = document.createElement('button');
    removeBtn.className = 'ml-1 text-blue-600 hover:text-blue-800';
    removeBtn.addEventListener('click', () => removeDataset());

    // Placeholder element that lucide.createIcons() replaces with the "x" icon.
    const icon = document.createElement('i');
    icon.setAttribute('data-lucide', 'x');
    icon.className = 'w-3 h-3';

    removeBtn.appendChild(icon);
    tag.appendChild(removeBtn);
    container.appendChild(tag);

    // Render the freshly inserted icon placeholder.
    lucide.createIcons();
}
|
940 |
+
|
941 |
+
/**
 * Clear the current dataset selection.
 *
 * Resets `selectedDataset` to null, empties `#selectedDatasetTag`, and
 * removes the highlight classes from every `.dataset-card`.
 */
function removeDataset() {
    selectedDataset = null;

    const tagContainer = document.getElementById('selectedDatasetTag');
    tagContainer.innerHTML = '';

    const cards = document.querySelectorAll('.dataset-card');
    for (const card of cards) {
        card.classList.remove('ring-2', 'ring-purple-500', 'bg-purple-50');
    }
}
|
948 |
|
949 |
function filterDatasets(category) {
|
950 |
// Update filter buttons
|
951 |
document.querySelectorAll('[id^="dataset-filter-"]').forEach(btn => {
|
952 |
+
btn.className = btn.className.replace('tag-selected', 'tag-unselected');
|
953 |
});
|
954 |
document.getElementById(`dataset-filter-${category}`).className =
|
955 |
+
document.getElementById(`dataset-filter-${category}`).className.replace('tag-unselected', 'tag-selected');
|
956 |
|
957 |
// Filter dataset cards
|
958 |
document.querySelectorAll('.dataset-card').forEach(card => {
|
|
|
974 |
|
975 |
div.innerHTML = `
|
976 |
<input type="checkbox" id="metric-${metric.id}" class="rounded text-purple-600 focus:ring-purple-500">
|
977 |
+
<label for="metric-${metric.id}" class="text-xs text-gray-700 cursor-pointer">${metric.name}</label>
|
978 |
`;
|
979 |
|
980 |
const checkbox = div.querySelector('input');
|
|
|
984 |
} else {
|
985 |
selectedMetrics = selectedMetrics.filter(id => id !== metric.id);
|
986 |
}
|
987 |
+
updateSelectedMetricsTags();
|
988 |
});
|
989 |
|
990 |
grid.appendChild(div);
|
991 |
});
|
992 |
}
|
993 |
|
994 |
+
/**
 * Re-render one removable tag per entry of `selectedMetrics` inside
 * `#selectedMetricsTags`.
 *
 * Each tag is built from DOM nodes. The metric name is inserted as text,
 * and the remove button closes over the original `metricId` value instead
 * of splicing it into an inline `onclick` string. That keeps ids containing
 * quotes working and preserves the id's type for the strict comparison
 * performed in `removeMetric`.
 */
function updateSelectedMetricsTags() {
    const container = document.getElementById('selectedMetricsTags');
    container.innerHTML = '';

    selectedMetrics.forEach(metricId => {
        const tag = document.createElement('span');
        tag.className = 'inline-flex items-center px-2 py-1 text-xs bg-green-100 text-green-800 rounded-full mr-1 mb-1';
        tag.appendChild(document.createTextNode(getMetricName(metricId)));

        const removeButton = document.createElement('button');
        removeButton.className = 'ml-1 text-green-600 hover:text-green-800';
        removeButton.addEventListener('click', () => removeMetric(metricId));

        const icon = document.createElement('i');
        icon.setAttribute('data-lucide', 'x');
        icon.className = 'w-3 h-3';
        removeButton.appendChild(icon);

        tag.appendChild(removeButton);
        container.appendChild(tag);
    });

    // Render all icon placeholders once, after every tag is attached.
    lucide.createIcons();
}
|
1012 |
+
|
1013 |
+
/**
 * Deselect a metric.
 *
 * Drops `metricId` from `selectedMetrics`, unticks the matching
 * `#metric-<id>` checkbox when one exists, and re-renders the metric tags.
 *
 * @param {string} metricId - Identifier of the metric to remove.
 */
function removeMetric(metricId) {
    selectedMetrics = selectedMetrics.filter(function (selectedId) {
        return selectedId !== metricId;
    });

    // Keep the checkbox in sync with the selection list.
    const metricCheckbox = document.getElementById(`metric-${metricId}`);
    if (metricCheckbox) {
        metricCheckbox.checked = false;
    }

    updateSelectedMetricsTags();
}
|
1022 |
+
|
1023 |
+
/**
 * Look up a metric's display name in the `metrics` list.
 *
 * @param {string} metricId - Identifier to search for.
 * @returns {string} The `name` of the first entry whose `id` matches, or
 *     `metricId` itself when no entry matches.
 */
function getMetricName(metricId) {
    for (const candidate of metrics) {
        if (candidate.id === metricId) {
            return candidate.name;
        }
    }
    // Unknown id: fall back to showing the id.
    return metricId;
}
|
1027 |
+
|
1028 |
function startEvaluation() {
|
1029 |
// Validation
|
1030 |
if (selectedModels.length === 0) {
|
|
|
1049 |
metrics: selectedMetrics,
|
1050 |
sample_size: parseInt(document.getElementById('sampleSize').value),
|
1051 |
temperature: parseFloat(document.getElementById('temperature').value),
|
1052 |
+
max_tokens: 512,
|
1053 |
top_p: 0.9
|
1054 |
};
|
1055 |
|
|
|
1161 |
}
|
1162 |
|
1163 |
function showResults(results) {
|
1164 |
+
const panel = document.getElementById('resultsPanel');
|
1165 |
const content = document.getElementById('resultsContent');
|
1166 |
|
1167 |
+
let html = '<div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">';
|
1168 |
|
1169 |
Object.keys(results).forEach(modelId => {
|
1170 |
+
const modelName = getModelName(modelId);
|
1171 |
const modelResults = results[modelId];
|
1172 |
|
1173 |
html += `
|
1174 |
+
<div class="border rounded-lg p-4 bg-gray-50">
|
1175 |
<h3 class="font-semibold text-gray-800 mb-3">${modelName}</h3>
|
1176 |
+
<div class="space-y-2">
|
1177 |
`;
|
1178 |
|
1179 |
Object.keys(modelResults).forEach(metric => {
|
1180 |
const value = modelResults[metric];
|
1181 |
html += `
|
1182 |
+
<div class="flex justify-between items-center">
|
1183 |
+
<span class="text-sm text-gray-600">${metric.toUpperCase()}</span>
|
1184 |
+
<span class="text-lg font-semibold text-gray-800">${value}</span>
|
1185 |
</div>
|
1186 |
`;
|
1187 |
});
|
|
|
1191 |
|
1192 |
html += '</div>';
|
1193 |
content.innerHTML = html;
|
1194 |
+
panel.classList.remove('hidden');
|
1195 |
}
|
1196 |
|
1197 |
/**
 * Put `#startBtn` into its "running" state: disabled, with a spinning
 * loader icon and the label "Running...".
 */
function disableStartButton() {
    const startButton = document.getElementById('startBtn');
    const runningMarkup = '<i data-lucide="loader" class="w-4 h-4 inline mr-1 animate-spin"></i>Running...';

    startButton.disabled = true;
    startButton.innerHTML = runningMarkup;

    // Ask Lucide to render the icon placeholder that was just inserted.
    lucide.createIcons();
}
|
1203 |
|
1204 |
/**
 * Restore `#startBtn` to its idle state: enabled, with a play icon and the
 * label "Start Evaluation".
 */
function enableStartButton() {
    const startButton = document.getElementById('startBtn');
    const idleMarkup = '<i data-lucide="play" class="w-4 h-4 inline mr-1"></i>Start Evaluation';

    startButton.disabled = false;
    startButton.innerHTML = idleMarkup;

    // Ask Lucide to render the icon placeholder that was just inserted.
    lucide.createIcons();
}
|
1210 |
</script>
|