shashankagar commited on
Commit
55395f1
·
verified ·
1 Parent(s): 844aeac

Upload 4 files

Browse files
Files changed (1) hide show
  1. app.py +270 -200
app.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
- Advanced NovaEval Space by Noveum.ai
3
- Comprehensive AI Model Evaluation Platform with Hugging Face Models
4
  """
5
 
6
  import asyncio
@@ -31,7 +31,7 @@ logger = logging.getLogger(__name__)
31
  app = FastAPI(
32
  title="NovaEval by Noveum.ai",
33
  description="Advanced AI Model Evaluation Platform with Hugging Face Models",
34
- version="2.0.0"
35
  )
36
 
37
  app.add_middleware(
@@ -508,13 +508,16 @@ async def get_homepage():
508
  transform: translateY(-2px);
509
  box-shadow: 0 10px 25px rgba(0,0,0,0.1);
510
  }
511
- .model-card {
512
- border: 2px solid transparent;
513
- transition: all 0.3s ease;
514
  }
515
- .model-card.selected {
516
- border-color: #667eea;
517
- background: rgba(102, 126, 234, 0.1);
 
 
 
518
  }
519
  .progress-bar {
520
  transition: width 0.5s ease;
@@ -526,122 +529,125 @@ async def get_homepage():
526
  from { opacity: 0; transform: translateX(-10px); }
527
  to { opacity: 1; transform: translateX(0); }
528
  }
529
- .metric-badge {
530
- background: linear-gradient(45deg, #667eea, #764ba2);
 
 
 
 
531
  }
532
  </style>
533
  </head>
534
  <body class="bg-gray-50 min-h-screen">
535
  <!-- Header -->
536
- <header class="gradient-bg text-white py-6 shadow-lg">
537
  <div class="container mx-auto px-4">
538
  <div class="flex items-center justify-between">
539
  <div class="flex items-center space-x-3">
540
- <div class="w-10 h-10 bg-white rounded-lg flex items-center justify-center">
541
- <i data-lucide="zap" class="w-6 h-6 text-purple-600"></i>
542
  </div>
543
  <div>
544
- <h1 class="text-2xl font-bold">NovaEval</h1>
545
- <p class="text-purple-100 text-sm">by <a href="https://noveum.ai" target="_blank" class="underline hover:text-white">Noveum.ai</a></p>
546
  </div>
547
  </div>
548
  <div class="text-right">
549
- <p class="text-purple-100 text-sm">Advanced AI Model Evaluation Platform</p>
550
- <p class="text-purple-200 text-xs">Powered by Hugging Face Models</p>
551
  </div>
552
  </div>
553
  </div>
554
  </header>
555
 
556
- <div class="container mx-auto px-4 py-8">
557
- <!-- Main Content -->
558
- <div class="grid grid-cols-1 lg:grid-cols-3 gap-8">
559
- <!-- Left Panel - Configuration -->
560
- <div class="lg:col-span-2 space-y-6">
561
- <!-- Model Selection -->
562
- <div class="bg-white rounded-xl shadow-lg p-6 card-hover">
563
- <div class="flex items-center space-x-3 mb-6">
564
- <i data-lucide="cpu" class="w-6 h-6 text-purple-600"></i>
565
- <h2 class="text-xl font-semibold text-gray-800">Select Models</h2>
566
- </div>
567
-
568
- <!-- Model Search -->
569
- <div class="mb-4">
570
- <div class="relative">
571
- <input type="text" id="modelSearch" placeholder="Search models..."
572
- class="w-full pl-10 pr-4 py-2 border border-gray-300 rounded-lg focus:ring-2 focus:ring-purple-500 focus:border-transparent">
573
- <i data-lucide="search" class="w-5 h-5 text-gray-400 absolute left-3 top-2.5"></i>
574
  </div>
575
- </div>
576
-
577
- <!-- Model Categories -->
578
- <div class="mb-4">
579
- <div class="flex space-x-2">
580
- <button onclick="filterModels('all')" class="px-4 py-2 bg-purple-600 text-white rounded-lg text-sm hover:bg-purple-700 transition-colors" id="filter-all">All</button>
581
- <button onclick="filterModels('small')" class="px-4 py-2 bg-gray-200 text-gray-700 rounded-lg text-sm hover:bg-gray-300 transition-colors" id="filter-small">Small (1-3B)</button>
582
- <button onclick="filterModels('medium')" class="px-4 py-2 bg-gray-200 text-gray-700 rounded-lg text-sm hover:bg-gray-300 transition-colors" id="filter-medium">Medium (7B)</button>
583
- <button onclick="filterModels('large')" class="px-4 py-2 bg-gray-200 text-gray-700 rounded-lg text-sm hover:bg-gray-300 transition-colors" id="filter-large">Large (14B+)</button>
 
 
 
 
 
 
 
 
584
  </div>
585
  </div>
586
-
587
- <!-- Model Grid -->
588
- <div id="modelGrid" class="grid grid-cols-1 md:grid-cols-2 gap-4 max-h-96 overflow-y-auto">
589
- <!-- Models will be populated by JavaScript -->
590
- </div>
591
-
592
- <div class="mt-4 text-sm text-gray-600">
593
- <span id="selectedModelsCount">0</span> models selected
594
- </div>
595
- </div>
596
 
597
- <!-- Dataset Selection -->
598
- <div class="bg-white rounded-xl shadow-lg p-6 card-hover">
599
- <div class="flex items-center space-x-3 mb-6">
600
- <i data-lucide="database" class="w-6 h-6 text-purple-600"></i>
601
- <h2 class="text-xl font-semibold text-gray-800">Select Dataset</h2>
602
- </div>
603
-
604
- <!-- Dataset Categories -->
605
- <div class="mb-4">
606
- <div class="flex flex-wrap gap-2">
607
- <button onclick="filterDatasets('all')" class="px-3 py-1 bg-purple-600 text-white rounded-full text-sm hover:bg-purple-700 transition-colors" id="dataset-filter-all">All</button>
608
- <button onclick="filterDatasets('reasoning')" class="px-3 py-1 bg-gray-200 text-gray-700 rounded-full text-sm hover:bg-gray-300 transition-colors" id="dataset-filter-reasoning">Reasoning</button>
609
- <button onclick="filterDatasets('knowledge')" class="px-3 py-1 bg-gray-200 text-gray-700 rounded-full text-sm hover:bg-gray-300 transition-colors" id="dataset-filter-knowledge">Knowledge</button>
610
- <button onclick="filterDatasets('math')" class="px-3 py-1 bg-gray-200 text-gray-700 rounded-full text-sm hover:bg-gray-300 transition-colors" id="dataset-filter-math">Math</button>
611
- <button onclick="filterDatasets('code')" class="px-3 py-1 bg-gray-200 text-gray-700 rounded-full text-sm hover:bg-gray-300 transition-colors" id="dataset-filter-code">Code</button>
612
- <button onclick="filterDatasets('language')" class="px-3 py-1 bg-gray-200 text-gray-700 rounded-full text-sm hover:bg-gray-300 transition-colors" id="dataset-filter-language">Language</button>
 
 
 
 
 
 
 
 
 
613
  </div>
614
  </div>
615
-
616
- <!-- Dataset Grid -->
617
- <div id="datasetGrid" class="space-y-3 max-h-64 overflow-y-auto">
618
- <!-- Datasets will be populated by JavaScript -->
619
- </div>
620
- </div>
621
 
622
- <!-- Configuration -->
623
- <div class="bg-white rounded-xl shadow-lg p-6 card-hover">
624
- <div class="flex items-center space-x-3 mb-6">
625
- <i data-lucide="settings" class="w-6 h-6 text-purple-600"></i>
626
- <h2 class="text-xl font-semibold text-gray-800">Evaluation Configuration</h2>
627
- </div>
628
-
629
- <div class="grid grid-cols-1 md:grid-cols-2 gap-6">
 
 
 
 
630
  <!-- Metrics Selection -->
631
- <div>
632
- <label class="block text-sm font-medium text-gray-700 mb-3">Metrics</label>
633
- <div id="metricsGrid" class="space-y-2">
634
  <!-- Metrics will be populated by JavaScript -->
635
  </div>
636
  </div>
637
 
638
  <!-- Parameters -->
639
- <div class="space-y-4">
640
  <div>
641
- <label class="block text-sm font-medium text-gray-700 mb-2">Sample Size</label>
642
  <input type="range" id="sampleSize" min="10" max="1000" value="50"
643
- class="w-full h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer">
644
- <div class="flex justify-between text-xs text-gray-500 mt-1">
645
  <span>10</span>
646
  <span id="sampleSizeValue">50</span>
647
  <span>1000</span>
@@ -649,52 +655,51 @@ async def get_homepage():
649
  </div>
650
 
651
  <div>
652
- <label class="block text-sm font-medium text-gray-700 mb-2">Temperature</label>
653
  <input type="range" id="temperature" min="0" max="2" step="0.1" value="0.7"
654
- class="w-full h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer">
655
- <div class="flex justify-between text-xs text-gray-500 mt-1">
656
  <span>0.0</span>
657
  <span id="temperatureValue">0.7</span>
658
  <span>2.0</span>
659
  </div>
660
  </div>
661
-
662
- <div>
663
- <label class="block text-sm font-medium text-gray-700 mb-2">Max Tokens</label>
664
- <input type="range" id="maxTokens" min="128" max="2048" step="128" value="512"
665
- class="w-full h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer">
666
- <div class="flex justify-between text-xs text-gray-500 mt-1">
667
- <span>128</span>
668
- <span id="maxTokensValue">512</span>
669
- <span>2048</span>
670
- </div>
671
- </div>
672
  </div>
673
- </div>
674
-
675
- <!-- Start Evaluation Button -->
676
- <div class="mt-6">
677
  <button onclick="startEvaluation()" id="startBtn"
678
- class="w-full gradient-bg text-white py-3 px-6 rounded-lg font-semibold hover:opacity-90 transition-opacity disabled:opacity-50 disabled:cursor-not-allowed">
679
- <i data-lucide="play" class="w-5 h-5 inline mr-2"></i>
680
  Start Evaluation
681
  </button>
682
  </div>
683
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
684
  </div>
685
 
686
- <!-- Right Panel - Progress & Results -->
687
  <div class="space-y-6">
688
  <!-- Progress -->
689
- <div class="bg-white rounded-xl shadow-lg p-6 card-hover">
690
- <div class="flex items-center space-x-3 mb-4">
691
- <i data-lucide="activity" class="w-6 h-6 text-purple-600"></i>
692
- <h2 class="text-xl font-semibold text-gray-800">Progress</h2>
693
  </div>
694
 
695
  <div id="progressSection" class="hidden">
696
- <div class="mb-4">
697
- <div class="flex justify-between text-sm text-gray-600 mb-2">
698
  <span id="currentStep">Initializing...</span>
699
  <span id="progressPercent">0%</span>
700
  </div>
@@ -704,35 +709,23 @@ async def get_homepage():
704
  </div>
705
  </div>
706
 
707
- <div id="idleMessage" class="text-center text-gray-500 py-8">
708
- <i data-lucide="clock" class="w-12 h-12 mx-auto mb-3 text-gray-300"></i>
709
- <p>Ready to start evaluation</p>
710
  </div>
711
  </div>
712
 
713
  <!-- Live Logs -->
714
- <div class="bg-white rounded-xl shadow-lg p-6 card-hover">
715
- <div class="flex items-center space-x-3 mb-4">
716
- <i data-lucide="terminal" class="w-6 h-6 text-purple-600"></i>
717
- <h2 class="text-xl font-semibold text-gray-800">Live Logs</h2>
718
  </div>
719
 
720
- <div id="logsContainer" class="bg-gray-900 text-green-400 p-4 rounded-lg h-64 overflow-y-auto font-mono text-sm">
721
  <div class="text-gray-500">Waiting for evaluation to start...</div>
722
  </div>
723
  </div>
724
-
725
- <!-- Results -->
726
- <div id="resultsSection" class="bg-white rounded-xl shadow-lg p-6 card-hover hidden">
727
- <div class="flex items-center space-x-3 mb-4">
728
- <i data-lucide="bar-chart" class="w-6 h-6 text-purple-600"></i>
729
- <h2 class="text-xl font-semibold text-gray-800">Results</h2>
730
- </div>
731
-
732
- <div id="resultsContent">
733
- <!-- Results will be populated by JavaScript -->
734
- </div>
735
- </div>
736
  </div>
737
  </div>
738
  </div>
@@ -769,17 +762,6 @@ async def get_homepage():
769
  document.getElementById('temperature').addEventListener('input', function() {
770
  document.getElementById('temperatureValue').textContent = this.value;
771
  });
772
-
773
- // Max tokens slider
774
- document.getElementById('maxTokens').addEventListener('input', function() {
775
- document.getElementById('maxTokensValue').textContent = this.value;
776
- });
777
-
778
- // Model search
779
- document.getElementById('modelSearch').addEventListener('input', function() {
780
- const searchTerm = this.value.toLowerCase();
781
- filterModelsBySearch(searchTerm);
782
- });
783
  }
784
 
785
  function renderModels() {
@@ -796,50 +778,92 @@ async def get_homepage():
796
 
797
  function createModelCard(model, category) {
798
  const div = document.createElement('div');
799
- div.className = `model-card p-4 border rounded-lg cursor-pointer hover:shadow-md transition-all`;
800
  div.dataset.category = category;
801
  div.dataset.modelId = model.id;
802
 
803
  div.innerHTML = `
804
- <div class="flex items-start justify-between mb-2">
805
  <div class="flex-1">
806
  <h3 class="font-semibold text-gray-800 text-sm">${model.name}</h3>
807
- <p class="text-xs text-gray-500">${model.provider} • ${model.size}</p>
808
  </div>
809
  <div class="text-xs bg-gray-100 px-2 py-1 rounded">${model.size}</div>
810
  </div>
811
- <p class="text-xs text-gray-600 mb-2">${model.description}</p>
812
  <div class="flex flex-wrap gap-1">
813
- ${model.capabilities.map(cap => `<span class="text-xs bg-purple-100 text-purple-700 px-2 py-1 rounded">${cap}</span>`).join('')}
814
  </div>
815
  `;
816
 
817
- div.addEventListener('click', () => toggleModelSelection(model.id, div));
818
  return div;
819
  }
820
 
821
- function toggleModelSelection(modelId, element) {
822
  if (selectedModels.includes(modelId)) {
823
  selectedModels = selectedModels.filter(id => id !== modelId);
824
- element.classList.remove('selected');
825
  } else {
826
  selectedModels.push(modelId);
827
- element.classList.add('selected');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
828
  }
 
829
  updateSelectedModelsCount();
830
  }
831
 
 
 
 
 
 
 
 
 
 
 
 
832
  function updateSelectedModelsCount() {
833
- document.getElementById('selectedModelsCount').textContent = selectedModels.length;
834
  }
835
 
836
  function filterModels(category) {
837
  // Update filter buttons
838
  document.querySelectorAll('[id^="filter-"]').forEach(btn => {
839
- btn.className = btn.className.replace('bg-purple-600 text-white', 'bg-gray-200 text-gray-700');
840
  });
841
  document.getElementById(`filter-${category}`).className =
842
- document.getElementById(`filter-${category}`).className.replace('bg-gray-200 text-gray-700', 'bg-purple-600 text-white');
843
 
844
  // Filter model cards
845
  document.querySelectorAll('.model-card').forEach(card => {
@@ -851,19 +875,6 @@ async def get_homepage():
851
  });
852
  }
853
 
854
- function filterModelsBySearch(searchTerm) {
855
- document.querySelectorAll('.model-card').forEach(card => {
856
- const modelName = card.querySelector('h3').textContent.toLowerCase();
857
- const modelProvider = card.querySelector('p').textContent.toLowerCase();
858
-
859
- if (modelName.includes(searchTerm) || modelProvider.includes(searchTerm)) {
860
- card.style.display = 'block';
861
- } else {
862
- card.style.display = 'none';
863
- }
864
- });
865
- }
866
-
867
  function renderDatasets() {
868
  const grid = document.getElementById('datasetGrid');
869
  grid.innerHTML = '';
@@ -878,46 +889,70 @@ async def get_homepage():
878
 
879
  function createDatasetCard(dataset, category) {
880
  const div = document.createElement('div');
881
- div.className = `dataset-card p-3 border rounded-lg cursor-pointer hover:shadow-md transition-all`;
882
  div.dataset.category = category;
883
  div.dataset.datasetId = dataset.id;
884
 
885
  div.innerHTML = `
886
- <div class="flex items-start justify-between mb-2">
887
  <div class="flex-1">
888
  <h3 class="font-semibold text-gray-800 text-sm">${dataset.name}</h3>
889
- <p class="text-xs text-gray-600">${dataset.description}</p>
890
  </div>
891
- <div class="text-xs bg-gray-100 px-2 py-1 rounded">${dataset.samples.toLocaleString()}</div>
892
  </div>
893
- <div class="flex justify-between items-center">
894
- <span class="text-xs bg-blue-100 text-blue-700 px-2 py-1 rounded">${dataset.task_type}</span>
895
  <span class="text-xs text-gray-500">${dataset.difficulty}</span>
896
  </div>
897
  `;
898
 
899
- div.addEventListener('click', () => selectDataset(dataset.id, div));
900
  return div;
901
  }
902
 
903
- function selectDataset(datasetId, element) {
904
  // Remove previous selection
905
  document.querySelectorAll('.dataset-card').forEach(card => {
906
- card.classList.remove('selected');
907
  });
908
 
909
  // Add selection to clicked element
910
- element.classList.add('selected');
911
  selectedDataset = datasetId;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
912
  }
913
 
914
  function filterDatasets(category) {
915
  // Update filter buttons
916
  document.querySelectorAll('[id^="dataset-filter-"]').forEach(btn => {
917
- btn.className = btn.className.replace('bg-purple-600 text-white', 'bg-gray-200 text-gray-700');
918
  });
919
  document.getElementById(`dataset-filter-${category}`).className =
920
- document.getElementById(`dataset-filter-${category}`).className.replace('bg-gray-200 text-gray-700', 'bg-purple-600 text-white');
921
 
922
  // Filter dataset cards
923
  document.querySelectorAll('.dataset-card').forEach(card => {
@@ -939,7 +974,7 @@ async def get_homepage():
939
 
940
  div.innerHTML = `
941
  <input type="checkbox" id="metric-${metric.id}" class="rounded text-purple-600 focus:ring-purple-500">
942
- <label for="metric-${metric.id}" class="text-sm text-gray-700 cursor-pointer">${metric.name}</label>
943
  `;
944
 
945
  const checkbox = div.querySelector('input');
@@ -949,12 +984,47 @@ async def get_homepage():
949
  } else {
950
  selectedMetrics = selectedMetrics.filter(id => id !== metric.id);
951
  }
 
952
  });
953
 
954
  grid.appendChild(div);
955
  });
956
  }
957
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
958
  function startEvaluation() {
959
  // Validation
960
  if (selectedModels.length === 0) {
@@ -979,7 +1049,7 @@ async def get_homepage():
979
  metrics: selectedMetrics,
980
  sample_size: parseInt(document.getElementById('sampleSize').value),
981
  temperature: parseFloat(document.getElementById('temperature').value),
982
- max_tokens: parseInt(document.getElementById('maxTokens').value),
983
  top_p: 0.9
984
  };
985
 
@@ -1091,27 +1161,27 @@ async def get_homepage():
1091
  }
1092
 
1093
  function showResults(results) {
1094
- const section = document.getElementById('resultsSection');
1095
  const content = document.getElementById('resultsContent');
1096
 
1097
- let html = '<div class="space-y-4">';
1098
 
1099
  Object.keys(results).forEach(modelId => {
1100
- const modelName = modelId.split('/').pop();
1101
  const modelResults = results[modelId];
1102
 
1103
  html += `
1104
- <div class="border rounded-lg p-4">
1105
  <h3 class="font-semibold text-gray-800 mb-3">${modelName}</h3>
1106
- <div class="grid grid-cols-2 gap-3">
1107
  `;
1108
 
1109
  Object.keys(modelResults).forEach(metric => {
1110
  const value = modelResults[metric];
1111
  html += `
1112
- <div class="bg-gray-50 p-3 rounded">
1113
- <div class="text-sm text-gray-600">${metric.toUpperCase()}</div>
1114
- <div class="text-lg font-semibold text-gray-800">${value}</div>
1115
  </div>
1116
  `;
1117
  });
@@ -1121,20 +1191,20 @@ async def get_homepage():
1121
 
1122
  html += '</div>';
1123
  content.innerHTML = html;
1124
- section.classList.remove('hidden');
1125
  }
1126
 
1127
  function disableStartButton() {
1128
  const btn = document.getElementById('startBtn');
1129
  btn.disabled = true;
1130
- btn.innerHTML = '<i data-lucide="loader" class="w-5 h-5 inline mr-2 animate-spin"></i>Running Evaluation...';
1131
  lucide.createIcons();
1132
  }
1133
 
1134
  function enableStartButton() {
1135
  const btn = document.getElementById('startBtn');
1136
  btn.disabled = false;
1137
- btn.innerHTML = '<i data-lucide="play" class="w-5 h-5 inline mr-2"></i>Start Evaluation';
1138
  lucide.createIcons();
1139
  }
1140
  </script>
 
1
  """
2
+ Improved NovaEval Space by Noveum.ai
3
+ Advanced AI Model Evaluation Platform with Enhanced UI
4
  """
5
 
6
  import asyncio
 
31
  app = FastAPI(
32
  title="NovaEval by Noveum.ai",
33
  description="Advanced AI Model Evaluation Platform with Hugging Face Models",
34
+ version="3.0.0"
35
  )
36
 
37
  app.add_middleware(
 
508
  transform: translateY(-2px);
509
  box-shadow: 0 10px 25px rgba(0,0,0,0.1);
510
  }
511
+ .tag-selected {
512
+ background: linear-gradient(45deg, #667eea, #764ba2);
513
+ color: white;
514
  }
515
+ .tag-unselected {
516
+ background: #f3f4f6;
517
+ color: #374151;
518
+ }
519
+ .tag-unselected:hover {
520
+ background: #e5e7eb;
521
  }
522
  .progress-bar {
523
  transition: width 0.5s ease;
 
529
  from { opacity: 0; transform: translateX(-10px); }
530
  to { opacity: 1; transform: translateX(0); }
531
  }
532
+ .compact-card {
533
+ min-height: 120px;
534
+ }
535
+ .selection-panel {
536
+ max-height: 400px;
537
+ overflow-y: auto;
538
  }
539
  </style>
540
  </head>
541
  <body class="bg-gray-50 min-h-screen">
542
  <!-- Header -->
543
+ <header class="gradient-bg text-white py-4 shadow-lg">
544
  <div class="container mx-auto px-4">
545
  <div class="flex items-center justify-between">
546
  <div class="flex items-center space-x-3">
547
+ <div class="w-8 h-8 bg-white rounded-lg flex items-center justify-center">
548
+ <i data-lucide="zap" class="w-5 h-5 text-purple-600"></i>
549
  </div>
550
  <div>
551
+ <h1 class="text-xl font-bold">NovaEval</h1>
552
+ <p class="text-purple-100 text-xs">by <a href="https://noveum.ai" target="_blank" class="underline hover:text-white">Noveum.ai</a></p>
553
  </div>
554
  </div>
555
  <div class="text-right">
556
+ <p class="text-purple-100 text-sm">Advanced AI Model Evaluation</p>
 
557
  </div>
558
  </div>
559
  </div>
560
  </header>
561
 
562
+ <div class="container mx-auto px-4 py-6">
563
+ <!-- Main Grid Layout -->
564
+ <div class="grid grid-cols-1 lg:grid-cols-4 gap-6">
565
+ <!-- Left Panel - Selection (3 columns) -->
566
+ <div class="lg:col-span-3 space-y-6">
567
+ <!-- Selection Row -->
568
+ <div class="grid grid-cols-1 md:grid-cols-3 gap-6">
569
+ <!-- Models Selection -->
570
+ <div class="bg-white rounded-xl shadow-lg p-4 card-hover">
571
+ <div class="flex items-center space-x-2 mb-4">
572
+ <i data-lucide="cpu" class="w-5 h-5 text-purple-600"></i>
573
+ <h2 class="text-lg font-semibold text-gray-800">Models</h2>
574
+ <span id="selectedModelsCount" class="text-sm text-gray-500">(0)</span>
 
 
 
 
 
575
  </div>
576
+
577
+ <!-- Model Size Filters -->
578
+ <div class="flex flex-wrap gap-1 mb-3">
579
+ <button onclick="filterModels('all')" class="px-2 py-1 text-xs rounded-full tag-selected transition-all" id="filter-all">All</button>
580
+ <button onclick="filterModels('small')" class="px-2 py-1 text-xs rounded-full tag-unselected transition-all" id="filter-small">Small</button>
581
+ <button onclick="filterModels('medium')" class="px-2 py-1 text-xs rounded-full tag-unselected transition-all" id="filter-medium">Medium</button>
582
+ <button onclick="filterModels('large')" class="px-2 py-1 text-xs rounded-full tag-unselected transition-all" id="filter-large">Large</button>
583
+ </div>
584
+
585
+ <!-- Selected Models Tags -->
586
+ <div id="selectedModelsTags" class="mb-3 min-h-[24px]">
587
+ <!-- Selected model tags will appear here -->
588
+ </div>
589
+
590
+ <!-- Model Selection Panel -->
591
+ <div id="modelGrid" class="selection-panel space-y-2">
592
+ <!-- Models will be populated by JavaScript -->
593
  </div>
594
  </div>
 
 
 
 
 
 
 
 
 
 
595
 
596
+ <!-- Dataset Selection -->
597
+ <div class="bg-white rounded-xl shadow-lg p-4 card-hover">
598
+ <div class="flex items-center space-x-2 mb-4">
599
+ <i data-lucide="database" class="w-5 h-5 text-purple-600"></i>
600
+ <h2 class="text-lg font-semibold text-gray-800">Dataset</h2>
601
+ </div>
602
+
603
+ <!-- Dataset Category Filters -->
604
+ <div class="flex flex-wrap gap-1 mb-3">
605
+ <button onclick="filterDatasets('all')" class="px-2 py-1 text-xs rounded-full tag-selected transition-all" id="dataset-filter-all">All</button>
606
+ <button onclick="filterDatasets('reasoning')" class="px-2 py-1 text-xs rounded-full tag-unselected transition-all" id="dataset-filter-reasoning">Reasoning</button>
607
+ <button onclick="filterDatasets('knowledge')" class="px-2 py-1 text-xs rounded-full tag-unselected transition-all" id="dataset-filter-knowledge">Knowledge</button>
608
+ <button onclick="filterDatasets('math')" class="px-2 py-1 text-xs rounded-full tag-unselected transition-all" id="dataset-filter-math">Math</button>
609
+ <button onclick="filterDatasets('code')" class="px-2 py-1 text-xs rounded-full tag-unselected transition-all" id="dataset-filter-code">Code</button>
610
+ <button onclick="filterDatasets('language')" class="px-2 py-1 text-xs rounded-full tag-unselected transition-all" id="dataset-filter-language">Language</button>
611
+ </div>
612
+
613
+ <!-- Selected Dataset Tag -->
614
+ <div id="selectedDatasetTag" class="mb-3 min-h-[24px]">
615
+ <!-- Selected dataset tag will appear here -->
616
+ </div>
617
+
618
+ <!-- Dataset Selection Panel -->
619
+ <div id="datasetGrid" class="selection-panel space-y-2">
620
+ <!-- Datasets will be populated by JavaScript -->
621
  </div>
622
  </div>
 
 
 
 
 
 
623
 
624
+ <!-- Metrics & Config -->
625
+ <div class="bg-white rounded-xl shadow-lg p-4 card-hover">
626
+ <div class="flex items-center space-x-2 mb-4">
627
+ <i data-lucide="settings" class="w-5 h-5 text-purple-600"></i>
628
+ <h2 class="text-lg font-semibold text-gray-800">Config</h2>
629
+ </div>
630
+
631
+ <!-- Selected Metrics Tags -->
632
+ <div id="selectedMetricsTags" class="mb-3 min-h-[24px]">
633
+ <!-- Selected metrics tags will appear here -->
634
+ </div>
635
+
636
  <!-- Metrics Selection -->
637
+ <div class="mb-4">
638
+ <label class="block text-sm font-medium text-gray-700 mb-2">Metrics</label>
639
+ <div id="metricsGrid" class="space-y-1">
640
  <!-- Metrics will be populated by JavaScript -->
641
  </div>
642
  </div>
643
 
644
  <!-- Parameters -->
645
+ <div class="space-y-3">
646
  <div>
647
+ <label class="block text-xs font-medium text-gray-700 mb-1">Sample Size</label>
648
  <input type="range" id="sampleSize" min="10" max="1000" value="50"
649
+ class="w-full h-1 bg-gray-200 rounded-lg appearance-none cursor-pointer">
650
+ <div class="flex justify-between text-xs text-gray-500">
651
  <span>10</span>
652
  <span id="sampleSizeValue">50</span>
653
  <span>1000</span>
 
655
  </div>
656
 
657
  <div>
658
+ <label class="block text-xs font-medium text-gray-700 mb-1">Temperature</label>
659
  <input type="range" id="temperature" min="0" max="2" step="0.1" value="0.7"
660
+ class="w-full h-1 bg-gray-200 rounded-lg appearance-none cursor-pointer">
661
+ <div class="flex justify-between text-xs text-gray-500">
662
  <span>0.0</span>
663
  <span id="temperatureValue">0.7</span>
664
  <span>2.0</span>
665
  </div>
666
  </div>
 
 
 
 
 
 
 
 
 
 
 
667
  </div>
668
+
669
+ <!-- Start Button -->
 
 
670
  <button onclick="startEvaluation()" id="startBtn"
671
+ class="w-full gradient-bg text-white py-2 px-4 rounded-lg font-semibold hover:opacity-90 transition-opacity disabled:opacity-50 disabled:cursor-not-allowed mt-4 text-sm">
672
+ <i data-lucide="play" class="w-4 h-4 inline mr-1"></i>
673
  Start Evaluation
674
  </button>
675
  </div>
676
  </div>
677
+
678
+ <!-- Results Panel -->
679
+ <div id="resultsPanel" class="bg-white rounded-xl shadow-lg p-6 card-hover hidden">
680
+ <div class="flex items-center space-x-3 mb-4">
681
+ <i data-lucide="bar-chart" class="w-6 h-6 text-purple-600"></i>
682
+ <h2 class="text-xl font-semibold text-gray-800">Evaluation Results</h2>
683
+ </div>
684
+
685
+ <div id="resultsContent">
686
+ <!-- Results will be populated by JavaScript -->
687
+ </div>
688
+ </div>
689
  </div>
690
 
691
+ <!-- Right Panel - Progress & Logs (1 column) -->
692
  <div class="space-y-6">
693
  <!-- Progress -->
694
+ <div class="bg-white rounded-xl shadow-lg p-4 card-hover">
695
+ <div class="flex items-center space-x-2 mb-3">
696
+ <i data-lucide="activity" class="w-5 h-5 text-purple-600"></i>
697
+ <h2 class="text-lg font-semibold text-gray-800">Progress</h2>
698
  </div>
699
 
700
  <div id="progressSection" class="hidden">
701
+ <div class="mb-3">
702
+ <div class="flex justify-between text-xs text-gray-600 mb-1">
703
  <span id="currentStep">Initializing...</span>
704
  <span id="progressPercent">0%</span>
705
  </div>
 
709
  </div>
710
  </div>
711
 
712
+ <div id="idleMessage" class="text-center text-gray-500 py-4">
713
+ <i data-lucide="clock" class="w-8 h-8 mx-auto mb-2 text-gray-300"></i>
714
+ <p class="text-sm">Ready to start</p>
715
  </div>
716
  </div>
717
 
718
  <!-- Live Logs -->
719
+ <div class="bg-white rounded-xl shadow-lg p-4 card-hover">
720
+ <div class="flex items-center space-x-2 mb-3">
721
+ <i data-lucide="terminal" class="w-5 h-5 text-purple-600"></i>
722
+ <h2 class="text-lg font-semibold text-gray-800">Live Logs</h2>
723
  </div>
724
 
725
+ <div id="logsContainer" class="bg-gray-900 text-green-400 p-3 rounded-lg h-64 overflow-y-auto font-mono text-xs">
726
  <div class="text-gray-500">Waiting for evaluation to start...</div>
727
  </div>
728
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
729
  </div>
730
  </div>
731
  </div>
 
762
  document.getElementById('temperature').addEventListener('input', function() {
763
  document.getElementById('temperatureValue').textContent = this.value;
764
  });
 
 
 
 
 
 
 
 
 
 
 
765
  }
766
 
767
  function renderModels() {
 
778
 
779
  function createModelCard(model, category) {
780
  const div = document.createElement('div');
781
+ div.className = `model-card p-2 border rounded-lg cursor-pointer hover:shadow-md transition-all compact-card`;
782
  div.dataset.category = category;
783
  div.dataset.modelId = model.id;
784
 
785
  div.innerHTML = `
786
+ <div class="flex items-start justify-between mb-1">
787
  <div class="flex-1">
788
  <h3 class="font-semibold text-gray-800 text-sm">${model.name}</h3>
789
+ <p class="text-xs text-gray-500">${model.provider}</p>
790
  </div>
791
  <div class="text-xs bg-gray-100 px-2 py-1 rounded">${model.size}</div>
792
  </div>
793
+ <p class="text-xs text-gray-600 mb-2 line-clamp-2">${model.description}</p>
794
  <div class="flex flex-wrap gap-1">
795
+ ${model.capabilities.slice(0, 2).map(cap => `<span class="text-xs bg-purple-100 text-purple-700 px-1 py-0.5 rounded">${cap}</span>`).join('')}
796
  </div>
797
  `;
798
 
799
+ div.addEventListener('click', () => toggleModelSelection(model.id, model.name, div));
800
  return div;
801
  }
802
 
803
+ function toggleModelSelection(modelId, modelName, element) {
804
  if (selectedModels.includes(modelId)) {
805
  selectedModels = selectedModels.filter(id => id !== modelId);
806
+ element.classList.remove('ring-2', 'ring-purple-500', 'bg-purple-50');
807
  } else {
808
  selectedModels.push(modelId);
809
+ element.classList.add('ring-2', 'ring-purple-500', 'bg-purple-50');
810
+ }
811
+ updateSelectedModelsTags();
812
+ updateSelectedModelsCount();
813
+ }
814
+
815
+ function updateSelectedModelsTags() {
816
+ const container = document.getElementById('selectedModelsTags');
817
+ container.innerHTML = '';
818
+
819
+ selectedModels.forEach(modelId => {
820
+ const modelName = getModelName(modelId);
821
+ const tag = document.createElement('span');
822
+ tag.className = 'inline-flex items-center px-2 py-1 text-xs bg-purple-100 text-purple-800 rounded-full mr-1 mb-1';
823
+ tag.innerHTML = `
824
+ ${modelName}
825
+ <button onclick="removeModel('${modelId}')" class="ml-1 text-purple-600 hover:text-purple-800">
826
+ <i data-lucide="x" class="w-3 h-3"></i>
827
+ </button>
828
+ `;
829
+ container.appendChild(tag);
830
+ });
831
+ lucide.createIcons();
832
+ }
833
+
834
+ function removeModel(modelId) {
835
+ selectedModels = selectedModels.filter(id => id !== modelId);
836
+ // Update UI
837
+ const modelCard = document.querySelector(`[data-model-id="${modelId}"]`);
838
+ if (modelCard) {
839
+ modelCard.classList.remove('ring-2', 'ring-purple-500', 'bg-purple-50');
840
  }
841
+ updateSelectedModelsTags();
842
  updateSelectedModelsCount();
843
  }
844
 
845
+ function getModelName(modelId) {
846
+ for (const category of Object.values(models)) {
847
+ for (const model of category) {
848
+ if (model.id === modelId) {
849
+ return model.name;
850
+ }
851
+ }
852
+ }
853
+ return modelId.split('/').pop();
854
+ }
855
+
856
  function updateSelectedModelsCount() {
857
+ document.getElementById('selectedModelsCount').textContent = `(${selectedModels.length})`;
858
  }
859
 
860
  function filterModels(category) {
861
  // Update filter buttons
862
  document.querySelectorAll('[id^="filter-"]').forEach(btn => {
863
+ btn.className = btn.className.replace('tag-selected', 'tag-unselected');
864
  });
865
  document.getElementById(`filter-${category}`).className =
866
+ document.getElementById(`filter-${category}`).className.replace('tag-unselected', 'tag-selected');
867
 
868
  // Filter model cards
869
  document.querySelectorAll('.model-card').forEach(card => {
 
875
  });
876
  }
877
 
 
 
 
 
 
 
 
 
 
 
 
 
 
878
  function renderDatasets() {
879
  const grid = document.getElementById('datasetGrid');
880
  grid.innerHTML = '';
 
889
 
890
  function createDatasetCard(dataset, category) {
891
  const div = document.createElement('div');
892
+ div.className = `dataset-card p-2 border rounded-lg cursor-pointer hover:shadow-md transition-all compact-card`;
893
  div.dataset.category = category;
894
  div.dataset.datasetId = dataset.id;
895
 
896
  div.innerHTML = `
897
+ <div class="flex items-start justify-between mb-1">
898
  <div class="flex-1">
899
  <h3 class="font-semibold text-gray-800 text-sm">${dataset.name}</h3>
900
+ <p class="text-xs text-gray-600 line-clamp-2">${dataset.description}</p>
901
  </div>
902
+ <div class="text-xs bg-gray-100 px-1 py-0.5 rounded">${dataset.samples.toLocaleString()}</div>
903
  </div>
904
+ <div class="flex justify-between items-center mt-2">
905
+ <span class="text-xs bg-blue-100 text-blue-700 px-1 py-0.5 rounded">${dataset.task_type}</span>
906
  <span class="text-xs text-gray-500">${dataset.difficulty}</span>
907
  </div>
908
  `;
909
 
910
+ div.addEventListener('click', () => selectDataset(dataset.id, dataset.name, div));
911
  return div;
912
  }
913
 
914
/**
 * Make `element` the single selected dataset card.
 *
 * Clears the highlight from every dataset card, highlights the clicked
 * one, stores its id in the global `selectedDataset`, and refreshes the
 * selected-dataset tag with the given name.
 */
function selectDataset(datasetId, datasetName, element) {
    const highlightClasses = ['ring-2', 'ring-purple-500', 'bg-purple-50'];

    // Only one dataset can be selected: reset all cards first
    for (const card of document.querySelectorAll('.dataset-card')) {
        card.classList.remove(...highlightClasses);
    }

    // Highlight the clicked card and remember the choice
    element.classList.add(...highlightClasses);
    selectedDataset = datasetId;

    updateSelectedDatasetTag(datasetName);
}
927
+
928
/**
 * Show a removable "tag" chip for the selected dataset.
 *
 * Replaces the contents of #selectedDatasetTag with a single chip that
 * displays `datasetName` and an "x" button wired to removeDataset().
 */
function updateSelectedDatasetTag(datasetName) {
    const container = document.getElementById('selectedDatasetTag');
    container.innerHTML = '';

    const tag = document.createElement('span');
    tag.className = 'inline-flex items-center px-2 py-1 text-xs bg-blue-100 text-blue-800 rounded-full';

    // Insert the name as a text node so that any markup characters in it
    // are displayed literally instead of being parsed as HTML.
    tag.appendChild(document.createTextNode(datasetName));

    const removeButton = document.createElement('button');
    removeButton.className = 'ml-1 text-blue-600 hover:text-blue-800';
    // Attach the handler directly instead of through an inline onclick
    // attribute, matching how the dataset cards register their listeners.
    removeButton.addEventListener('click', () => removeDataset());

    const icon = document.createElement('i');
    icon.setAttribute('data-lucide', 'x');
    icon.className = 'w-3 h-3';
    removeButton.appendChild(icon);

    tag.appendChild(removeButton);
    container.appendChild(tag);

    // Replace the freshly created <i data-lucide> placeholder with an icon.
    lucide.createIcons();
}
940
+
941
/**
 * Clear the dataset selection: reset the global `selectedDataset`, empty
 * the selected-dataset tag, and remove the highlight from every card.
 */
function removeDataset() {
    selectedDataset = null;

    const tagContainer = document.getElementById('selectedDatasetTag');
    tagContainer.innerHTML = '';

    for (const card of document.querySelectorAll('.dataset-card')) {
        card.classList.remove('ring-2', 'ring-purple-500', 'bg-purple-50');
    }
}
948
 
949
  function filterDatasets(category) {
950
  // Update filter buttons
951
  document.querySelectorAll('[id^="dataset-filter-"]').forEach(btn => {
952
+ btn.className = btn.className.replace('tag-selected', 'tag-unselected');
953
  });
954
  document.getElementById(`dataset-filter-${category}`).className =
955
+ document.getElementById(`dataset-filter-${category}`).className.replace('tag-unselected', 'tag-selected');
956
 
957
  // Filter dataset cards
958
  document.querySelectorAll('.dataset-card').forEach(card => {
 
974
 
975
  div.innerHTML = `
976
  <input type="checkbox" id="metric-${metric.id}" class="rounded text-purple-600 focus:ring-purple-500">
977
+ <label for="metric-${metric.id}" class="text-xs text-gray-700 cursor-pointer">${metric.name}</label>
978
  `;
979
 
980
  const checkbox = div.querySelector('input');
 
984
  } else {
985
  selectedMetrics = selectedMetrics.filter(id => id !== metric.id);
986
  }
987
+ updateSelectedMetricsTags();
988
  });
989
 
990
  grid.appendChild(div);
991
  });
992
  }
993
 
994
/**
 * Re-render the removable "tag" chips for every currently selected metric.
 *
 * Clears the #selectedMetricsTags container and appends one chip per entry
 * in the global `selectedMetrics` array. Each chip shows the metric's
 * display name and an "x" button that deselects it via removeMetric().
 */
function updateSelectedMetricsTags() {
    const container = document.getElementById('selectedMetricsTags');
    container.innerHTML = '';

    selectedMetrics.forEach(metricId => {
        const tag = document.createElement('span');
        tag.className = 'inline-flex items-center px-2 py-1 text-xs bg-green-100 text-green-800 rounded-full mr-1 mb-1';

        // Insert the name as a text node so that any markup characters in
        // it are displayed literally instead of being parsed as HTML.
        tag.appendChild(document.createTextNode(getMetricName(metricId)));

        const removeButton = document.createElement('button');
        removeButton.className = 'ml-1 text-green-600 hover:text-green-800';
        // Bind the handler with a closure rather than an inline
        // onclick="removeMetric('...')" string: an id containing a quote
        // can no longer break (or inject into) the handler source.
        removeButton.addEventListener('click', () => removeMetric(metricId));

        const icon = document.createElement('i');
        icon.setAttribute('data-lucide', 'x');
        icon.className = 'w-3 h-3';
        removeButton.appendChild(icon);

        tag.appendChild(removeButton);
        container.appendChild(tag);
    });

    // Replace the freshly created <i data-lucide> placeholders with icons.
    lucide.createIcons();
}
1012
+
1013
/**
 * Deselect a metric: drop it from `selectedMetrics`, untick its checkbox
 * (when one is present in the DOM), and refresh the metric tag chips.
 */
function removeMetric(metricId) {
    selectedMetrics = selectedMetrics.filter(selectedId => selectedId !== metricId);

    // Keep the checkbox in sync with the selection state
    const metricCheckbox = document.getElementById(`metric-${metricId}`);
    if (metricCheckbox) {
        metricCheckbox.checked = false;
    }

    updateSelectedMetricsTags();
}
1022
+
1023
/**
 * Look up the display name for a metric id in the global `metrics` list.
 * Falls back to the id itself when no entry matches.
 */
function getMetricName(metricId) {
    const match = metrics.find(entry => entry.id === metricId);
    if (match) {
        return match.name;
    }
    return metricId;
}
1027
+
1028
  function startEvaluation() {
1029
  // Validation
1030
  if (selectedModels.length === 0) {
 
1049
  metrics: selectedMetrics,
1050
  sample_size: parseInt(document.getElementById('sampleSize').value),
1051
  temperature: parseFloat(document.getElementById('temperature').value),
1052
+ max_tokens: 512,
1053
  top_p: 0.9
1054
  };
1055
 
 
1161
  }
1162
 
1163
  function showResults(results) {
1164
+ const panel = document.getElementById('resultsPanel');
1165
  const content = document.getElementById('resultsContent');
1166
 
1167
+ let html = '<div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">';
1168
 
1169
  Object.keys(results).forEach(modelId => {
1170
+ const modelName = getModelName(modelId);
1171
  const modelResults = results[modelId];
1172
 
1173
  html += `
1174
+ <div class="border rounded-lg p-4 bg-gray-50">
1175
  <h3 class="font-semibold text-gray-800 mb-3">${modelName}</h3>
1176
+ <div class="space-y-2">
1177
  `;
1178
 
1179
  Object.keys(modelResults).forEach(metric => {
1180
  const value = modelResults[metric];
1181
  html += `
1182
+ <div class="flex justify-between items-center">
1183
+ <span class="text-sm text-gray-600">${metric.toUpperCase()}</span>
1184
+ <span class="text-lg font-semibold text-gray-800">${value}</span>
1185
  </div>
1186
  `;
1187
  });
 
1191
 
1192
  html += '</div>';
1193
  content.innerHTML = html;
1194
+ panel.classList.remove('hidden');
1195
  }
1196
 
1197
  function disableStartButton() {
1198
  const btn = document.getElementById('startBtn');
1199
  btn.disabled = true;
1200
+ btn.innerHTML = '<i data-lucide="loader" class="w-4 h-4 inline mr-1 animate-spin"></i>Running...';
1201
  lucide.createIcons();
1202
  }
1203
 
1204
  function enableStartButton() {
1205
  const btn = document.getElementById('startBtn');
1206
  btn.disabled = false;
1207
+ btn.innerHTML = '<i data-lucide="play" class="w-4 h-4 inline mr-1"></i>Start Evaluation';
1208
  lucide.createIcons();
1209
  }
1210
  </script>