Alina Lozowski committed
Commit e7abd9e
1 Parent(s): 0ab6122

Migrating to the React project

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .env.example +3 -0
  2. .gitattributes +0 -36
  3. .gitignore +44 -21
  4. .pre-commit-config.yaml +0 -53
  5. Dockerfile +63 -0
  6. Makefile +0 -18
  7. README.md +74 -14
  8. app.py +0 -492
  9. backend/Dockerfile.dev +25 -0
  10. backend/README.md +352 -0
  11. backend/app/api/__init__.py +5 -0
  12. backend/app/api/dependencies.py +34 -0
  13. backend/app/api/endpoints/leaderboard.py +49 -0
  14. backend/app/api/endpoints/models.py +103 -0
  15. backend/app/api/endpoints/votes.py +105 -0
  16. backend/app/api/router.py +9 -0
  17. backend/app/asgi.py +106 -0
  18. backend/app/config/__init__.py +6 -0
  19. backend/app/config/base.py +38 -0
  20. backend/app/config/hf_config.py +35 -0
  21. backend/app/config/logging_config.py +38 -0
  22. backend/app/core/cache.py +109 -0
  23. backend/app/core/fastapi_cache.py +48 -0
  24. backend/app/main.py +18 -0
  25. backend/app/services/__init__.py +3 -0
  26. backend/app/services/hf_service.py +50 -0
  27. backend/app/services/leaderboard.py +205 -0
  28. backend/app/services/models.py +559 -0
  29. backend/app/services/rate_limiter.py +72 -0
  30. backend/app/services/votes.py +391 -0
  31. backend/app/utils/__init__.py +3 -0
  32. backend/app/utils/logging.py +105 -0
  33. backend/app/utils/model_validation.py +208 -0
  34. backend/pyproject.toml +30 -0
  35. backend/utils/analyze_prod_datasets.py +170 -0
  36. backend/utils/analyze_prod_models.py +105 -0
  37. backend/utils/last_activity.py +164 -0
  38. backend/utils/sync_datasets_locally.py +130 -0
  39. docker-compose.yml +35 -0
  40. frontend/Dockerfile.dev +15 -0
  41. frontend/README.md +80 -0
  42. frontend/package.json +55 -0
  43. frontend/public/index.html +74 -0
  44. frontend/public/logo256.png +0 -0
  45. frontend/public/logo32.png +0 -0
  46. frontend/public/og-image.jpg +0 -0
  47. frontend/public/robots.txt +3 -0
  48. frontend/server.js +85 -0
  49. frontend/src/App.js +115 -0
  50. frontend/src/components/Footer/Footer.js +29 -0
.env.example ADDED
@@ -0,0 +1,3 @@
+ ENVIRONMENT=development
+ HF_TOKEN=xxx
+ HF_HOME=.cache
.gitattributes DELETED
@@ -1,36 +0,0 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
- scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text
- gif.gif filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -1,22 +1,45 @@
- venv/
- .venv/
- __pycache__/
- .env
- .ipynb_checkpoints
- *ipynb
- .vscode/
+ # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
+
+ __pycache__
+ .cache/
+
+ # dependencies
+
+ frontend/node_modules
+ /.pnp
+ .pnp.js
+
+ # testing
+
+ /coverage
+
+ # production
+
+ /build
+
+ # misc
+
  .DS_Store
- .ruff_cache/
- .python-version
- .profile_app.python
- *pstats
- *.lock
-
- eval-queue/
- eval-results/
- dynamic-info/
- downloads/
- model-votes/
- open-llm-leaderboard___contents/
-
- src/assets/model_counts.html
+ .env.local
+ .env.development.local
+ .env.test.local
+ .env.production.local
+
+ npm-debug.log*
+ yarn-debug.log*
+ yarn-error.log*
+
+ src/dataframe.json
+
+ yarn.lock
+ package-lock.json
+
+ /public
+
+ .claudesync/
+
+ # Environment variables
+ .env
+ .env.*
+ !.env.example
+
.pre-commit-config.yaml DELETED
@@ -1,53 +0,0 @@
- # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- default_language_version:
-   python: python3
-
- ci:
-   autofix_prs: true
-   autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
-   autoupdate_schedule: quarterly
-
- repos:
-   - repo: https://github.com/pre-commit/pre-commit-hooks
-     rev: v4.3.0
-     hooks:
-       - id: check-yaml
-       - id: check-case-conflict
-       - id: detect-private-key
-       - id: check-added-large-files
-         args: ['--maxkb=1000']
-       - id: requirements-txt-fixer
-       - id: end-of-file-fixer
-       - id: trailing-whitespace
-
-   - repo: https://github.com/PyCQA/isort
-     rev: 5.12.0
-     hooks:
-       - id: isort
-         name: Format imports
-
-   - repo: https://github.com/psf/black
-     rev: 22.12.0
-     hooks:
-       - id: black
-         name: Format code
-         additional_dependencies: ['click==8.0.2']
-
-   - repo: https://github.com/charliermarsh/ruff-pre-commit
-     # Ruff version.
-     rev: 'v0.0.267'
-     hooks:
-       - id: ruff
Dockerfile ADDED
@@ -0,0 +1,63 @@
+ # Build frontend
+ FROM node:18 as frontend-build
+ WORKDIR /app
+ COPY frontend/package*.json ./
+ RUN npm install
+ COPY frontend/ ./
+
+ RUN npm run build
+
+ # Build backend
+ FROM python:3.9-slim
+ WORKDIR /app
+
+ # Create non-root user
+ RUN useradd -m -u 1000 user
+
+ # Install poetry
+ RUN pip install poetry
+
+ # Create and configure cache directory
+ RUN mkdir -p /app/.cache && \
+     chown -R user:user /app
+
+ # Copy and install backend dependencies
+ COPY backend/pyproject.toml backend/poetry.lock* ./
+ RUN poetry config virtualenvs.create false \
+     && poetry install --no-interaction --no-ansi --no-root --only main
+
+ # Copy backend code
+ COPY backend/ .
+
+ # Install Node.js and npm
+ RUN apt-get update && apt-get install -y \
+     curl \
+     netcat-openbsd \
+     && curl -fsSL https://deb.nodesource.com/setup_18.x | bash - \
+     && apt-get install -y nodejs \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy frontend server and build
+ COPY --from=frontend-build /app/build ./frontend/build
+ COPY --from=frontend-build /app/package*.json ./frontend/
+ COPY --from=frontend-build /app/server.js ./frontend/
+
+ # Install frontend production dependencies
+ WORKDIR /app/frontend
+ RUN npm install --production
+ WORKDIR /app
+
+ # Environment variables
+ ENV HF_HOME=/app/.cache \
+     TRANSFORMERS_CACHE=/app/.cache \
+     HF_DATASETS_CACHE=/app/.cache \
+     INTERNAL_API_PORT=7861 \
+     PORT=7860 \
+     NODE_ENV=production
+
+ # Note: HF_TOKEN should be provided at runtime, not build time
+ USER user
+ EXPOSE 7860
+
+ # Start both servers with wait-for
+ CMD ["sh", "-c", "uvicorn app.asgi:app --host 0.0.0.0 --port 7861 & while ! nc -z localhost 7861; do sleep 1; done && cd frontend && npm run serve"]
Makefile DELETED
@@ -1,18 +0,0 @@
- .PHONY: style format quality all
-
- # Applies code style fixes to the specified file or directory
- style:
-     @echo "Applying style fixes to $(file)"
-     ruff format $(file)
-     ruff check --fix $(file) --line-length 119
-
- # Checks code quality for the specified file or directory
- quality:
-     @echo "Checking code quality for $(file)"
-     ruff check $(file) --line-length 119
-
- # Applies PEP8 formatting and checks the entire codebase
- all:
-     @echo "Formatting and checking the entire codebase"
-     ruff format .
-     ruff check --fix . --line-length 119
README.md CHANGED
@@ -1,25 +1,85 @@
  ---
  title: Open LLM Leaderboard 2
  emoji: 🏆
- colorFrom: green
- colorTo: indigo
- sdk: gradio
- sdk_version: 4.44.0
- app_file: app.py
+ colorFrom: blue
+ colorTo: red
+ sdk: docker
+ hf_oauth: true
  pinned: true
  license: apache-2.0
  duplicated_from: open-llm-leaderboard/open_llm_leaderboard
- fullWidth: true
- startup_duration_timeout: 1h
- hf_oauth: true
- space_ci:
-   private: true
-   secrets:
-     - HF_TOKEN
-     - WEBHOOK_SECRET
  tags:
    - leaderboard
  short_description: Track, rank and evaluate open LLMs and chatbots
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Open LLM Leaderboard
+
+ Modern React interface for comparing Large Language Models (LLMs) in an open and reproducible way.
+
+ ## Features
+
+ - 📊 Interactive table with advanced sorting and filtering
+ - 🔍 Semantic model search
+ - 📌 Pin models for comparison
+ - 📱 Responsive and modern interface
+ - 🎨 Dark/Light mode
+ - ⚡️ Optimized performance with virtualization
+
+ ## Architecture
+
+ The project is split into two main parts:
+
+ ### Frontend (React)
+
+ ```
+ frontend/
+ ├── src/
+ │   ├── components/   # Reusable UI components
+ │   ├── pages/        # Application pages
+ │   ├── hooks/        # Custom React hooks
+ │   ├── context/      # React contexts
+ │   └── constants/    # Constants and configurations
+ ├── public/           # Static assets
+ └── server.js         # Express server for production
+ ```
+
+ ### Backend (FastAPI)
+
+ ```
+ backend/
+ ├── app/
+ │   ├── api/           # API router and endpoints
+ │   │   └── endpoints/ # Specific API endpoints
+ │   ├── core/          # Core functionality
+ │   ├── config/        # Configuration
+ │   └── services/      # Business logic services
+ │       ├── leaderboard.py
+ │       ├── models.py
+ │       ├── votes.py
+ │       └── hf_service.py
+ └── utils/             # Utility functions
+ ```
+
+ ## Technologies
+
+ ### Frontend
+
+ - React
+ - Material-UI
+ - TanStack Table & Virtual
+ - Express.js
+
+ ### Backend
+
+ - FastAPI
+ - Hugging Face API
+ - Docker
+
+ ## Development
+
+ The application is containerized using Docker and can be run using:
+
+ ```bash
+ docker-compose up
+ ```
app.py DELETED
@@ -1,492 +0,0 @@
- import logging
- import time
- import schedule
- import datetime
- import gradio as gr
- from threading import Thread
- import datasets
- from huggingface_hub import snapshot_download, WebhooksServer, WebhookPayload, RepoCard
- from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
- from apscheduler.schedulers.background import BackgroundScheduler
-
- # Start ephemeral Spaces on PRs (see config in README.md)
- from gradio_space_ci.webhook import IS_EPHEMERAL_SPACE, SPACE_ID, configure_space_ci
-
- from src.display.about import (
-     CITATION_BUTTON_LABEL,
-     CITATION_BUTTON_TEXT,
-     EVALUATION_QUEUE_TEXT,
-     INTRODUCTION_TEXT,
-     TITLE,
- )
- from src.display.css_html_js import custom_css
- from src.display.utils import (
-     BENCHMARK_COLS,
-     COLS,
-     EVAL_COLS,
-     EVAL_TYPES,
-     AutoEvalColumn,
-     ModelType,
-     Precision,
-     WeightType,
-     fields,
-     EvalQueueColumn
- )
- from src.envs import (
-     API,
-     EVAL_REQUESTS_PATH,
-     AGGREGATED_REPO,
-     HF_TOKEN,
-     QUEUE_REPO,
-     REPO_ID,
-     VOTES_REPO,
-     VOTES_PATH,
-     HF_HOME,
- )
- from src.populate import get_evaluation_queue_df, get_leaderboard_df
- from src.submission.submit import add_new_eval
- from src.voting.vote_system import VoteManager, run_scheduler
-
- # Configure logging
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
-
- # Start ephemeral Spaces on PRs (see config in README.md)
- from gradio_space_ci.webhook import IS_EPHEMERAL_SPACE, SPACE_ID, configure_space_ci
-
- # Convert the environment variable "LEADERBOARD_FULL_INIT" to a boolean value, defaulting to True if the variable is not set.
- # This controls whether a full initialization should be performed.
- DO_FULL_INIT = True  # os.getenv("LEADERBOARD_FULL_INIT", "True") == "True"
- NEW_DATA_ON_LEADERBOARD = True
- LEADERBOARD_DF = None
-
- def restart_space():
-     logging.info(f"Restarting space with repo ID: {REPO_ID}")
-     try:
-         # Check if new data is pending and download if necessary
-         if NEW_DATA_ON_LEADERBOARD:
-             logging.info("Fetching latest leaderboard data before restart.")
-             get_latest_data_leaderboard()
-
-         # Now restart the space
-         API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
-         logging.info("Space restarted successfully.")
-     except Exception as e:
-         logging.error(f"Failed to restart space: {e}")
-
- def time_diff_wrapper(func):
-     def wrapper(*args, **kwargs):
-         start_time = time.time()
-         result = func(*args, **kwargs)
-         end_time = time.time()
-         diff = end_time - start_time
-         logging.info(f"Time taken for {func.__name__}: {diff} seconds")
-         return result
-
-     return wrapper
-
-
- @time_diff_wrapper
- def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, backoff_factor=1.5):
-     """Download dataset with exponential backoff retries."""
-     attempt = 0
-     while attempt < max_attempts:
-         try:
-             logging.info(f"Downloading {repo_id} to {local_dir}")
-             snapshot_download(
-                 repo_id=repo_id,
-                 local_dir=local_dir,
-                 repo_type=repo_type,
-                 tqdm_class=None,
-                 etag_timeout=30,
-                 max_workers=8,
-             )
-             logging.info("Download successful")
-             return
-         except Exception as e:
-             wait_time = backoff_factor**attempt
-             logging.error(f"Error downloading {repo_id}: {e}, retrying in {wait_time}s")
-             time.sleep(wait_time)
-             attempt += 1
-     raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
-
- def get_latest_data_leaderboard(leaderboard_initial_df=None):
-     global NEW_DATA_ON_LEADERBOARD
-     global LEADERBOARD_DF
-     if NEW_DATA_ON_LEADERBOARD:
-         logging.info("Leaderboard updated at reload!")
-         try:
-             leaderboard_dataset = datasets.load_dataset(
-                 AGGREGATED_REPO,
-                 "default",
-                 split="train",
-                 cache_dir=HF_HOME,
-                 download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,  # Always download fresh data
-                 verification_mode="no_checks"
-             )
-             LEADERBOARD_DF = get_leaderboard_df(
-                 leaderboard_dataset=leaderboard_dataset,
-                 cols=COLS,
-                 benchmark_cols=BENCHMARK_COLS,
-             )
-             logging.info("Leaderboard dataset successfully downloaded.")
-         except Exception as e:
-             logging.error(f"Failed to download leaderboard dataset: {e}")
-             return
-
-         # Reset the flag after successful download
-         NEW_DATA_ON_LEADERBOARD = False
-     else:
-         LEADERBOARD_DF = leaderboard_initial_df
-         logging.info("Using cached leaderboard dataset.")
-     return LEADERBOARD_DF
-
-
- def get_latest_data_queue():
-     eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
-     return eval_queue_dfs
-
- def init_space():
-     """Initializes the application space, loading only necessary data."""
-     global NEW_DATA_ON_LEADERBOARD
-     NEW_DATA_ON_LEADERBOARD = True  # Ensure new data is always pulled on restart
-
-     if DO_FULL_INIT:
-         # These downloads only occur on full initialization
-         try:
-             download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
-             download_dataset(VOTES_REPO, VOTES_PATH)
-         except Exception:
-             restart_space()
-
-     # Always redownload the leaderboard DataFrame
-     global LEADERBOARD_DF
-     LEADERBOARD_DF = get_latest_data_leaderboard()
-
-     # Evaluation queue DataFrame retrieval is independent of initialization detail level
-     eval_queue_dfs = get_latest_data_queue()
-
-     return LEADERBOARD_DF, eval_queue_dfs
-
- # Initialize VoteManager
- vote_manager = VoteManager(VOTES_PATH, EVAL_REQUESTS_PATH, VOTES_REPO)
-
-
- # Schedule the upload_votes method to run every 15 minutes
- schedule.every(15).minutes.do(vote_manager.upload_votes)
-
- # Start the scheduler in a separate thread
- scheduler_thread = Thread(target=run_scheduler, args=(vote_manager,), daemon=True)
- scheduler_thread.start()
-
- # Calls the init_space function with the `full_init` parameter determined by the `do_full_init` variable.
- # This initializes various DataFrames used throughout the application, with the level of initialization detail controlled by the `do_full_init` flag.
- LEADERBOARD_DF, eval_queue_dfs = init_space()
- finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = eval_queue_dfs
-
-
- def init_leaderboard(dataframe):
-     if dataframe is None or dataframe.empty:
-         raise ValueError("Leaderboard DataFrame is empty or None.")
-     return Leaderboard(
-         value=dataframe,
-         datatype=[c.type for c in fields(AutoEvalColumn)],
-         select_columns=SelectColumns(
-             default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-             cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
-             label="Select Columns to Display:",
-         ),
-         search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.fullname.name, AutoEvalColumn.license.name],
-         hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
-         filter_columns=[
-             ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-             ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
-             ColumnFilter(
-                 AutoEvalColumn.params.name,
-                 type="slider",
-                 min=0.01,
-                 max=150,
-                 label="Select the number of parameters (B)",
-             ),
-             ColumnFilter(
-                 AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
-             ),
-             ColumnFilter(
-                 AutoEvalColumn.merged.name, type="boolean", label="Merge/MoErge", default=False
-             ),
-             ColumnFilter(AutoEvalColumn.moe.name, type="boolean", label="MoE", default=False),
-             ColumnFilter(AutoEvalColumn.not_flagged.name, type="boolean", label="Flagged", default=True),
-             ColumnFilter(AutoEvalColumn.official_providers.name, type="boolean", label="Show only official providers", default=False),
-         ],
-         bool_checkboxgroup_label="Hide models",
-         interactive=False,
-     )
-
- main_block = gr.Blocks(css=custom_css)
- with main_block:
-     with gr.Row(elem_id="header-row"):
-         gr.HTML(TITLE)
-
-     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-
-     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-             leaderboard = init_leaderboard(LEADERBOARD_DF)
-
-         with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=5):
-             with gr.Column():
-                 with gr.Row():
-                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-                 with gr.Row():
-                     gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-                     login_button = gr.LoginButton(elem_id="oauth-button")
-
-                 with gr.Row():
-                     with gr.Column():
-                         model_name_textbox = gr.Textbox(label="Model name")
-                         revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="latest")
-                         with gr.Row():
-                             model_type = gr.Dropdown(
-                                 choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                                 label="Model type",
-                                 multiselect=False,
-                                 value=ModelType.FT.to_str(" : "),
-                                 interactive=True,
-                             )
-                             chat_template_toggle = gr.Checkbox(
-                                 label="Use chat template",
-                                 value=False,
-                                 info="Is your model a chat model?",
-                             )
-
-                     with gr.Column():
-                         precision = gr.Dropdown(
-                             choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                             label="Precision",
-                             multiselect=False,
-                             value="float16",
-                             interactive=True,
-                         )
-                         weight_type = gr.Dropdown(
-                             choices=[i.value.name for i in WeightType],
-                             label="Weights type",
-                             multiselect=False,
-                             value=WeightType.Original.value.name,
-                             interactive=True,
-                         )
-                         base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)", interactive=False)
-
-                 with gr.Column():
-                     with gr.Accordion(
-                         f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                         open=False,
-                     ):
-                         with gr.Row():
-                             finished_eval_table = gr.components.Dataframe(
-                                 value=finished_eval_queue_df,
-                                 headers=EVAL_COLS,
-                                 datatype=EVAL_TYPES,
-                                 row_count=5,
-                                 interactive=False,
-                             )
-                     with gr.Accordion(
-                         f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                         open=False,
-                     ):
-                         with gr.Row():
-                             running_eval_table = gr.components.Dataframe(
-                                 value=running_eval_queue_df,
-                                 headers=EVAL_COLS,
-                                 datatype=EVAL_TYPES,
-                                 row_count=5,
-                                 interactive=False,
-                             )
-
-                     with gr.Accordion(
-                         f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                         open=False,
-                     ):
-                         with gr.Row():
-                             pending_eval_table = gr.components.Dataframe(
-                                 value=pending_eval_queue_df,
-                                 headers=EVAL_COLS,
-                                 datatype=EVAL_TYPES,
-                                 row_count=5,
-                                 interactive=False,
-                             )
-
-                 submit_button = gr.Button("Submit Eval")
-                 submission_result = gr.Markdown()
-
-                 # The chat template checkbox update function
-                 def update_chat_checkbox(model_type_value):
-                     return ModelType.from_str(model_type_value) == ModelType.chat
-
-                 model_type.change(
-                     fn=update_chat_checkbox,
-                     inputs=[model_type],  # Pass the current checkbox value
-                     outputs=chat_template_toggle,
-                 )
-
-                 # The base_model_name_textbox interactivity and value reset function
-                 def update_base_model_name_textbox(weight_type_value):
-                     # Convert the dropdown value back to the corresponding WeightType Enum
-                     weight_type_enum = WeightType[weight_type_value]
-
-                     # Determine if the textbox should be interactive
-                     interactive = weight_type_enum in [WeightType.Adapter, WeightType.Delta]
-
-                     # Reset the value if weight type is "Original"
-                     reset_value = "" if not interactive else None
-
-                     return gr.update(interactive=interactive, value=reset_value)
-
-                 weight_type.change(
-                     fn=update_base_model_name_textbox,
-                     inputs=[weight_type],
-                     outputs=[base_model_name_textbox],
-                 )
-
-                 submit_button.click(
-                     add_new_eval,
-                     [
-                         model_name_textbox,
-                         base_model_name_textbox,
-                         revision_name_textbox,
-                         precision,
-                         weight_type,
-                         model_type,
-                         chat_template_toggle,
-                     ],
-                     submission_result,
-                 )
-
-         # Ensure the values in 'pending_eval_queue_df' are correct and ready for the DataFrame component
-         with gr.TabItem("🆙 Model Vote"):
-             with gr.Row():
-                 gr.Markdown(
-                     "## Vote for the models which should be evaluated first! \nYou'll need to sign in with the button above first. All votes are recorded.",
-                     elem_classes="markdown-text"
-                 )
-                 login_button = gr.LoginButton(elem_id="oauth-button")
-
-
-             with gr.Row():
-                 pending_models = pending_eval_queue_df[EvalQueueColumn.model_name.name].to_list()
-
-                 with gr.Column():
-                     selected_model = gr.Dropdown(
-                         choices=pending_models,
-                         label="Models",
-                         multiselect=False,
-                         value="str",
-                         interactive=True,
-                     )
-
-                     vote_button = gr.Button("Vote", variant="primary")
-
-             with gr.Row():
-                 with gr.Accordion(
-                     f"Available models pending ({len(pending_eval_queue_df)})",
-                     open=True,
-                 ):
-                     with gr.Row():
-                         pending_eval_table_votes = gr.components.Dataframe(
-                             value=vote_manager.create_request_vote_df(
-                                 pending_eval_queue_df
-                             ),
-                             headers=EVAL_COLS,
-                             datatype=EVAL_TYPES,
-                             row_count=5,
-                             interactive=False
-                         )
-
-             # Set the click event for the vote button
-             vote_button.click(
-                 vote_manager.add_vote,
-                 inputs=[selected_model, pending_eval_table],
-                 outputs=[pending_eval_table_votes]
-             )
-
-
-     with gr.Row():
-         with gr.Accordion("📙 Citation", open=False):
-             citation_button = gr.Textbox(
-                 value=CITATION_BUTTON_TEXT,
-                 label=CITATION_BUTTON_LABEL,
-                 lines=20,
-                 elem_id="citation-button",
-                 show_copy_button=True,
-             )
-
-     main_block.load(fn=get_latest_data_leaderboard, inputs=[leaderboard], outputs=[leaderboard])
-     leaderboard.change(fn=get_latest_data_queue, inputs=None, outputs=[finished_eval_table, running_eval_table, pending_eval_table])
-     pending_eval_table.change(fn=vote_manager.create_request_vote_df, inputs=[pending_eval_table], outputs=[pending_eval_table_votes])
-
- main_block.queue(default_concurrency_limit=40)
-
-
- def enable_space_ci_and_return_server(ui: gr.Blocks) -> WebhooksServer:
-     # Taken from https://huggingface.co/spaces/Wauplin/gradio-space-ci/blob/075119aee75ab5e7150bf0814eec91c83482e790/src/gradio_space_ci/webhook.py#L61
-     # Compared to the original, this one does not monkeypatch Gradio, which allows us to define more webhooks.
-     # ht to Lucain!
-     if SPACE_ID is None:
-         print("Not in a Space: Space CI disabled.")
-         return WebhooksServer(ui=main_block)
-
-     if IS_EPHEMERAL_SPACE:
-         print("In an ephemeral Space: Space CI disabled.")
-         return WebhooksServer(ui=main_block)
-
-     card = RepoCard.load(repo_id_or_path=SPACE_ID, repo_type="space")
-     config = card.data.get("space_ci", {})
-     print(f"Enabling Space CI with config from README: {config}")
-
-     return configure_space_ci(
-         blocks=ui,
-         trusted_authors=config.get("trusted_authors"),
-         private=config.get("private", "auto"),
-         variables=config.get("variables", "auto"),
-         secrets=config.get("secrets"),
-         hardware=config.get("hardware"),
-         storage=config.get("storage"),
-     )
-
- # Create webhooks server (with CI url if in Space and not ephemeral)
- webhooks_server = enable_space_ci_and_return_server(ui=main_block)
-
- # Add webhooks
- @webhooks_server.add_webhook
- def update_leaderboard(payload: WebhookPayload) -> None:
-     """Redownloads the leaderboard dataset each time it updates"""
-     if payload.repo.type == "dataset" and payload.event.action == "update":
-         global NEW_DATA_ON_LEADERBOARD
-         logging.info("New data detected, downloading updated leaderboard dataset.")
-
-         # Mark the flag for new data
-         NEW_DATA_ON_LEADERBOARD = True
-
-         # Now actually download the latest data immediately
-         get_latest_data_leaderboard()
-
- # The below code is not used at the moment, as we can manage the queue file locally
- LAST_UPDATE_QUEUE = datetime.datetime.now()
- @webhooks_server.add_webhook
- def update_queue(payload: WebhookPayload) -> None:
-     """Redownloads the queue dataset each time it updates"""
-     if payload.repo.type == "dataset" and payload.event.action == "update":
-         current_time = datetime.datetime.now()
-         global LAST_UPDATE_QUEUE
-         if current_time - LAST_UPDATE_QUEUE > datetime.timedelta(minutes=10):
-             print("Would have updated the queue")
-             # We only redownload if the last update was more than 10 minutes ago, as the queue is
-             # updated regularly and heavy to download
-             download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
-         LAST_UPDATE_QUEUE = datetime.datetime.now()
-
- webhooks_server.launch()
-
- scheduler = BackgroundScheduler()
- scheduler.add_job(restart_space, "interval", hours=1)  # Restart every 1h
- logging.info("Scheduler initialized to restart space every 1 hour.")
- scheduler.start()
backend/Dockerfile.dev ADDED
@@ -0,0 +1,25 @@
+ FROM python:3.9-slim
+
+ WORKDIR /app
+
+ # Install required system dependencies
+ RUN apt-get update && apt-get install -y \
+     build-essential \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Install poetry
+ RUN pip install poetry
+
+ # Copy Poetry configuration files
+ COPY pyproject.toml poetry.lock* ./
+
+ # Install dependencies
+ RUN poetry config virtualenvs.create false && \
+     poetry install --no-interaction --no-ansi --no-root
+
+ # Environment variables configuration for logs
+ ENV PYTHONUNBUFFERED=1
+ ENV LOG_LEVEL=INFO
+
+ # In dev, mount volume directly
+ CMD ["uvicorn", "app.asgi:app", "--host", "0.0.0.0", "--port", "7860", "--reload", "--log-level", "warning", "--no-access-log"]
backend/README.md ADDED
@@ -0,0 +1,352 @@
+ # Backend - Open LLM Leaderboard 🏆
+
+ FastAPI backend for the Open LLM Leaderboard. This service is part of a larger architecture that includes a React frontend. For complete project installation, see the [main README](../README.md).
+
+ ## ✨ Features
+
+ - 📊 REST API for LLM models leaderboard management
+ - 🗳️ Voting and ranking system
+ - 🔄 HuggingFace Hub integration
+ - 🚀 Caching and performance optimizations
+
+ ## 🏗 Architecture
+
+ ```mermaid
+ flowchart TD
+     Client(["**Frontend**<br><br>React Application"]) --> API["**API Server**<br><br>FastAPI REST Endpoints"]
+
+     subgraph Backend
+         API --> Core["**Core Layer**<br><br>• Middleware<br>• Cache<br>• Rate Limiting"]
+         Core --> Services["**Services Layer**<br><br>• Business Logic<br>• Data Processing"]
+
+         subgraph Services Layer
+             Services --> Models["**Model Service**<br><br>• Model Submission<br>• Evaluation Pipeline"]
+             Services --> Votes["**Vote Service**<br><br>• Vote Management<br>• Data Synchronization"]
+             Services --> Board["**Leaderboard Service**<br><br>• Rankings<br>• Performance Metrics"]
+         end
+
+         Models --> Cache["**Cache Layer**<br><br>• In-Memory Store<br>• Auto Invalidation"]
+         Votes --> Cache
+         Board --> Cache
+
+         Models --> HF["**HuggingFace Hub**<br><br>• Models Repository<br>• Datasets Access"]
+         Votes --> HF
+         Board --> HF
+     end
+
+     style Client fill:#f9f,stroke:#333,stroke-width:2px
+     style Models fill:#bbf,stroke:#333,stroke-width:2px
+     style Votes fill:#bbf,stroke:#333,stroke-width:2px
+     style Board fill:#bbf,stroke:#333,stroke-width:2px
+     style HF fill:#bfb,stroke:#333,stroke-width:2px
+ ```
+
+ ## 🛠️ HuggingFace Datasets
+
+ The application uses several datasets on the HuggingFace Hub:
+
+ ### 1. Requests Dataset (`{HF_ORGANIZATION}/requests`)
+
+ - **Operations**:
+   - 📤 `POST /api/models/submit`: Adds a JSON file for each new model submission
+   - 📥 `GET /api/models/status`: Reads files to get models status
+ - **Format**: One JSON file per model with submission details
+ - **Updates**: On each new model submission
+
+ ### 2. Votes Dataset (`{HF_ORGANIZATION}/votes`)
+
+ - **Operations**:
+   - 📤 `POST /api/votes/{model_id}`: Adds a new vote
+   - 📥 `GET /api/votes/model/{provider}/{model}`: Reads model votes
+   - 📥 `GET /api/votes/user/{user_id}`: Reads user votes
+ - **Format**: JSONL with one vote per line
+ - **Sync**: Bidirectional between local cache and Hub
+
+ ### 3. Contents Dataset (`{HF_ORGANIZATION}/contents`)
+
+ - **Operations**:
+   - 📥 `GET /api/leaderboard`: Reads raw data
+   - 📥 `GET /api/leaderboard/formatted`: Reads and formats data
+ - **Format**: Main dataset containing all scores and metrics
+ - **Updates**: Automatic after model evaluations
+
+ ### 4. Maintainers Highlight Dataset (`{HF_ORGANIZATION}/maintainers-highlight`)
+
+ - **Operations**:
+   - 📥 Read-only access for highlighted models
+ - **Format**: List of models selected by maintainers
+ - **Updates**: Manual by maintainers
+
+ ## 🛠 Local Development
+
+ ### Prerequisites
+
+ - Python 3.9+
+ - [Poetry](https://python-poetry.org/docs/#installation)
+
+ ### Standalone Installation (without Docker)
+
+ ```bash
+ # Install dependencies
+ poetry install
+
+ # Setup configuration
+ cp .env.example .env
+
+ # Start development server
+ poetry run uvicorn app.asgi:app --host 0.0.0.0 --port 7860 --reload
+ ```
+
+ Server will be available at http://localhost:7860
+
+ ## ⚙️ Configuration
+
+ | Variable     | Description                           | Default     |
+ | ------------ | ------------------------------------- | ----------- |
+ | ENVIRONMENT  | Environment (development/production)  | development |
+ | HF_TOKEN     | HuggingFace authentication token      | -           |
+ | PORT         | Server port                           | 7860        |
+ | LOG_LEVEL    | Logging level (INFO/DEBUG/WARNING)    | INFO        |
+ | CORS_ORIGINS | Allowed CORS origins                  | ["*"]       |
+ | CACHE_TTL    | Cache Time To Live in seconds         | 300         |
+
+ ## 🔧 Middleware
+
+ The backend uses several middleware layers for optimal performance and security:
+
+ - **CORS Middleware**: Handles Cross-Origin Resource Sharing
+ - **GZIP Middleware**: Compresses responses > 500 bytes
+ - **Rate Limiting**: Prevents API abuse
+ - **Caching**: In-memory caching with automatic invalidation
+
+ ## 📝 Logging
+
+ The application uses a structured logging system with:
+
+ - Formatted console output
+ - Different log levels per component
+ - Request/Response logging
+ - Performance metrics
+ - Error tracking
+
+ ## 📁 File Structure
+
+ ```
+ backend/
+ ├── app/                 # Source code
+ │   ├── api/             # Routes and endpoints
+ │   │   └── endpoints/   # Endpoint handlers
+ │   ├── core/            # Configurations
+ │   ├── services/        # Business logic
+ │   └── utils/           # Utilities
+ └── tests/               # Tests
+ ```
+
+ ## 📚 API
+
+ Swagger documentation available at http://localhost:7860/docs
+
+ ### Main Endpoints & Data Structures
+
+ #### Leaderboard
+
+ - `GET /api/leaderboard/formatted` - Formatted data with computed fields and metadata
+
+ ```typescript
+ Response {
+   models: [{
+     id: string,              // eval_name
+     model: {
+       name: string,          // fullname
+       sha: string,           // Model sha
+       precision: string,     // e.g. "fp16", "int8"
+       type: string,          // e.g. "fine-tuned-on-domain-specific-dataset"
+       weight_type: string,
+       architecture: string,
+       average_score: number,
+       has_chat_template: boolean
+     },
+     evaluations: {
+       ifeval: {
+         name: "IFEval",
+         value: number,       // Raw score
+         normalized_score: number
+       },
+       bbh: {
+         name: "BBH",
+         value: number,
+         normalized_score: number
+       },
+       math: {
+         name: "MATH Level 5",
+         value: number,
+         normalized_score: number
+       },
+       gpqa: {
+         name: "GPQA",
+         value: number,
+         normalized_score: number
+       },
+       musr: {
+         name: "MUSR",
+         value: number,
+         normalized_score: number
+       },
+       mmlu_pro: {
+         name: "MMLU-PRO",
+         value: number,
+         normalized_score: number
+       }
+     },
+     features: {
+       is_not_available_on_hub: boolean,
+       is_merged: boolean,
+       is_moe: boolean,
+       is_flagged: boolean,
+       is_highlighted_by_maintainer: boolean
+     },
+     metadata: {
+       upload_date: string,
+       submission_date: string,
+       generation: string,
+       base_model: string,
+       hub_license: string,
+       hub_hearts: number,
+       params_billions: number,
+       co2_cost: number       // CO₂ cost in kg
+     }
+   }]
+ }
+ ```
+
+ - `GET /api/leaderboard` - Raw data from the HuggingFace dataset
+ ```typescript
+ Response {
+   models: [{
+     eval_name: string,
+     Precision: string,
+     Type: string,
+     "Weight type": string,
+     Architecture: string,
+     Model: string,
+     fullname: string,
+     "Model sha": string,
+     "Average ⬆️": number,
+     "Hub License": string,
+     "Hub ❤️": number,
+     "#Params (B)": number,
+     "Available on the hub": boolean,
+     Not_Merged: boolean,
+     MoE: boolean,
+     Flagged: boolean,
+     "Chat Template": boolean,
+     "CO₂ cost (kg)": number,
+     "IFEval Raw": number,
+     IFEval: number,
+     "BBH Raw": number,
+     BBH: number,
+     "MATH Lvl 5 Raw": number,
+     "MATH Lvl 5": number,
+     "GPQA Raw": number,
+     GPQA: number,
+     "MUSR Raw": number,
+     MUSR: number,
+     "MMLU-PRO Raw": number,
+     "MMLU-PRO": number,
+     "Maintainer's Highlight": boolean,
+     "Upload To Hub Date": string,
+     "Submission Date": string,
+     Generation: string,
+     "Base Model": string
+   }]
+ }
+ ```
+
+ #### Models
+
+ - `GET /api/models/status` - Get all models grouped by status
+ ```typescript
+ Response {
+   pending: [{
+     name: string,
+     submitter: string,
+     revision: string,
+     wait_time: string,
+     submission_time: string,
+     status: "PENDING" | "EVALUATING" | "FINISHED",
+     precision: string
+   }],
+   evaluating: Array<Model>,
+   finished: Array<Model>
+ }
+ ```
+ - `GET /api/models/pending` - Get pending models only
+ - `POST /api/models/submit` - Submit model
+
+ ```typescript
+ Request {
+   user_id: string,
+   model_id: string,
+   base_model?: string,
+   precision?: string,
+   model_type: string
+ }
+
+ Response {
+   status: string,
+   message: string
+ }
+ ```
+
+ - `GET /api/models/{model_id}/status` - Get model status
+
+ #### Votes
+
+ - `POST /api/votes/{model_id}` - Vote
+
+ ```typescript
+ Request {
+   vote_type: "up" | "down",
+   user_id: string            // HuggingFace username
+ }
+
+ Response {
+   success: boolean,
+   message: string
+ }
+ ```
+
+ - `GET /api/votes/model/{provider}/{model}` - Get model votes
+ ```typescript
+ Response {
+   total_votes: number,
+   up_votes: number,
+   down_votes: number
+ }
+ ```
+ - `GET /api/votes/user/{user_id}` - Get user votes
+ ```typescript
+ Response Array<{
+   model_id: string,
+   vote_type: string,
+   timestamp: string
+ }>
+ ```
+
+ ## 🔒 Authentication
+
+ The backend uses HuggingFace token-based authentication for secure API access. Make sure to:
+
+ 1. Set your HF_TOKEN in the .env file
+ 2. Include the token in API requests via Bearer authentication
+ 3. Keep your token secure and never commit it to version control
+
+ ## 🚀 Performance
+
+ The backend implements several optimizations:
+
+ - In-memory caching with configurable TTL (Time To Live)
+ - Batch processing for model evaluations
+ - Rate limiting for API endpoints
+ - Efficient database queries with proper indexing
+ - Automatic cache invalidation for votes
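
The endpoint shapes documented in this README map onto plain HTTP calls. A few illustrative curl invocations against a local dev server (host, org/model ids, and usernames are placeholders; the request body follows the README's submit shape):

```bash
# Raw and formatted leaderboard data (GZIP-compressed beyond 500 bytes)
curl http://localhost:7860/api/leaderboard
curl http://localhost:7860/api/leaderboard/formatted

# Submit a model for evaluation
curl -X POST http://localhost:7860/api/models/submit \
  -H "Content-Type: application/json" \
  -d '{"user_id": "some-user", "model_id": "some-org/some-model", "model_type": "fine-tuned"}'

# Read the vote totals for a model
curl http://localhost:7860/api/votes/model/some-org/some-model
```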
backend/app/api/__init__.py ADDED
@@ -0,0 +1,5 @@
+ """
+ API package initialization
+ """
+
+ __all__ = ["endpoints"]
backend/app/api/dependencies.py ADDED
@@ -0,0 +1,34 @@
+ from fastapi import Depends, HTTPException
+ import logging
+ from app.services.models import ModelService
+ from app.services.votes import VoteService
+ from app.utils.logging import LogFormatter
+
+ logger = logging.getLogger(__name__)
+
+ model_service = ModelService()
+ vote_service = VoteService()
+
+ async def get_model_service() -> ModelService:
+     """Dependency to get ModelService instance"""
+     try:
+         logger.info(LogFormatter.info("Initializing model service dependency"))
+         await model_service.initialize()
+         logger.info(LogFormatter.success("Model service initialized"))
+         return model_service
+     except Exception as e:
+         error_msg = "Failed to initialize model service"
+         logger.error(LogFormatter.error(error_msg, e))
+         raise HTTPException(status_code=500, detail=str(e))
+
+ async def get_vote_service() -> VoteService:
+     """Dependency to get VoteService instance"""
+     try:
+         logger.info(LogFormatter.info("Initializing vote service dependency"))
+         await vote_service.initialize()
+         logger.info(LogFormatter.success("Vote service initialized"))
+         return vote_service
+     except Exception as e:
+         error_msg = "Failed to initialize vote service"
+         logger.error(LogFormatter.error(error_msg, e))
+         raise HTTPException(status_code=500, detail=str(e))
backend/app/api/endpoints/leaderboard.py ADDED
@@ -0,0 +1,49 @@
+ from fastapi import APIRouter
+ from typing import List, Dict, Any
+ from app.services.leaderboard import LeaderboardService
+ from app.core.fastapi_cache import cached, build_cache_key
+ import logging
+ from app.utils.logging import LogFormatter
+
+ logger = logging.getLogger(__name__)
+ router = APIRouter()
+ leaderboard_service = LeaderboardService()
+
+ def leaderboard_key_builder(func, namespace: str = "leaderboard", **kwargs):
+     """Build cache key for leaderboard data"""
+     key_type = "raw" if func.__name__ == "get_leaderboard" else "formatted"
+     key = build_cache_key(namespace, key_type)
+     logger.debug(LogFormatter.info(f"Built leaderboard cache key: {key}"))
+     return key
+
+ @router.get("")
+ @cached(expire=300, key_builder=leaderboard_key_builder)
+ async def get_leaderboard() -> List[Dict[str, Any]]:
+     """
+     Get raw leaderboard data
+     Response will be automatically GZIP compressed if size > 500 bytes
+     """
+     try:
+         logger.info(LogFormatter.info("Fetching raw leaderboard data"))
+         data = await leaderboard_service.fetch_raw_data()
+         logger.info(LogFormatter.success(f"Retrieved {len(data)} leaderboard entries"))
+         return data
+     except Exception as e:
+         logger.error(LogFormatter.error("Failed to fetch raw leaderboard data", e))
+         raise
+
+ @router.get("/formatted")
+ @cached(expire=300, key_builder=leaderboard_key_builder)
+ async def get_formatted_leaderboard() -> List[Dict[str, Any]]:
+     """
+     Get formatted leaderboard data with restructured objects
+     Response will be automatically GZIP compressed if size > 500 bytes
+     """
+     try:
+         logger.info(LogFormatter.info("Fetching formatted leaderboard data"))
+         data = await leaderboard_service.get_formatted_data()
+         logger.info(LogFormatter.success(f"Retrieved {len(data)} formatted entries"))
+         return data
+     except Exception as e:
+         logger.error(LogFormatter.error("Failed to fetch formatted leaderboard data", e))
+         raise
backend/app/api/endpoints/models.py ADDED
@@ -0,0 +1,103 @@
+ from fastapi import APIRouter, HTTPException, Depends
+ from typing import Dict, Any, List
+ import logging
+ from app.services.models import ModelService
+ from app.api.dependencies import get_model_service
+ from app.core.fastapi_cache import cached
+ from app.utils.logging import LogFormatter
+
+ logger = logging.getLogger(__name__)
+ router = APIRouter(tags=["models"])
+
+ @router.get("/status")
+ @cached(expire=300)
+ async def get_models_status(
+     model_service: ModelService = Depends(get_model_service)
+ ) -> Dict[str, List[Dict[str, Any]]]:
+     """Get all models grouped by status"""
+     try:
+         logger.info(LogFormatter.info("Fetching status for all models"))
+         result = await model_service.get_models()
+         stats = {
+             status: len(models) for status, models in result.items()
+         }
+         for line in LogFormatter.stats(stats, "Models by Status"):
+             logger.info(line)
+         return result
+     except Exception as e:
+         logger.error(LogFormatter.error("Failed to get models status", e))
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @router.get("/pending")
+ @cached(expire=60)
+ async def get_pending_models(
+     model_service: ModelService = Depends(get_model_service)
+ ) -> List[Dict[str, Any]]:
+     """Get all models waiting for evaluation"""
+     try:
+         logger.info(LogFormatter.info("Fetching pending models"))
+         models = await model_service.get_models()
+         pending = models.get("pending", [])
+         logger.info(LogFormatter.success(f"Found {len(pending)} pending models"))
+         return pending
+     except Exception as e:
+         logger.error(LogFormatter.error("Failed to get pending models", e))
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @router.post("/submit")
+ async def submit_model(
+     model_data: Dict[str, Any],
+     model_service: ModelService = Depends(get_model_service)
+ ) -> Dict[str, Any]:
+     try:
+         logger.info(LogFormatter.section("MODEL SUBMISSION"))
+
+         user_id = model_data.pop('user_id', None)
+         if not user_id:
+             error_msg = "user_id is required"
+             logger.error(LogFormatter.error("Validation failed", error_msg))
+             raise ValueError(error_msg)
+
+         # Log submission details
+         submission_info = {
+             "Model_ID": model_data.get("model_id"),
+             "User": user_id,
+             "Base_Model": model_data.get("base_model"),
+             "Precision": model_data.get("precision"),
+             "Model_Type": model_data.get("model_type")
+         }
+         for line in LogFormatter.tree(submission_info, "Submission Details"):
+             logger.info(line)
+
+         result = await model_service.submit_model(model_data, user_id)
+         logger.info(LogFormatter.success("Model submitted successfully"))
+         return result
+
+     except ValueError as e:
+         logger.error(LogFormatter.error("Invalid submission data", e))
+         raise HTTPException(status_code=400, detail=str(e))
+     except Exception as e:
+         logger.error(LogFormatter.error("Submission failed", e))
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @router.get("/{model_id}/status")
+ async def get_model_status(
+     model_id: str,
+     model_service: ModelService = Depends(get_model_service)
+ ) -> Dict[str, Any]:
+     try:
+         logger.info(LogFormatter.info(f"Checking status for model: {model_id}"))
+         status = await model_service.get_model_status(model_id)
+
+         if status["status"] != "not_found":
+             logger.info(LogFormatter.success("Status found"))
+             for line in LogFormatter.tree(status, "Model Status"):
+                 logger.info(line)
+         else:
+             logger.warning(LogFormatter.warning(f"No status found for model: {model_id}"))
+
+         return status
+
+     except Exception as e:
+         logger.error(LogFormatter.error("Failed to get model status", e))
+         raise HTTPException(status_code=500, detail=str(e))
backend/app/api/endpoints/votes.py ADDED
@@ -0,0 +1,105 @@
+ from fastapi import APIRouter, HTTPException, Query, Depends
+ from typing import Dict, Any, List
+ from app.services.votes import VoteService
+ from app.core.fastapi_cache import cached, build_cache_key, invalidate_cache_key
+ import logging
+ from app.utils.logging import LogFormatter
+
+ logger = logging.getLogger(__name__)
+ router = APIRouter()
+ vote_service = VoteService()
+
+ def model_votes_key_builder(func, namespace: str = "model_votes", **kwargs):
+     """Build cache key for model votes"""
+     provider = kwargs.get('provider')
+     model = kwargs.get('model')
+     key = build_cache_key(namespace, provider, model)
+     logger.debug(LogFormatter.info(f"Built model votes cache key: {key}"))
+     return key
+
+ def user_votes_key_builder(func, namespace: str = "user_votes", **kwargs):
+     """Build cache key for user votes"""
+     user_id = kwargs.get('user_id')
+     key = build_cache_key(namespace, user_id)
+     logger.debug(LogFormatter.info(f"Built user votes cache key: {key}"))
+     return key
+
+ @router.post("/{model_id:path}")
+ async def add_vote(
+     model_id: str,
+     vote_type: str = Query(..., description="Type of vote (up/down)"),
+     user_id: str = Query(..., description="HuggingFace username")
+ ) -> Dict[str, Any]:
+     try:
+         logger.info(LogFormatter.section("ADDING VOTE"))
+         stats = {
+             "Model": model_id,
+             "User": user_id,
+             "Type": vote_type
+         }
+         for line in LogFormatter.tree(stats, "Vote Details"):
+             logger.info(line)
+
+         await vote_service.initialize()
+         result = await vote_service.add_vote(model_id, user_id, vote_type)
+
+         # Invalidate affected caches
+         try:
+             logger.info(LogFormatter.subsection("CACHE INVALIDATION"))
+             provider, model = model_id.split('/', 1)
+
+             # Build and invalidate cache keys
+             model_cache_key = build_cache_key("model_votes", provider, model)
+             user_cache_key = build_cache_key("user_votes", user_id)
+
+             invalidate_cache_key(model_cache_key)
+             invalidate_cache_key(user_cache_key)
+
+             cache_stats = {
+                 "Model_Cache": model_cache_key,
+                 "User_Cache": user_cache_key
+             }
+             for line in LogFormatter.tree(cache_stats, "Invalidated Caches"):
+                 logger.info(line)
+
+         except Exception as e:
+             logger.error(LogFormatter.error("Failed to invalidate cache", e))
+
+         return result
+     except Exception as e:
+         logger.error(LogFormatter.error("Failed to add vote", e))
+         raise HTTPException(status_code=400, detail=str(e))
+
+ @router.get("/model/{provider}/{model}")
+ @cached(expire=60, key_builder=model_votes_key_builder)
+ async def get_model_votes(
+     provider: str,
+     model: str
+ ) -> Dict[str, Any]:
+     """Get all votes for a specific model"""
+     try:
+         logger.info(LogFormatter.info(f"Fetching votes for model: {provider}/{model}"))
+         await vote_service.initialize()
+         model_id = f"{provider}/{model}"
+         result = await vote_service.get_model_votes(model_id)
+         logger.info(LogFormatter.success(f"Found {result.get('total_votes', 0)} votes"))
+         return result
+     except Exception as e:
+         logger.error(LogFormatter.error("Failed to get model votes", e))
+         raise HTTPException(status_code=400, detail=str(e))
+
+ @router.get("/user/{user_id}")
+ @cached(expire=60, key_builder=user_votes_key_builder)
+ async def get_user_votes(
+     user_id: str
+ ) -> List[Dict[str, Any]]:
+     """Get all votes from a specific user"""
+     try:
+         logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}"))
+         await vote_service.initialize()
+         votes = await vote_service.get_user_votes(user_id)
+         logger.info(LogFormatter.success(f"Found {len(votes)} votes"))
+         return votes
+     except Exception as e:
+         logger.error(LogFormatter.error("Failed to get user votes", e))
+         raise HTTPException(status_code=400, detail=str(e))
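
Two details of this router are easy to miss: the `{model_id:path}` converter lets a `provider/model` id keep its slash, and `vote_type`/`user_id` are declared as `Query(...)` parameters, so a vote is cast through the query string rather than a JSON body. A sketch with placeholder names:

```bash
curl -X POST "http://localhost:7860/api/votes/some-org/some-model?vote_type=up&user_id=some-user"
```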
backend/app/api/router.py ADDED
@@ -0,0 +1,9 @@
+ from fastapi import APIRouter
+
+ from app.api.endpoints import leaderboard, votes, models
+
+ router = APIRouter()
+
+ router.include_router(leaderboard.router, prefix="/leaderboard", tags=["leaderboard"])
+ router.include_router(votes.router, prefix="/votes", tags=["votes"])
+ router.include_router(models.router, prefix="/models", tags=["models"])
backend/app/asgi.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ASGI entry point for the Open LLM Leaderboard API.
3
+ """
4
+ import os
5
+ import uvicorn
6
+ import logging
7
+ import logging.config
8
+ from fastapi import FastAPI
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+ from fastapi.middleware.gzip import GZipMiddleware
11
+ import sys
12
+
13
+ from app.api.router import router
14
+ from app.core.fastapi_cache import setup_cache
15
+ from app.utils.logging import LogFormatter
16
+ from app.config import hf_config
17
+
18
+ # Configure logging before anything else
19
+ LOGGING_CONFIG = {
20
+ "version": 1,
21
+ "disable_existing_loggers": True,
22
+ "formatters": {
23
+ "default": {
24
+ "format": "%(name)s - %(levelname)s - %(message)s",
25
+ }
26
+ },
27
+ "handlers": {
28
+ "default": {
29
+ "formatter": "default",
30
+ "class": "logging.StreamHandler",
31
+ "stream": "ext://sys.stdout",
32
+ }
33
+ },
34
+ "loggers": {
35
+ "uvicorn": {
36
+ "handlers": ["default"],
37
+ "level": "WARNING",
38
+ "propagate": False,
39
+ },
40
+ "uvicorn.error": {
41
+ "level": "WARNING",
42
+ "handlers": ["default"],
43
+ "propagate": False,
44
+ },
45
+ "uvicorn.access": {
46
+ "handlers": ["default"],
47
+ "level": "INFO",
48
+ "propagate": False,
49
+ },
50
+ "app": {
51
+ "handlers": ["default"],
52
+ "level": "INFO",
53
+ "propagate": False,
54
+ }
55
+ },
56
+ "root": {
57
+ "handlers": ["default"],
58
+ "level": "INFO",
59
+ }
60
+ }
61
+
62
+ # Apply logging configuration
63
+ logging.config.dictConfig(LOGGING_CONFIG)
64
+ logger = logging.getLogger("app")
65
+
66
+ # Create FastAPI application
67
+ app = FastAPI(
68
+ title="Open LLM Leaderboard",
69
+ version="1.0.0",
70
+ docs_url="/docs",
71
+ )
72
+
73
+ # Add CORS middleware
74
+ app.add_middleware(
75
+ CORSMiddleware,
76
+ allow_origins=["*"],
77
+ allow_credentials=True,
78
+ allow_methods=["*"],
79
+ allow_headers=["*"],
80
+ )
81
+
82
+ # Add GZIP compression
83
+ app.add_middleware(GZipMiddleware, minimum_size=500)
84
+
85
+ # Include API router
86
+ app.include_router(router, prefix="/api")
87
+
88
+ @app.on_event("startup")
89
+ async def startup_event():
90
+ """Initialize services on startup"""
91
+ logger.info("\n")
92
+ logger.info(LogFormatter.section("APPLICATION STARTUP"))
93
+
94
+ # Log HF configuration
95
+ logger.info(LogFormatter.section("HUGGING FACE CONFIGURATION"))
96
+ logger.info(LogFormatter.info(f"Organization: {hf_config.HF_ORGANIZATION}"))
97
+ logger.info(LogFormatter.info(f"Token Status: {'Present' if hf_config.HF_TOKEN else 'Missing'}"))
98
+ logger.info(LogFormatter.info(f"Using repositories:"))
99
+ logger.info(LogFormatter.info(f" - Queue: {hf_config.QUEUE_REPO}"))
100
+ logger.info(LogFormatter.info(f" - Aggregated: {hf_config.AGGREGATED_REPO}"))
101
+ logger.info(LogFormatter.info(f" - Votes: {hf_config.VOTES_REPO}"))
102
+ logger.info(LogFormatter.info(f" - Maintainers Highlight: {hf_config.MAINTAINERS_HIGHLIGHT_REPO}"))
103
+
104
+ # Setup cache
105
+ setup_cache()
106
+ logger.info(LogFormatter.success("FastAPI Cache initialized with in-memory backend"))
backend/app/config/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ """
2
+ Configuration module for the Open LLM Leaderboard backend.
3
+ All configuration values are imported from base.py to avoid circular dependencies.
4
+ """
5
+
6
+ from .base import *
backend/app/config/base.py ADDED
@@ -0,0 +1,38 @@
1
+ import os
2
+ from pathlib import Path
3
+
4
+ # Server configuration
5
+ HOST = "0.0.0.0"
6
+ PORT = 7860
7
+ WORKERS = 4
8
+ RELOAD = os.environ.get("ENVIRONMENT") == "development"
9
+
10
+ # CORS configuration
11
+ ORIGINS = ["http://localhost:3000"] if os.getenv("ENVIRONMENT") == "development" else ["*"]
12
+
13
+ # Cache configuration
14
+ CACHE_TTL = int(os.environ.get("CACHE_TTL", 300)) # 5 minutes default
15
+
16
+ # Rate limiting
17
+ RATE_LIMIT_PERIOD = 7 # days
18
+ RATE_LIMIT_QUOTA = 5
19
+ HAS_HIGHER_RATE_LIMIT = []
20
+
21
+ # HuggingFace configuration
22
+ HF_TOKEN = os.environ.get("HF_TOKEN")
23
+ HF_ORGANIZATION = "open-llm-leaderboard"
24
+ API = {
25
+ "INFERENCE": "https://api-inference.huggingface.co/models",
26
+ "HUB": "https://huggingface.co"
27
+ }
28
+
29
+ # Cache paths
30
+ CACHE_ROOT = Path(os.environ.get("HF_HOME", ".cache"))
31
+ DATASETS_CACHE = CACHE_ROOT / "datasets"
32
+ MODELS_CACHE = CACHE_ROOT / "models"
33
+ VOTES_CACHE = CACHE_ROOT / "votes"
34
+ EVAL_CACHE = CACHE_ROOT / "eval-queue"
35
+
36
+ # Repository configuration
37
+ QUEUE_REPO = f"{HF_ORGANIZATION}/requests"
38
+ EVAL_REQUESTS_PATH = EVAL_CACHE / "eval_requests.jsonl"
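All of these values resolve at import time, so overrides must be in the environment before the module is first imported. A small sketch of overriding them via environment variables:

import os

# Must happen before any `app.config` import
os.environ["ENVIRONMENT"] = "development"  # enables RELOAD and localhost-only CORS
os.environ["CACHE_TTL"] = "600"            # 10-minute cache instead of the 5-minute default

from app.config import base

assert base.RELOAD is True
assert base.CACHE_TTL == 600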
backend/app/config/hf_config.py ADDED
@@ -0,0 +1,35 @@
1
+ """
2
+ Hugging Face configuration module
3
+ """
4
+ import os
5
+ import logging
6
+ from typing import Optional
7
+ from huggingface_hub import HfApi
8
+ from pathlib import Path
9
+ from app.core.cache import cache_config
10
+ from app.utils.logging import LogFormatter
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+ # Organization or user who owns the datasets
15
+ HF_ORGANIZATION = "open-llm-leaderboard"
16
+ # HF_ORGANIZATION = "open-llm-leaderboard"
17
+
18
+ # Get HF token directly from environment
19
+ HF_TOKEN = os.environ.get("HF_TOKEN")
20
+ if not HF_TOKEN:
21
+ logger.warning("HF_TOKEN not found in environment variables. Some features may be limited.")
22
+
23
+ # Initialize HF API
24
+ API = HfApi(token=HF_TOKEN)
25
+
26
+ # Repository configuration
27
+ QUEUE_REPO = f"{HF_ORGANIZATION}/requests"
28
+ AGGREGATED_REPO = f"{HF_ORGANIZATION}/contents"
29
+ VOTES_REPO = f"{HF_ORGANIZATION}/votes"
30
+ MAINTAINERS_HIGHLIGHT_REPO = f"{HF_ORGANIZATION}/maintainers-highlight"
31
+
32
+ # File paths from cache config
33
+ VOTES_PATH = cache_config.votes_file
34
+ EVAL_REQUESTS_PATH = cache_config.eval_requests_file
35
+ MODEL_CACHE_DIR = cache_config.models_cache
backend/app/config/logging_config.py ADDED
@@ -0,0 +1,38 @@
1
+ import logging
2
+ import sys
3
+ from tqdm import tqdm
4
+
5
+ def get_tqdm_handler():
6
+ """
7
+ Creates a special handler for tqdm that doesn't interfere with other logs.
8
+ """
9
+ class TqdmLoggingHandler(logging.Handler):
10
+ def emit(self, record):
11
+ try:
12
+ msg = self.format(record)
13
+ tqdm.write(msg)
14
+ self.flush()
15
+ except Exception:
16
+ self.handleError(record)
17
+
18
+ return TqdmLoggingHandler()
19
+
20
+ def setup_service_logger(service_name: str) -> logging.Logger:
21
+ """
22
+ Configure a specific logger for a given service.
23
+ """
24
+ logger = logging.getLogger(f"app.services.{service_name}")
25
+
26
+ # If the logger already has handlers, don't reconfigure it
27
+ if logger.handlers:
28
+ return logger
29
+
30
+ # Add tqdm handler for this service
31
+ tqdm_handler = get_tqdm_handler()
32
+ tqdm_handler.setFormatter(logging.Formatter('%(name)s - %(levelname)s - %(message)s'))
33
+ logger.addHandler(tqdm_handler)
34
+
35
+ # Don't propagate logs to parent loggers
36
+ logger.propagate = False
37
+
38
+ return logger
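A short usage sketch of the tqdm-aware service logger, showing how progress bars and log lines avoid interleaving:

from tqdm import tqdm
from app.config.logging_config import setup_service_logger

logger = setup_service_logger("demo")
for i in tqdm(range(1000), desc="processing"):
    if i % 250 == 0:
        logger.info("checkpoint at item %d", i)  # routed through tqdm.write()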
backend/app/core/cache.py ADDED
@@ -0,0 +1,109 @@
1
+ import os
2
+ import shutil
3
+ from pathlib import Path
4
+ from datetime import timedelta
5
+ import logging
6
+ from app.utils.logging import LogFormatter
7
+ from app.config.base import (
8
+ CACHE_ROOT,
9
+ DATASETS_CACHE,
10
+ MODELS_CACHE,
11
+ VOTES_CACHE,
12
+ EVAL_CACHE,
13
+ CACHE_TTL
14
+ )
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ class CacheConfig:
19
+ def __init__(self):
20
+ # Get cache paths from config
21
+ self.cache_root = CACHE_ROOT
22
+ self.datasets_cache = DATASETS_CACHE
23
+ self.models_cache = MODELS_CACHE
24
+ self.votes_cache = VOTES_CACHE
25
+ self.eval_cache = EVAL_CACHE
26
+
27
+ # Specific files
28
+ self.votes_file = self.votes_cache / "votes_data.jsonl"
29
+ self.eval_requests_file = self.eval_cache / "eval_requests.jsonl"
30
+
31
+ # Cache TTL
32
+ self.cache_ttl = timedelta(seconds=CACHE_TTL)
33
+
34
+ self._initialize_cache_dirs()
35
+ self._setup_environment()
36
+
37
+ def _initialize_cache_dirs(self):
38
+ """Initialize all necessary cache directories"""
39
+ try:
40
+ logger.info(LogFormatter.section("CACHE INITIALIZATION"))
41
+
42
+ cache_dirs = {
43
+ "Root": self.cache_root,
44
+ "Datasets": self.datasets_cache,
45
+ "Models": self.models_cache,
46
+ "Votes": self.votes_cache,
47
+ "Eval": self.eval_cache
48
+ }
49
+
50
+ for name, cache_dir in cache_dirs.items():
51
+ cache_dir.mkdir(parents=True, exist_ok=True)
52
+ logger.info(LogFormatter.success(f"{name} cache directory: {cache_dir}"))
53
+
54
+ except Exception as e:
55
+ logger.error(LogFormatter.error("Failed to create cache directories", e))
56
+ raise
57
+
58
+ def _setup_environment(self):
59
+ """Configure HuggingFace environment variables"""
60
+ logger.info(LogFormatter.subsection("ENVIRONMENT SETUP"))
61
+
62
+ env_vars = {
63
+ "HF_HOME": str(self.cache_root),
64
+ "TRANSFORMERS_CACHE": str(self.models_cache),
65
+ "HF_DATASETS_CACHE": str(self.datasets_cache)
66
+ }
67
+
68
+ for var, value in env_vars.items():
69
+ os.environ[var] = value
70
+ logger.info(LogFormatter.info(f"Set {var}={value}"))
71
+
72
+ def get_cache_path(self, cache_type: str) -> Path:
73
+ """Returns the path for a specific cache type"""
74
+ cache_paths = {
75
+ "datasets": self.datasets_cache,
76
+ "models": self.models_cache,
77
+ "votes": self.votes_cache,
78
+ "eval": self.eval_cache
79
+ }
80
+ return cache_paths.get(cache_type, self.cache_root)
81
+
82
+ def flush_cache(self, cache_type: str = None):
83
+ """Flush specified cache or all caches if no type is specified"""
84
+ try:
85
+ if cache_type:
86
+ logger.info(LogFormatter.section(f"FLUSHING {cache_type.upper()} CACHE"))
87
+ cache_dir = self.get_cache_path(cache_type)
88
+ if cache_dir.exists():
89
+ stats = {
90
+ "Cache_Type": cache_type,
91
+ "Directory": str(cache_dir)
92
+ }
93
+ for line in LogFormatter.tree(stats, "Cache Details"):
94
+ logger.info(line)
95
+ shutil.rmtree(cache_dir)
96
+ cache_dir.mkdir(parents=True, exist_ok=True)
97
+ logger.info(LogFormatter.success("Cache cleared successfully"))
98
+ else:
99
+ logger.info(LogFormatter.section("FLUSHING ALL CACHES"))
100
+ for cache_type in ["datasets", "models", "votes", "eval"]:
101
+ self.flush_cache(cache_type)
102
+ logger.info(LogFormatter.success("All caches cleared successfully"))
103
+
104
+ except Exception as e:
105
+ logger.error(LogFormatter.error("Failed to flush cache", e))
106
+ raise
107
+
108
+ # Singleton instance of cache configuration
109
+ cache_config = CacheConfig()
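Because `cache_config` is instantiated at import time, importing the module is enough to create the directories and set the HF environment variables. Typical interactions, as a sketch:

from app.core.cache import cache_config

print(cache_config.get_cache_path("datasets"))  # e.g. .cache/datasets
print(cache_config.cache_ttl)                   # timedelta(seconds=CACHE_TTL)

# Drop only the datasets cache; models/votes/eval stay intact
cache_config.flush_cache("datasets")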
backend/app/core/fastapi_cache.py ADDED
@@ -0,0 +1,48 @@
1
+ from fastapi_cache import FastAPICache
2
+ from fastapi_cache.backends.inmemory import InMemoryBackend
3
+ from fastapi_cache.decorator import cache
4
+ from datetime import timedelta
5
+ from app.config import CACHE_TTL
6
+ import logging
7
+ from app.utils.logging import LogFormatter
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+ def setup_cache():
12
+ """Initialize FastAPI Cache with in-memory backend"""
13
+ FastAPICache.init(
14
+ backend=InMemoryBackend(),
15
+ prefix="fastapi-cache",
16
+ expire=CACHE_TTL
17
+ )
18
+ logger.info(LogFormatter.success("FastAPI Cache initialized with in-memory backend"))
19
+
20
+ def invalidate_cache_key(key: str):
21
+ """Invalidate a specific cache key"""
22
+ try:
23
+ backend = FastAPICache.get_backend()
24
+ if hasattr(backend, 'delete'):
25
+ backend.delete(key)
26
+ logger.info(LogFormatter.success(f"Cache invalidated for key: {key}"))
27
+ else:
28
+ logger.warning(LogFormatter.warning("Cache backend does not support deletion"))
29
+ except Exception as e:
30
+ logger.error(LogFormatter.error(f"Failed to invalidate cache key: {key}", e))
31
+
32
+ def build_cache_key(namespace: str, *args) -> str:
33
+ """Build a consistent cache key"""
34
+ key = f"fastapi-cache:{namespace}:{':'.join(str(arg) for arg in args)}"
35
+ logger.debug(LogFormatter.info(f"Built cache key: {key}"))
36
+ return key
37
+
38
+ def cached(expire: int = CACHE_TTL, key_builder=None):
39
+ """Decorator for caching endpoint responses
40
+
41
+ Args:
42
+ expire (int): Cache TTL in seconds
43
+ key_builder (callable, optional): Custom key builder function
44
+ """
45
+ return cache(
46
+ expire=expire,
47
+ key_builder=key_builder
48
+ )
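The votes endpoints reference `model_votes_key_builder` and `user_votes_key_builder`, which are defined earlier in endpoints/votes.py and not visible in this view. For the invalidation in `add_vote` to work, a key builder has to emit the same string as `build_cache_key`; a hypothetical reconstruction, not the actual implementation:

def model_votes_key_builder(func, namespace: str = "", **kwargs) -> str:
    # Mirror build_cache_key("model_votes", provider, model) so that
    # invalidate_cache_key() targets the same entry the decorator wrote
    params = kwargs.get("kwargs", {})
    return f"fastapi-cache:model_votes:{params.get('provider')}:{params.get('model')}"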
backend/app/main.py ADDED
@@ -0,0 +1,18 @@
1
+ from fastapi import FastAPI
2
+ import logging
3
+
4
+ # Initialize a basic logging configuration here;
5
+ # app.config.logging_config only provides per-service helpers
6
+ logging.basicConfig(level=logging.INFO, format="%(name)s - %(levelname)s - %(message)s")
7
+ logger = logging.getLogger(__name__)
8
+
9
+ app = FastAPI(title="Open LLM Leaderboard API")
10
+
11
+ @app.on_event("startup")
12
+ async def startup_event():
13
+ logger.info("Starting up the application...")
14
+
15
+ # Import and include routers after app initialization
16
+ from app.api import models, votes
17
+ app.include_router(models.router, prefix="/api", tags=["models"])
18
+ app.include_router(votes.router, prefix="/api", tags=["votes"])
backend/app/services/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from . import hf_service, leaderboard, votes, models
2
+
3
+ __all__ = ["hf_service", "leaderboard", "votes", "models"]
backend/app/services/hf_service.py ADDED
@@ -0,0 +1,50 @@
1
+ from typing import Optional
2
+ from huggingface_hub import HfApi
3
+ from app.config import HF_TOKEN, API
4
+ from app.core.cache import cache_config
5
+ from app.utils.logging import LogFormatter
6
+ import logging
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+ class HuggingFaceService:
11
+ def __init__(self):
12
+ self.api = API
13
+ self.token = HF_TOKEN
14
+ self.cache_dir = cache_config.models_cache
15
+
16
+ async def check_authentication(self) -> bool:
17
+ """Check if the HF token is valid"""
18
+ if not self.token:
19
+ return False
20
+ try:
21
+ logger.info(LogFormatter.info("Checking HF token validity..."))
22
+ self.api.get_token_permission()
23
+ logger.info(LogFormatter.success("HF token is valid"))
24
+ return True
25
+ except Exception as e:
26
+ logger.error(LogFormatter.error("HF token validation failed", e))
27
+ return False
28
+
29
+ async def get_user_info(self) -> Optional[dict]:
30
+ """Get information about the authenticated user"""
31
+ try:
32
+ logger.info(LogFormatter.info("Fetching user information..."))
33
+ info = self.api.whoami()
34
+ logger.info(LogFormatter.success(f"User info retrieved for: {info.get('name', 'Unknown')}"))
35
+ return info
36
+ except Exception as e:
37
+ logger.error(LogFormatter.error("Failed to get user info", e))
38
+ return None
39
+
40
+ def _log_repo_operation(self, operation: str, repo: str, details: str = None):
41
+ """Helper to log repository operations"""
42
+ logger.info(LogFormatter.section(f"HF REPOSITORY OPERATION - {operation.upper()}"))
43
+ stats = {
44
+ "Operation": operation,
45
+ "Repository": repo,
46
+ }
47
+ if details:
48
+ stats["Details"] = details
49
+ for line in LogFormatter.tree(stats):
50
+ logger.info(line)
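A minimal sketch of using the service to fail fast when credentials are absent:

import asyncio
from app.services.hf_service import HuggingFaceService

async def main():
    service = HuggingFaceService()
    if not await service.check_authentication():
        raise SystemExit("HF_TOKEN is missing or invalid")

asyncio.run(main())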
backend/app/services/leaderboard.py ADDED
@@ -0,0 +1,205 @@
1
+ from app.core.cache import cache_config
2
+ from datetime import datetime
3
+ from typing import List, Dict, Any
4
+ import datasets
5
+ from fastapi import HTTPException
6
+ import logging
7
+ from app.config.base import HF_ORGANIZATION
8
+ from app.utils.logging import LogFormatter
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ class LeaderboardService:
13
+ def __init__(self):
14
+ pass
15
+
16
+ async def fetch_raw_data(self) -> List[Dict[str, Any]]:
17
+ """Fetch raw leaderboard data from HuggingFace dataset"""
18
+ try:
19
+ logger.info(LogFormatter.section("FETCHING LEADERBOARD DATA"))
20
+ logger.info(LogFormatter.info(f"Loading dataset from {HF_ORGANIZATION}/contents"))
21
+
22
+ dataset = datasets.load_dataset(
23
+ f"{HF_ORGANIZATION}/contents",
24
+ cache_dir=cache_config.get_cache_path("datasets")
25
+ )["train"]
26
+
27
+ df = dataset.to_pandas()
28
+ data = df.to_dict('records')
29
+
30
+ stats = {
31
+ "Total_Entries": len(data),
32
+ "Dataset_Size": f"{df.memory_usage(deep=True).sum() / 1024 / 1024:.1f}MB"
33
+ }
34
+ for line in LogFormatter.stats(stats, "Dataset Statistics"):
35
+ logger.info(line)
36
+
37
+ return data
38
+
39
+ except Exception as e:
40
+ logger.error(LogFormatter.error("Failed to fetch leaderboard data", e))
41
+ raise HTTPException(status_code=500, detail=str(e))
42
+
43
+ async def get_formatted_data(self) -> List[Dict[str, Any]]:
44
+ """Get formatted leaderboard data"""
45
+ try:
46
+ logger.info(LogFormatter.section("FORMATTING LEADERBOARD DATA"))
47
+
48
+ raw_data = await self.fetch_raw_data()
49
+ formatted_data = []
50
+ type_counts = {}
51
+ error_count = 0
52
+
53
+ # Initialize progress tracking
54
+ total_items = len(raw_data)
55
+ logger.info(LogFormatter.info(f"Processing {total_items:,} entries..."))
56
+
57
+ for i, item in enumerate(raw_data, 1):
58
+ try:
59
+ formatted_item = await self.transform_data(item)
60
+ formatted_data.append(formatted_item)
61
+
62
+ # Count model types
63
+ model_type = formatted_item["model"]["type"]
64
+ type_counts[model_type] = type_counts.get(model_type, 0) + 1
65
+
66
+ except Exception as e:
67
+ error_count += 1
68
+ logger.error(LogFormatter.error(f"Failed to format entry {i}/{total_items}", e))
69
+ continue
70
+
71
+ # Log progress every 10%
72
+ if i % max(1, total_items // 10) == 0:
73
+ progress = (i / total_items) * 100
74
+ logger.info(LogFormatter.info(f"Progress ({progress:.0f}%): {LogFormatter.progress_bar(i, total_items)}"))
75
+
76
+ # Log final statistics
77
+ stats = {
78
+ "Total_Processed": total_items,
79
+ "Successful": len(formatted_data),
80
+ "Failed": error_count
81
+ }
82
+ logger.info(LogFormatter.section("PROCESSING SUMMARY"))
83
+ for line in LogFormatter.stats(stats, "Processing Statistics"):
84
+ logger.info(line)
85
+
86
+ # Log model type distribution
87
+ type_stats = {f"Type_{k}": v for k, v in type_counts.items()}
88
+ logger.info(LogFormatter.subsection("MODEL TYPE DISTRIBUTION"))
89
+ for line in LogFormatter.stats(type_stats):
90
+ logger.info(line)
91
+
92
+ return formatted_data
93
+
94
+ except Exception as e:
95
+ logger.error(LogFormatter.error("Failed to format leaderboard data", e))
96
+ raise HTTPException(status_code=500, detail=str(e))
97
+
98
+ async def transform_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
99
+ """Transform raw data into the format expected by the frontend"""
100
+ try:
101
+ # Extract model name for logging
102
+ model_name = data.get("fullname", "Unknown")
103
+ logger.debug(LogFormatter.info(f"Transforming data for model: {model_name}"))
104
+
105
+ evaluations = {
106
+ "ifeval": {
107
+ "name": "IFEval",
108
+ "value": data.get("IFEval Raw", 0),
109
+ "normalized_score": data.get("IFEval", 0)
110
+ },
111
+ "bbh": {
112
+ "name": "BBH",
113
+ "value": data.get("BBH Raw", 0),
114
+ "normalized_score": data.get("BBH", 0)
115
+ },
116
+ "math": {
117
+ "name": "MATH Level 5",
118
+ "value": data.get("MATH Lvl 5 Raw", 0),
119
+ "normalized_score": data.get("MATH Lvl 5", 0)
120
+ },
121
+ "gpqa": {
122
+ "name": "GPQA",
123
+ "value": data.get("GPQA Raw", 0),
124
+ "normalized_score": data.get("GPQA", 0)
125
+ },
126
+ "musr": {
127
+ "name": "MUSR",
128
+ "value": data.get("MUSR Raw", 0),
129
+ "normalized_score": data.get("MUSR", 0)
130
+ },
131
+ "mmlu_pro": {
132
+ "name": "MMLU-PRO",
133
+ "value": data.get("MMLU-PRO Raw", 0),
134
+ "normalized_score": data.get("MMLU-PRO", 0)
135
+ }
136
+ }
137
+
138
+ features = {
139
+ "is_not_available_on_hub": not data.get("Available on the hub", False),
140
+ "is_merged": not data.get("Not_Merged", False),
141
+ "is_moe": not data.get("MoE", False),
142
+ "is_flagged": data.get("Flagged", False),
143
+ "is_highlighted_by_maintainer": data.get("Official Providers", False)
144
+ }
145
+
146
+ metadata = {
147
+ "upload_date": data.get("Upload To Hub Date"),
148
+ "submission_date": data.get("Submission Date"),
149
+ "generation": data.get("Generation"),
150
+ "base_model": data.get("Base Model"),
151
+ "hub_license": data.get("Hub License"),
152
+ "hub_hearts": data.get("Hub ❤️"),
153
+ "params_billions": data.get("#Params (B)"),
154
+ "co2_cost": data.get("CO₂ cost (kg)", 0)
155
+ }
156
+
157
+ # Clean model type by removing emojis if present
158
+ original_type = data.get("Type", "")
159
+ model_type = original_type.lower().strip()
160
+
161
+ # Remove emojis and parentheses
162
+ if "(" in model_type:
163
+ model_type = model_type.split("(")[0].strip()
164
+ model_type = ''.join(c for c in model_type if not c in '🔶🟢🟩💬🤝🌸 ')
165
+
166
+ # Map old model types to new ones
167
+ model_type_mapping = {
168
+ "fine-tuned": "fined-tuned-on-domain-specific-dataset",
169
+ "fine tuned": "fined-tuned-on-domain-specific-dataset",
170
+ "finetuned": "fined-tuned-on-domain-specific-dataset",
171
+ "fine_tuned": "fined-tuned-on-domain-specific-dataset",
172
+ "ft": "fined-tuned-on-domain-specific-dataset",
173
+ "finetuning": "fined-tuned-on-domain-specific-dataset",
174
+ "fine tuning": "fined-tuned-on-domain-specific-dataset",
175
+ "fine-tuning": "fined-tuned-on-domain-specific-dataset"
176
+ }
177
+
178
+ mapped_type = model_type_mapping.get(model_type.lower().strip(), model_type)
179
+
180
+ if mapped_type != model_type:
181
+ logger.debug(LogFormatter.info(f"Model type mapped: {original_type} -> {mapped_type}"))
182
+
183
+ transformed_data = {
184
+ "id": data.get("eval_name"),
185
+ "model": {
186
+ "name": data.get("fullname"),
187
+ "sha": data.get("Model sha"),
188
+ "precision": data.get("Precision"),
189
+ "type": mapped_type,
190
+ "weight_type": data.get("Weight type"),
191
+ "architecture": data.get("Architecture"),
192
+ "average_score": data.get("Average ⬆️"),
193
+ "has_chat_template": data.get("Chat Template", False)
194
+ },
195
+ "evaluations": evaluations,
196
+ "features": features,
197
+ "metadata": metadata
198
+ }
199
+
200
+ logger.debug(LogFormatter.success(f"Successfully transformed data for {model_name}"))
201
+ return transformed_data
202
+
203
+ except Exception as e:
204
+ logger.error(LogFormatter.error(f"Failed to transform data for {data.get('fullname', 'Unknown')}", e))
205
+ raise
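The service can also be driven outside FastAPI, for example from a one-off script (a sketch; it needs network access to the Hub and, optionally, HF_TOKEN in the environment):

import asyncio
from app.services.leaderboard import LeaderboardService

async def main():
    rows = await LeaderboardService().get_formatted_data()
    best = max(rows, key=lambda r: r["model"]["average_score"] or 0)
    print(best["model"]["name"], best["model"]["average_score"])

asyncio.run(main())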
backend/app/services/models.py ADDED
@@ -0,0 +1,559 @@
1
+ from datetime import datetime, timezone
2
+ from typing import Dict, Any, Optional, List
3
+ import json
4
+ import os
5
+ from pathlib import Path
6
+ import logging
7
+ import aiohttp
8
+ import asyncio
9
+ import time
10
+ from huggingface_hub import HfApi, CommitOperationAdd
11
+ from huggingface_hub.utils import build_hf_headers
12
+ import datasets
13
+ from datasets import load_dataset, disable_progress_bar
14
+ import sys
15
+ import contextlib
16
+ from concurrent.futures import ThreadPoolExecutor
17
+ import tempfile
18
+
19
+ from app.config import (
20
+ QUEUE_REPO,
21
+ HF_TOKEN,
22
+ EVAL_REQUESTS_PATH
23
+ )
24
+ from app.config.hf_config import HF_ORGANIZATION
25
+ from app.services.hf_service import HuggingFaceService
26
+ from app.utils.model_validation import ModelValidator
27
+ from app.services.votes import VoteService
28
+ from app.core.cache import cache_config
29
+ from app.utils.logging import LogFormatter
30
+
31
+ # Disable datasets progress bars globally
32
+ disable_progress_bar()
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+ # Context manager to temporarily disable stdout and stderr
37
+ @contextlib.contextmanager
38
+ def suppress_output():
39
+ stdout = sys.stdout
40
+ stderr = sys.stderr
41
+ devnull = open(os.devnull, 'w')
42
+ try:
43
+ sys.stdout = devnull
44
+ sys.stderr = devnull
45
+ yield
46
+ finally:
47
+ sys.stdout = stdout
48
+ sys.stderr = stderr
49
+ devnull.close()
50
+
51
+ class ProgressTracker:
52
+ def __init__(self, total: int, desc: str = "Progress", update_frequency: int = 10):
53
+ self.total = total
54
+ self.current = 0
55
+ self.desc = desc
56
+ self.start_time = time.time()
57
+ self.update_frequency = update_frequency # Percentage steps
58
+ self.last_update = -1
59
+
60
+ # Initial log with fancy formatting
61
+ logger.info(LogFormatter.section(desc))
62
+ logger.info(LogFormatter.info(f"Starting processing of {total:,} items..."))
63
+ sys.stdout.flush()
64
+
65
+ def update(self, n: int = 1):
66
+ self.current += n
67
+ current_percentage = (self.current * 100) // self.total
68
+
69
+ # Only update on frequency steps (e.g., 0%, 10%, 20%, etc.)
70
+ if current_percentage >= self.last_update + self.update_frequency or current_percentage == 100:
71
+ elapsed = time.time() - self.start_time
72
+ rate = self.current / elapsed if elapsed > 0 else 0
73
+ remaining = (self.total - self.current) / rate if rate > 0 else 0
74
+
75
+ # Create progress stats
76
+ stats = {
77
+ "Progress": LogFormatter.progress_bar(self.current, self.total),
78
+ "Items": f"{self.current:,}/{self.total:,}",
79
+ "Time": f"⏱️ {elapsed:.1f}s elapsed, {remaining:.1f}s remaining",
80
+ "Rate": f"🚀 {rate:.1f} items/s"
81
+ }
82
+
83
+ # Log progress using tree format
84
+ for line in LogFormatter.tree(stats):
85
+ logger.info(line)
86
+ sys.stdout.flush()
87
+
88
+ self.last_update = (current_percentage // self.update_frequency) * self.update_frequency
89
+
90
+ def close(self):
91
+ elapsed = time.time() - self.start_time
92
+ rate = self.total / elapsed if elapsed > 0 else 0
93
+
94
+ # Final summary with fancy formatting
95
+ logger.info(LogFormatter.section("COMPLETED"))
96
+ stats = {
97
+ "Total": f"{self.total:,} items",
98
+ "Time": f"{elapsed:.1f}s",
99
+ "Rate": f"{rate:.1f} items/s"
100
+ }
101
+ for line in LogFormatter.stats(stats):
102
+ logger.info(line)
103
+ logger.info("="*50)
104
+ sys.stdout.flush()
105
+
106
+ class ModelService(HuggingFaceService):
107
+ _instance: Optional['ModelService'] = None
108
+ _initialized = False
109
+
110
+ def __new__(cls):
111
+ if cls._instance is None:
112
+ logger.info(LogFormatter.info("Creating new ModelService instance"))
113
+ cls._instance = super(ModelService, cls).__new__(cls)
114
+ return cls._instance
115
+
116
+ def __init__(self):
117
+ if not hasattr(self, '_init_done'):
118
+ logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
119
+ super().__init__()
120
+ self.validator = ModelValidator()
121
+ self.vote_service = VoteService()
122
+ self.eval_requests_path = cache_config.eval_requests_file
123
+ logger.info(LogFormatter.info(f"Using eval requests path: {self.eval_requests_path}"))
124
+
125
+ self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
126
+ self.hf_api = HfApi(token=HF_TOKEN)
127
+ self.cached_models = None
128
+ self.last_cache_update = 0
129
+ self.cache_ttl = cache_config.cache_ttl.total_seconds()
130
+ self._init_done = True
131
+ logger.info(LogFormatter.success("Initialization complete"))
132
+
133
+ async def _download_and_process_file(self, file: str, session: aiohttp.ClientSession, progress: ProgressTracker) -> Optional[Dict]:
134
+ """Download and process a file asynchronously"""
135
+ try:
136
+ # Build file URL
137
+ url = f"https://huggingface.co/datasets/{QUEUE_REPO}/resolve/main/{file}"
138
+ headers = build_hf_headers(token=self.token)
139
+
140
+ # Download file
141
+ async with session.get(url, headers=headers) as response:
142
+ if response.status != 200:
143
+ logger.error(LogFormatter.error(f"Failed to download {file}", f"HTTP {response.status}"))
144
+ progress.update()
145
+ return None
146
+
147
+ try:
148
+ # First read content as text
149
+ text_content = await response.text()
150
+ # Then parse JSON
151
+ content = json.loads(text_content)
152
+ except json.JSONDecodeError as e:
153
+ logger.error(LogFormatter.error(f"Failed to decode JSON from {file}", e))
154
+ progress.update()
155
+ return None
156
+
157
+ # Get status and determine target status
158
+ status = content.get("status", "PENDING").upper()
159
+ target_status = None
160
+ status_map = {
161
+ "PENDING": ["PENDING", "RERUN"],
162
+ "EVALUATING": ["RUNNING"],
163
+ "FINISHED": ["FINISHED", "PENDING_NEW_EVAL"]
164
+ }
165
+
166
+ for target, source_statuses in status_map.items():
167
+ if status in source_statuses:
168
+ target_status = target
169
+ break
170
+
171
+ if not target_status:
172
+ progress.update()
173
+ return None
174
+
175
+ # Calculate wait time
176
+ try:
177
+ submit_time = datetime.fromisoformat(content["submitted_time"].replace("Z", "+00:00"))
178
+ if submit_time.tzinfo is None:
179
+ submit_time = submit_time.replace(tzinfo=timezone.utc)
180
+ current_time = datetime.now(timezone.utc)
181
+ wait_time = current_time - submit_time
182
+
183
+ model_info = {
184
+ "name": content["model"],
185
+ "submitter": content.get("sender", "Unknown"),
186
+ "revision": content["revision"],
187
+ "wait_time": f"{wait_time.total_seconds():.1f}s",
188
+ "submission_time": content["submitted_time"],
189
+ "status": target_status,
190
+ "precision": content.get("precision", "Unknown")
191
+ }
192
+
193
+ progress.update()
194
+ return model_info
195
+
196
+ except (ValueError, TypeError) as e:
197
+ logger.error(LogFormatter.error(f"Failed to process {file}", e))
198
+ progress.update()
199
+ return None
200
+
201
+ except Exception as e:
202
+ logger.error(LogFormatter.error(f"Failed to load {file}", e))
203
+ progress.update()
204
+ return None
205
+
206
+ async def _refresh_models_cache(self):
207
+ """Refresh the models cache"""
208
+ try:
209
+ logger.info(LogFormatter.section("CACHE REFRESH"))
210
+ self._log_repo_operation("read", f"{HF_ORGANIZATION}/requests", "Refreshing models cache")
211
+
212
+ # Initialize models dictionary
213
+ models = {
214
+ "finished": [],
215
+ "evaluating": [],
216
+ "pending": []
217
+ }
218
+
219
+ try:
220
+ logger.info(LogFormatter.subsection("DATASET LOADING"))
221
+ logger.info(LogFormatter.info("Loading dataset files..."))
222
+
223
+ # List files in repository
224
+ with suppress_output():
225
+ files = self.hf_api.list_repo_files(
226
+ repo_id=QUEUE_REPO,
227
+ repo_type="dataset",
228
+ token=self.token
229
+ )
230
+
231
+ # Filter JSON files
232
+ json_files = [f for f in files if f.endswith('.json')]
233
+ total_files = len(json_files)
234
+
235
+ # Log repository stats
236
+ stats = {
237
+ "Total_Files": len(files),
238
+ "JSON_Files": total_files,
239
+ }
240
+ for line in LogFormatter.stats(stats, "Repository Statistics"):
241
+ logger.info(line)
242
+
243
+ if not json_files:
244
+ raise Exception("No JSON files found in repository")
245
+
246
+ # Initialize progress tracker
247
+ progress = ProgressTracker(total_files, "PROCESSING FILES")
248
+
249
+ try:
250
+ # Create aiohttp session to reuse connections
251
+ async with aiohttp.ClientSession() as session:
252
+ # Process files in chunks
253
+ chunk_size = 50
254
+
255
+ for i in range(0, len(json_files), chunk_size):
256
+ chunk = json_files[i:i + chunk_size]
257
+ chunk_tasks = [
258
+ self._download_and_process_file(file, session, progress)
259
+ for file in chunk
260
+ ]
261
+ results = await asyncio.gather(*chunk_tasks)
262
+
263
+ # Process results
264
+ for result in results:
265
+ if result:
266
+ status = result.pop("status")
267
+ models[status.lower()].append(result)
268
+
269
+ finally:
270
+ progress.close()
271
+
272
+ # Final summary with fancy formatting
273
+ logger.info(LogFormatter.section("CACHE SUMMARY"))
274
+ stats = {
275
+ "Finished": len(models["finished"]),
276
+ "Evaluating": len(models["evaluating"]),
277
+ "Pending": len(models["pending"])
278
+ }
279
+ for line in LogFormatter.stats(stats, "Models by Status"):
280
+ logger.info(line)
281
+ logger.info("="*50)
282
+
283
+ except Exception as e:
284
+ logger.error(LogFormatter.error("Error processing files", e))
285
+ raise
286
+
287
+ # Update cache
288
+ self.cached_models = models
289
+ self.last_cache_update = time.time()
290
+ logger.info(LogFormatter.success("Cache updated successfully"))
291
+
292
+ return models
293
+
294
+ except Exception as e:
295
+ logger.error(LogFormatter.error("Cache refresh failed", e))
296
+ raise
297
+
298
+ async def initialize(self):
299
+ """Initialize the model service"""
300
+ if self._initialized:
301
+ logger.info(LogFormatter.info("Service already initialized, using cached data"))
302
+ return
303
+
304
+ try:
305
+ logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
306
+
307
+ # Check if cache already exists
308
+ cache_path = cache_config.get_cache_path("datasets")
309
+ if not cache_path.exists() or not any(cache_path.iterdir()):
310
+ logger.info(LogFormatter.info("No existing cache found, initializing datasets cache..."))
311
+ cache_config.flush_cache("datasets")
312
+ else:
313
+ logger.info(LogFormatter.info("Using existing datasets cache"))
314
+
315
+ # Ensure eval requests directory exists
316
+ self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
317
+ logger.info(LogFormatter.info(f"Eval requests directory: {self.eval_requests_path}"))
318
+
319
+ # List existing files
320
+ if self.eval_requests_path.exists():
321
+ files = list(self.eval_requests_path.glob("**/*.json"))
322
+ stats = {
323
+ "Total_Files": len(files),
324
+ "Directory": str(self.eval_requests_path)
325
+ }
326
+ for line in LogFormatter.stats(stats, "Eval Requests"):
327
+ logger.info(line)
328
+
329
+ # Load initial cache
330
+ await self._refresh_models_cache()
331
+
332
+ self._initialized = True
333
+ logger.info(LogFormatter.success("Model service initialization complete"))
334
+
335
+ except Exception as e:
336
+ logger.error(LogFormatter.error("Initialization failed", e))
337
+ raise
338
+
339
+ async def get_models(self) -> Dict[str, List[Dict[str, Any]]]:
340
+ """Get all models with their status"""
341
+ if not self._initialized:
342
+ logger.info(LogFormatter.info("Service not initialized, initializing now..."))
343
+ await self.initialize()
344
+
345
+ current_time = time.time()
346
+ cache_age = current_time - self.last_cache_update
347
+
348
+ # Check if cache needs refresh
349
+ if not self.cached_models:
350
+ logger.info(LogFormatter.info("No cached data available, refreshing cache..."))
351
+ return await self._refresh_models_cache()
352
+ elif cache_age > self.cache_ttl:
353
+ logger.info(LogFormatter.info(f"Cache expired ({cache_age:.1f}s old, TTL: {self.cache_ttl}s)"))
354
+ return await self._refresh_models_cache()
355
+ else:
356
+ logger.info(LogFormatter.info(f"Using cached data ({cache_age:.1f}s old)"))
357
+ return self.cached_models
358
+
359
+ async def submit_model(
360
+ self,
361
+ model_data: Dict[str, Any],
362
+ user_id: str
363
+ ) -> Dict[str, Any]:
364
+ logger.info(LogFormatter.section("MODEL SUBMISSION"))
365
+ self._log_repo_operation("write", f"{HF_ORGANIZATION}/requests", f"Submitting model {model_data['model_id']} by {user_id}")
366
+ stats = {
367
+ "Model": model_data["model_id"],
368
+ "User": user_id,
369
+ "Revision": model_data["revision"],
370
+ "Precision": model_data["precision"],
371
+ "Type": model_data["model_type"]
372
+ }
373
+ for line in LogFormatter.tree(stats, "Submission Details"):
374
+ logger.info(line)
375
+
376
+ # Validate required fields
377
+ required_fields = [
378
+ "model_id", "base_model", "revision", "precision",
379
+ "weight_type", "model_type", "use_chat_template"
380
+ ]
381
+ for field in required_fields:
382
+ if field not in model_data:
383
+ raise ValueError(f"Missing required field: {field}")
384
+
385
+ # Check if model already exists in the system
386
+ try:
387
+ logger.info(LogFormatter.subsection("CHECKING EXISTING SUBMISSIONS"))
388
+ existing_models = await self.get_models()
389
+
390
+ # Check in all statuses (pending, evaluating, finished)
391
+ for status, models in existing_models.items():
392
+ for model in models:
393
+ if model["name"] == model_data["model_id"]:
394
+ error_msg = f"Model {model_data['model_id']} is already in the system with status: {status}"
395
+ logger.error(LogFormatter.error("Submission rejected", error_msg))
396
+ raise ValueError(error_msg)
397
+
398
+ logger.info(LogFormatter.success("No existing submission found"))
399
+ except ValueError:
400
+ raise
401
+ except Exception as e:
402
+ logger.error(LogFormatter.error("Failed to check existing submissions", e))
403
+ raise
404
+
405
+ # Get model info and validate it exists on HuggingFace
406
+ try:
407
+ logger.info(LogFormatter.subsection("MODEL VALIDATION"))
408
+
409
+ # Get the model info to check if it exists
410
+ model_info = self.hf_api.model_info(
411
+ model_data["model_id"],
412
+ revision=model_data["revision"],
413
+ token=self.token
414
+ )
415
+ if not model_info:
416
+ raise Exception(f"Model {model_data['model_id']} not found on HuggingFace Hub")
417
+
418
+ logger.info(LogFormatter.success("Model exists on HuggingFace Hub"))
419
+
420
+ except Exception as e:
421
+ logger.error(LogFormatter.error("Model validation failed", e))
422
+ raise
423
+
424
+ # Validate model card
425
+ valid, error, model_card = await self.validator.check_model_card(
426
+ model_data["model_id"]
427
+ )
428
+ if not valid:
429
+ logger.error(LogFormatter.error("Model card validation failed", error))
430
+ raise Exception(error)
431
+ logger.info(LogFormatter.success("Model card validation passed"))
432
+
433
+ # Check size limits
434
+ model_size, error = await self.validator.get_model_size(
435
+ model_info,
436
+ model_data["precision"],
437
+ model_data["base_model"]
438
+ )
439
+ if model_size is None:
440
+ logger.error(LogFormatter.error("Model size validation failed", error))
441
+ raise Exception(error)
442
+ logger.info(LogFormatter.success(f"Model size validation passed: {model_size:.1f}GB"))
443
+
444
+ # Size limits based on precision
445
+ if model_data["precision"] in ["float16", "bfloat16"] and model_size > 100:
446
+ error_msg = f"Model too large for {model_data['precision']} (limit: 100GB)"
447
+ logger.error(LogFormatter.error("Size limit exceeded", error_msg))
448
+ raise Exception(error_msg)
449
+
450
+ # Chat template validation if requested
451
+ if model_data["use_chat_template"]:
452
+ valid, error = await self.validator.check_chat_template(
453
+ model_data["model_id"],
454
+ model_data["revision"]
455
+ )
456
+ if not valid:
457
+ logger.error(LogFormatter.error("Chat template validation failed", error))
458
+ raise Exception(error)
459
+ logger.info(LogFormatter.success("Chat template validation passed"))
460
+
461
+ # Create eval entry
462
+ eval_entry = {
463
+ "model": model_data["model_id"],
464
+ "base_model": model_data["base_model"],
465
+ "revision": model_info.sha,
466
+ "precision": model_data["precision"],
467
+ "params": model_size,
468
+ "architectures": model_info.pipeline_tag if hasattr(model_info, 'pipeline_tag') else None,
469
+ "weight_type": model_data["weight_type"],
470
+ "status": "PENDING",
471
+ "submitted_time": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
472
+ "model_type": model_data["model_type"],
473
+ "job_id": -1,
474
+ "job_start_time": None,
475
+ "use_chat_template": model_data["use_chat_template"],
476
+ "sender": user_id
477
+ }
478
+
479
+ logger.info(LogFormatter.subsection("EVALUATION ENTRY"))
480
+ for line in LogFormatter.tree(eval_entry):
481
+ logger.info(line)
482
+
483
+ # Upload to HF dataset
484
+ try:
485
+ logger.info(LogFormatter.subsection("UPLOADING TO HUGGINGFACE"))
486
+ logger.info(LogFormatter.info(f"Uploading to {HF_ORGANIZATION}/requests..."))
487
+
488
+ # Construct the path in the dataset
489
+ org_or_user = model_data["model_id"].split("/")[0] if "/" in model_data["model_id"] else ""
490
+ model_path = model_data["model_id"].split("/")[-1]
491
+ relative_path = f"{org_or_user}/{model_path}_eval_request_False_{model_data['precision']}_{model_data['weight_type']}.json"
492
+
493
+ # Create a temporary file with the request
494
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file:
495
+ json.dump(eval_entry, temp_file, indent=2)
496
+ temp_file.flush()
497
+ temp_path = temp_file.name
498
+
499
+ # Upload file directly
500
+ self.hf_api.upload_file(
501
+ path_or_fileobj=temp_path,
502
+ path_in_repo=relative_path,
503
+ repo_id=f"{HF_ORGANIZATION}/requests",
504
+ repo_type="dataset",
505
+ commit_message=f"Add {model_data['model_id']} to eval queue",
506
+ token=self.token
507
+ )
508
+
509
+ # Clean up temp file
510
+ os.unlink(temp_path)
511
+
512
+ logger.info(LogFormatter.success("Upload successful"))
513
+
514
+ except Exception as e:
515
+ logger.error(LogFormatter.error("Upload failed", e))
516
+ raise
517
+
518
+ # Add automatic vote
519
+ try:
520
+ logger.info(LogFormatter.subsection("AUTOMATIC VOTE"))
521
+ logger.info(LogFormatter.info(f"Adding upvote for {model_data['model_id']} by {user_id}"))
522
+ await self.vote_service.add_vote(
523
+ model_data["model_id"],
524
+ user_id,
525
+ "up"
526
+ )
527
+ logger.info(LogFormatter.success("Vote recorded successfully"))
528
+ except Exception as e:
529
+ logger.error(LogFormatter.error("Failed to record vote", e))
530
+ # Don't raise here as the main submission was successful
531
+
532
+ return {
533
+ "status": "success",
534
+ "message": "Model submitted successfully and vote recorded"
535
+ }
536
+
537
+ async def get_model_status(self, model_id: str) -> Dict[str, Any]:
538
+ """Get evaluation status of a model"""
539
+ logger.info(LogFormatter.info(f"Checking status for model: {model_id}"))
540
+ eval_path = self.eval_requests_path
541
+
542
+ for user_folder in eval_path.iterdir():
543
+ if user_folder.is_dir():
544
+ for file in user_folder.glob("*.json"):
545
+ with open(file, "r") as f:
546
+ data = json.load(f)
547
+ if data["model"] == model_id:
548
+ status = {
549
+ "status": data["status"],
550
+ "submitted_time": data["submitted_time"],
551
+ "job_id": data.get("job_id", -1)
552
+ }
553
+ logger.info(LogFormatter.success("Status found"))
554
+ for line in LogFormatter.tree(status, "Model Status"):
555
+ logger.info(line)
556
+ return status
557
+
558
+ logger.warning(LogFormatter.warning(f"No status found for model: {model_id}"))
559
+ return {"status": "not_found"}
backend/app/services/rate_limiter.py ADDED
@@ -0,0 +1,72 @@
1
+ """
2
+ import logging
3
+ from datetime import datetime, timedelta, timezone
4
+ from typing import Tuple, Dict, List
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+ class RateLimiter:
9
+ def __init__(self, period_days: int = 7, quota: int = 5):
10
+ self.period_days = period_days
11
+ self.quota = quota
12
+ self.submission_history: Dict[str, List[datetime]] = {}
13
+ self.higher_quota_users = set() # Users with higher quotas
14
+ self.unlimited_users = set() # Users with no quota limits
15
+
16
+ def add_unlimited_user(self, user_id: str):
17
+ """Add a user to the unlimited users list"""
18
+ self.unlimited_users.add(user_id)
19
+
20
+ def add_higher_quota_user(self, user_id: str):
21
+ """Add a user to the higher quota users list"""
22
+ self.higher_quota_users.add(user_id)
23
+
24
+ def record_submission(self, user_id: str):
25
+ """Record a new submission for a user"""
26
+ current_time = datetime.now(timezone.utc)
27
+ if user_id not in self.submission_history:
28
+ self.submission_history[user_id] = []
29
+ self.submission_history[user_id].append(current_time)
30
+
31
+ def clean_old_submissions(self, user_id: str):
32
+ """Remove submissions older than the period"""
33
+ if user_id not in self.submission_history:
34
+ return
35
+
36
+ current_time = datetime.now(timezone.utc)
37
+ cutoff_time = current_time - timedelta(days=self.period_days)
38
+
39
+ self.submission_history[user_id] = [
40
+ time for time in self.submission_history[user_id]
41
+ if time > cutoff_time
42
+ ]
43
+
44
+ async def check_rate_limit(self, user_id: str) -> Tuple[bool, str]:
45
+ """Check if a user has exceeded their rate limit
46
+
47
+ Returns:
48
+ Tuple[bool, str]: (is_allowed, error_message)
49
+ """
50
+ # Unlimited users bypass all checks
51
+ if user_id in self.unlimited_users:
52
+ return True, ""
53
+
54
+ # Clean old submissions
55
+ self.clean_old_submissions(user_id)
56
+
57
+ # Get current submission count
58
+ submission_count = len(self.submission_history.get(user_id, []))
59
+
60
+ # Calculate user's quota
61
+ user_quota = self.quota * 2 if user_id in self.higher_quota_users else self.quota
62
+
63
+ # Check if user has exceeded their quota
64
+ if submission_count >= user_quota:
65
+ error_msg = (
66
+ f"User '{user_id}' has reached the limit of {user_quota} submissions "
67
+ f"in the last {self.period_days} days. Please wait before submitting again."
68
+ )
69
+ return False, error_msg
70
+
71
+ return True, ""
72
+ """
backend/app/services/votes.py ADDED
@@ -0,0 +1,391 @@
1
+ from datetime import datetime, timezone
2
+ from typing import Dict, Any, List, Set, Tuple, Optional
3
+ import json
4
+ import logging
5
+ import asyncio
6
+ from pathlib import Path
7
+ import os
8
+ import aiohttp
9
+ from huggingface_hub import HfApi
10
+ import datasets
11
+
12
+ from app.services.hf_service import HuggingFaceService
13
+ from app.config import HF_TOKEN, API
14
+ from app.config.hf_config import HF_ORGANIZATION
15
+ from app.core.cache import cache_config
16
+ from app.utils.logging import LogFormatter
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ class VoteService(HuggingFaceService):
21
+ _instance: Optional['VoteService'] = None
22
+ _initialized = False
23
+
24
+ def __new__(cls):
25
+ if cls._instance is None:
26
+ cls._instance = super(VoteService, cls).__new__(cls)
27
+ return cls._instance
28
+
29
+ def __init__(self):
30
+ if not hasattr(self, '_init_done'):
31
+ super().__init__()
32
+ self.votes_file = cache_config.votes_file
33
+ self.votes_to_upload: List[Dict[str, Any]] = []
34
+ self.vote_check_set: Set[Tuple[str, str, str]] = set()
35
+ self._votes_by_model: Dict[str, List[Dict[str, Any]]] = {}
36
+ self._votes_by_user: Dict[str, List[Dict[str, Any]]] = {}
37
+ self._upload_lock = asyncio.Lock()
38
+ self._last_sync = None
39
+ self._sync_interval = 300 # 5 minutes
40
+ self._total_votes = 0
41
+ self._last_vote_timestamp = None
42
+ self._max_retries = 3
43
+ self._retry_delay = 1 # seconds
44
+ self._upload_batch_size = 10
45
+ self.hf_api = HfApi(token=HF_TOKEN)
46
+ self._init_done = True
47
+
48
+ async def initialize(self):
49
+ """Initialize the vote service"""
50
+ if self._initialized:
51
+ await self._check_for_new_votes()
52
+ return
53
+
54
+ try:
55
+ logger.info(LogFormatter.section("VOTE SERVICE INITIALIZATION"))
56
+
57
+ # Ensure votes directory exists
58
+ self.votes_file.parent.mkdir(parents=True, exist_ok=True)
59
+
60
+ # Load existing votes if file exists
61
+ local_vote_count = 0
62
+ if self.votes_file.exists():
63
+ logger.info(LogFormatter.info(f"Loading votes from {self.votes_file}"))
64
+ local_vote_count = await self._count_local_votes()
65
+ logger.info(LogFormatter.info(f"Found {local_vote_count:,} local votes"))
66
+
67
+ # Check remote votes count
68
+ remote_vote_count = await self._count_remote_votes()
69
+ logger.info(LogFormatter.info(f"Found {remote_vote_count:,} remote votes"))
70
+
71
+ if remote_vote_count > local_vote_count:
72
+ logger.info(LogFormatter.info(f"Fetching {remote_vote_count - local_vote_count:,} new votes"))
73
+ await self._sync_with_hub()
74
+ elif remote_vote_count < local_vote_count:
75
+ logger.warning(LogFormatter.warning(f"Local votes ({local_vote_count:,}) > Remote votes ({remote_vote_count:,})"))
76
+ await self._load_existing_votes()
77
+ else:
78
+ logger.info(LogFormatter.success("Local and remote votes are in sync"))
79
+ if local_vote_count > 0:
80
+ await self._load_existing_votes()
81
+ else:
82
+ logger.info(LogFormatter.info("No votes found"))
83
+
84
+ self._initialized = True
85
+ self._last_sync = datetime.now(timezone.utc)
86
+
87
+ # Final summary
88
+ stats = {
89
+ "Total_Votes": self._total_votes,
90
+ "Last_Sync": self._last_sync.strftime("%Y-%m-%d %H:%M:%S UTC")
91
+ }
92
+ logger.info(LogFormatter.section("INITIALIZATION COMPLETE"))
93
+ for line in LogFormatter.stats(stats):
94
+ logger.info(line)
95
+
96
+ except Exception as e:
97
+ logger.error(LogFormatter.error("Initialization failed", e))
98
+ raise
99
+
100
+ async def _count_local_votes(self) -> int:
101
+ """Count votes in local file"""
102
+ if not self.votes_file.exists():
103
+ return 0
104
+
105
+ count = 0
106
+ try:
107
+ with open(self.votes_file, 'r') as f:
108
+ for _ in f:
109
+ count += 1
110
+ return count
111
+ except Exception as e:
112
+ logger.error(f"Error counting local votes: {str(e)}")
113
+ return 0
114
+
115
+ async def _count_remote_votes(self) -> int:
116
+ """Count votes in remote file"""
117
+ url = f"https://huggingface.co/datasets/{HF_ORGANIZATION}/votes/raw/main/votes_data.jsonl"
118
+ headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
119
+
120
+ try:
121
+ async with aiohttp.ClientSession() as session:
122
+ async with session.get(url, headers=headers) as response:
123
+ if response.status == 200:
124
+ count = 0
125
+ async for line in response.content:
126
+ if line.strip(): # Skip empty lines
127
+ count += 1
128
+ return count
129
+ else:
130
+ logger.error(f"Failed to get remote votes: HTTP {response.status}")
131
+ return 0
132
+ except Exception as e:
133
+ logger.error(f"Error counting remote votes: {str(e)}")
134
+ return 0
135
+
136
+ async def _sync_with_hub(self):
137
+ """Sync votes with HuggingFace hub using datasets"""
138
+ try:
139
+ logger.info(LogFormatter.section("VOTE SYNC"))
140
+ self._log_repo_operation("sync", f"{HF_ORGANIZATION}/votes", "Syncing local votes with HF hub")
141
+ logger.info(LogFormatter.info("Syncing with HuggingFace hub..."))
142
+
143
+ # Load votes from HF dataset
144
+ dataset = datasets.load_dataset(
145
+ f"{HF_ORGANIZATION}/votes",
146
+ split="train",
147
+ cache_dir=cache_config.get_cache_path("datasets")
148
+ )
149
+
150
+ remote_votes = len(dataset)
151
+ logger.info(LogFormatter.info(f"Dataset loaded with {remote_votes:,} votes"))
152
+
153
+ # Convert to list of dictionaries
154
+ df = dataset.to_pandas()
155
+ if 'timestamp' in df.columns:
156
+ df['timestamp'] = df['timestamp'].dt.strftime('%Y-%m-%dT%H:%M:%SZ')
157
+ remote_votes = df.to_dict('records')
158
+
159
+ # If we have more remote votes than local
160
+ if len(remote_votes) > self._total_votes:
161
+ new_votes = len(remote_votes) - self._total_votes
162
+ logger.info(LogFormatter.info(f"Processing {new_votes:,} new votes..."))
163
+
164
+ # Save votes to local file
165
+ with open(self.votes_file, 'w') as f:
166
+ for vote in remote_votes:
167
+ f.write(json.dumps(vote) + '\n')
168
+
169
+ # Reload votes in memory
170
+ await self._load_existing_votes()
171
+ logger.info(LogFormatter.success("Sync completed successfully"))
172
+ else:
173
+ logger.info(LogFormatter.success("Local votes are up to date"))
174
+
175
+ self._last_sync = datetime.now(timezone.utc)
176
+
177
+ except Exception as e:
178
+ logger.error(LogFormatter.error("Sync failed", e))
179
+ raise
180
+
181
+ async def _check_for_new_votes(self):
182
+ """Check for new votes on the hub"""
183
+ try:
184
+ self._log_repo_operation("check", f"{HF_ORGANIZATION}/votes", "Checking for new votes")
185
+ # Load only dataset metadata
186
+ dataset_info = datasets.load_dataset(f"{HF_ORGANIZATION}/votes", split="train")
187
+            remote_vote_count = len(dataset_info)
+
+            if remote_vote_count > self._total_votes:
+                logger.info(f"Found {remote_vote_count - self._total_votes} new votes on hub")
+                await self._sync_with_hub()
+            else:
+                logger.info("No new votes found on hub")
+
+        except Exception as e:
+            logger.error(f"Error checking for new votes: {str(e)}")
+
+    async def _load_existing_votes(self):
+        """Load existing votes from file"""
+        if not self.votes_file.exists():
+            logger.warning(LogFormatter.warning("No votes file found"))
+            return
+
+        try:
+            logger.info(LogFormatter.section("LOADING VOTES"))
+
+            # Clear existing data structures
+            self.vote_check_set.clear()
+            self._votes_by_model.clear()
+            self._votes_by_user.clear()
+
+            vote_count = 0
+            latest_timestamp = None
+
+            with open(self.votes_file, "r") as f:
+                for line in f:
+                    try:
+                        vote = json.loads(line.strip())
+                        vote_count += 1
+
+                        # Track latest timestamp
+                        try:
+                            vote_timestamp = datetime.fromisoformat(vote["timestamp"].replace("Z", "+00:00"))
+                            if not latest_timestamp or vote_timestamp > latest_timestamp:
+                                latest_timestamp = vote_timestamp
+                            vote["timestamp"] = vote_timestamp.strftime("%Y-%m-%dT%H:%M:%SZ")
+                        except (KeyError, ValueError) as e:
+                            logger.warning(LogFormatter.warning(f"Invalid timestamp in vote: {str(e)}"))
+                            continue
+
+                        if vote_count % 1000 == 0:
+                            logger.info(LogFormatter.info(f"Processed {vote_count:,} votes..."))
+
+                        self._add_vote_to_memory(vote)
+
+                    except json.JSONDecodeError as e:
+                        logger.error(LogFormatter.error("Vote parsing failed", e))
+                        continue
+                    except Exception as e:
+                        logger.error(LogFormatter.error("Vote processing failed", e))
+                        continue
+
+            self._total_votes = vote_count
+            self._last_vote_timestamp = latest_timestamp
+
+            # Final summary
+            stats = {
+                "Total_Votes": vote_count,
+                "Latest_Vote": latest_timestamp.strftime("%Y-%m-%d %H:%M:%S UTC") if latest_timestamp else "None",
+                "Unique_Models": len(self._votes_by_model),
+                "Unique_Users": len(self._votes_by_user)
+            }
+
+            logger.info(LogFormatter.section("VOTE SUMMARY"))
+            for line in LogFormatter.stats(stats):
+                logger.info(line)
+
+        except Exception as e:
+            logger.error(LogFormatter.error("Failed to load votes", e))
+            raise
+
+    def _add_vote_to_memory(self, vote: Dict[str, Any]):
+        """Add vote to memory structures"""
+        try:
+            check_tuple = (vote["model"], vote["revision"], vote["username"])
+
+            # Skip if we already have this vote
+            if check_tuple in self.vote_check_set:
+                return
+
+            self.vote_check_set.add(check_tuple)
+
+            # Update model votes
+            if vote["model"] not in self._votes_by_model:
+                self._votes_by_model[vote["model"]] = []
+            self._votes_by_model[vote["model"]].append(vote)
+
+            # Update user votes
+            if vote["username"] not in self._votes_by_user:
+                self._votes_by_user[vote["username"]] = []
+            self._votes_by_user[vote["username"]].append(vote)
+
+        except KeyError as e:
+            logger.error(f"Malformed vote data, missing key: {str(e)}")
+        except Exception as e:
+            logger.error(f"Error adding vote to memory: {str(e)}")
+
+    async def get_user_votes(self, user_id: str) -> List[Dict[str, Any]]:
+        """Get all votes from a specific user"""
+        logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}"))
+        votes = self._votes_by_user.get(user_id, [])
+        logger.info(LogFormatter.success(f"Found {len(votes):,} votes"))
+        return votes
+
+    async def get_model_votes(self, model_id: str) -> Dict[str, Any]:
+        """Get all votes for a specific model"""
+        logger.info(LogFormatter.info(f"Fetching votes for model: {model_id}"))
+        votes = self._votes_by_model.get(model_id, [])
+
+        # Group votes by revision
+        votes_by_revision = {}
+        for vote in votes:
+            revision = vote["revision"]
+            if revision not in votes_by_revision:
+                votes_by_revision[revision] = 0
+            votes_by_revision[revision] += 1
+
+        stats = {
+            "Total_Votes": len(votes),
+            **{f"Revision_{k}": v for k, v in votes_by_revision.items()}
+        }
+
+        logger.info(LogFormatter.section("VOTE STATISTICS"))
+        for line in LogFormatter.stats(stats):
+            logger.info(line)
+
+        return {
+            "total_votes": len(votes),
+            "votes_by_revision": votes_by_revision,
+            "votes": votes
+        }
+
+    async def _get_model_revision(self, model_id: str) -> str:
+        """Get current revision of a model with retries"""
+        logger.info(f"Getting revision for model: {model_id}")
+        for attempt in range(self._max_retries):
+            try:
+                model_info = await asyncio.to_thread(self.hf_api.model_info, model_id)
+                logger.info(f"Successfully got revision {model_info.sha} for model {model_id}")
+                return model_info.sha
+            except Exception as e:
+                logger.error(f"Error getting model revision for {model_id} (attempt {attempt + 1}): {str(e)}")
+                if attempt < self._max_retries - 1:
+                    retry_delay = self._retry_delay * (attempt + 1)
+                    logger.info(f"Retrying in {retry_delay} seconds...")
+                    await asyncio.sleep(retry_delay)
+                else:
+                    logger.warning(f"Using 'main' as fallback revision for {model_id} after {self._max_retries} failed attempts")
+                    return "main"
+
+    async def add_vote(self, model_id: str, user_id: str, vote_type: str) -> Dict[str, Any]:
+        """Add a vote for a model"""
+        try:
+            self._log_repo_operation("add", f"{HF_ORGANIZATION}/votes", f"Adding {vote_type} vote for {model_id} by {user_id}")
+            logger.info(LogFormatter.section("NEW VOTE"))
+            stats = {
+                "Model": model_id,
+                "User": user_id,
+                "Type": vote_type
+            }
+            for line in LogFormatter.tree(stats, "Vote Details"):
+                logger.info(line)
+
+            revision = await self._get_model_revision(model_id)
+            check_tuple = (model_id, revision, user_id)
+
+            if check_tuple in self.vote_check_set:
+                raise ValueError("Vote already recorded for this model")
+
+            vote = {
+                "model": model_id,
+                "revision": revision,
+                "username": user_id,
+                "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
+                "vote_type": vote_type
+            }
+
+            # Update local storage
+            with open(self.votes_file, "a") as f:
+                f.write(json.dumps(vote) + "\n")
+
+            self._add_vote_to_memory(vote)
+            self.votes_to_upload.append(vote)
+
+            stats = {
+                "Status": "Success",
+                "Queue_Size": len(self.votes_to_upload)
+            }
+            for line in LogFormatter.stats(stats):
+                logger.info(line)
+
+            # Try to upload if batch size reached
+            if len(self.votes_to_upload) >= self._upload_batch_size:
+                logger.info(LogFormatter.info(f"Upload batch size reached ({self._upload_batch_size}), triggering sync"))
+                await self._sync_with_hub()
+
+            return {"status": "success", "message": "Vote added successfully"}
+
+        except Exception as e:
+            logger.error(LogFormatter.error("Failed to add vote", e))
+            raise
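Each vote is persisted as one JSON object per line in the service's local JSONL file (the hub copy is downloaded as `votes_data.jsonl` by the utility scripts further down). A minimal sketch of the record shape and the deduplication key, inferred from `add_vote` and `_add_vote_to_memory` above; all field values here are illustrative:

```python
import json
from datetime import datetime, timezone

# Illustrative vote record, mirroring the dict built in add_vote() above.
vote = {
    "model": "some-org/some-model",    # hypothetical model id
    "revision": "abc1234",             # resolved revision, or "main" as fallback
    "username": "some-user",           # hypothetical user id
    "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    "vote_type": "up",                 # illustrative; the service does not enumerate allowed values here
}

# One line of the JSONL file:
print(json.dumps(vote))

# Deduplication key used by the in-memory check set:
check_tuple = (vote["model"], vote["revision"], vote["username"])
```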
backend/app/utils/__init__.py ADDED
@@ -0,0 +1,3 @@
+from . import model_validation
+
+__all__ = ["model_validation"]
backend/app/utils/logging.py ADDED
@@ -0,0 +1,105 @@
+import logging
+import sys
+from typing import Dict, Any, List, Optional
+
+logger = logging.getLogger(__name__)
+
+class LogFormatter:
+    """Utility class for consistent log formatting across the application"""
+
+    @staticmethod
+    def section(title: str) -> str:
+        """Create a section header"""
+        return f"\n{'='*20} {title.upper()} {'='*20}"
+
+    @staticmethod
+    def subsection(title: str) -> str:
+        """Create a subsection header"""
+        return f"\n{'─'*20} {title} {'─'*20}"
+
+    @staticmethod
+    def tree(items: Dict[str, Any], title: str = None) -> List[str]:
+        """Create a tree view of dictionary data"""
+        lines = []
+        if title:
+            lines.append(f"📊 {title}:")
+
+        # Get the maximum length for alignment
+        max_key_length = max(len(str(k)) for k in items.keys())
+
+        # Format each item
+        for i, (key, value) in enumerate(items.items()):
+            prefix = "└──" if i == len(items) - 1 else "├──"
+            if isinstance(value, (int, float)):
+                value = f"{value:,}"  # Add thousand separators
+            lines.append(f"{prefix} {str(key):<{max_key_length}}: {value}")
+
+        return lines
+
+    @staticmethod
+    def stats(stats: Dict[str, int], title: str = None) -> List[str]:
+        """Format statistics with icons"""
+        lines = []
+        if title:
+            lines.append(f"📊 {title}:")
+
+        # Get the maximum length for alignment
+        max_key_length = max(len(str(k)) for k in stats.keys())
+
+        # Format each stat with an appropriate icon
+        icons = {
+            "total": "📌",
+            "success": "✅",
+            "error": "❌",
+            "pending": "⏳",
+            "processing": "⚙️",
+            "finished": "✨",
+            "evaluating": "🔄",
+            "downloads": "⬇️",
+            "files": "📁",
+            "cached": "💾",
+            "size": "📏",
+            "time": "⏱️",
+            "rate": "🚀"
+        }
+
+        # Format each item
+        for i, (key, value) in enumerate(stats.items()):
+            prefix = "└──" if i == len(stats) - 1 else "├──"
+            icon = icons.get(key.lower().split('_')[0], "•")
+            if isinstance(value, (int, float)):
+                value = f"{value:,}"  # Add thousand separators
+            lines.append(f"{prefix} {icon} {str(key):<{max_key_length}}: {value}")
+
+        return lines
+
+    @staticmethod
+    def progress_bar(current: int, total: int, width: int = 20) -> str:
+        """Create a progress bar"""
+        percentage = (current * 100) // total
+        filled = "█" * (percentage * width // 100)
+        empty = "░" * (width - len(filled))
+        return f"{filled}{empty} {percentage:3d}%"
+
+    @staticmethod
+    def error(message: str, error: Optional[Exception] = None) -> str:
+        """Format error message"""
+        error_msg = f"\n❌ Error: {message}"
+        if error:
+            error_msg += f"\n └── Details: {str(error)}"
+        return error_msg
+
+    @staticmethod
+    def success(message: str) -> str:
+        """Format success message"""
+        return f"✅ {message}"
+
+    @staticmethod
+    def warning(message: str) -> str:
+        """Format warning message"""
+        return f"⚠️ {message}"
+
+    @staticmethod
+    def info(message: str) -> str:
+        """Format info message"""
+        return f"ℹ️ {message}"
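A minimal usage sketch of this formatter, with made-up numbers (the exact glyph alignment depends on the terminal font):

```python
import logging
from app.utils.logging import LogFormatter

logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)

logger.info(LogFormatter.section("VOTE SUMMARY"))
for line in LogFormatter.stats({"Total_Votes": 12345, "Unique_Models": 678}):
    logger.info(line)

# Prints roughly:
# ==================== VOTE SUMMARY ====================
# ├── 📌 Total_Votes  : 12,345
# └── • Unique_Models: 678
```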
backend/app/utils/model_validation.py ADDED
@@ -0,0 +1,208 @@
+import json
+import logging
+import asyncio
+import re
+from typing import Tuple, Optional, Dict, Any
+import aiohttp
+from huggingface_hub import HfApi, ModelCard, hf_hub_download
+from transformers import AutoConfig, AutoTokenizer
+from app.config.base import HF_TOKEN, API
+from app.utils.logging import LogFormatter
+
+logger = logging.getLogger(__name__)
+
+class ModelValidator:
+    def __init__(self):
+        self.token = HF_TOKEN
+        self.api = HfApi(token=self.token)
+        self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
+
+    async def check_model_card(self, model_id: str) -> Tuple[bool, str, Optional[Dict[str, Any]]]:
+        """Check if model has a valid model card"""
+        try:
+            logger.info(LogFormatter.info(f"Checking model card for {model_id}"))
+
+            # Get model card content using ModelCard.load
+            try:
+                model_card = await asyncio.to_thread(
+                    ModelCard.load,
+                    model_id
+                )
+                logger.info(LogFormatter.success("Model card found"))
+            except Exception as e:
+                error_msg = "Please add a model card to your model to explain how you trained/fine-tuned it."
+                logger.error(LogFormatter.error(error_msg, e))
+                return False, error_msg, None
+
+            # Check license in model card data
+            if model_card.data.license is None and not ("license_name" in model_card.data and "license_link" in model_card.data):
+                error_msg = "License not found. Please add a license to your model card using the `license` metadata or a `license_name`/`license_link` pair."
+                logger.warning(LogFormatter.warning(error_msg))
+                return False, error_msg, None
+
+            # Enforce card content length
+            if len(model_card.text) < 200:
+                error_msg = "Please add a description to your model card, it is too short."
+                logger.warning(LogFormatter.warning(error_msg))
+                return False, error_msg, None
+
+            logger.info(LogFormatter.success("Model card validation passed"))
+            return True, "", model_card
+
+        except Exception as e:
+            error_msg = "Failed to validate model card"
+            logger.error(LogFormatter.error(error_msg, e))
+            return False, str(e), None
+
+    async def get_safetensors_metadata(self, model_id: str, filename: str = "model.safetensors") -> Optional[Dict]:
+        """Get metadata from a safetensors file"""
+        try:
+            url = f"{API['HUB']}/{model_id}/raw/main/{filename}"
+            async with aiohttp.ClientSession() as session:
+                async with session.get(url, headers=self.headers) as response:
+                    if response.status == 200:
+                        # Read only the first 32KB to get the metadata
+                        header = await response.content.read(32768)
+                        # Parse metadata length from the first 8 bytes
+                        metadata_len = int.from_bytes(header[:8], byteorder='little')
+                        metadata_bytes = header[8:8+metadata_len]
+                        return json.loads(metadata_bytes)
+                    return None
+        except Exception as e:
+            logger.warning(f"Failed to get safetensors metadata: {str(e)}")
+            return None
+
+    async def get_model_size(
+        self,
+        model_info: Any,
+        precision: str,
+        base_model: str
+    ) -> Tuple[Optional[float], Optional[str]]:
+        """Get model size in billions of parameters"""
+        try:
+            logger.info(LogFormatter.info(f"Checking model size for {model_info.modelId}"))
+
+            # Check if model is adapter
+            is_adapter = any(s.rfilename == "adapter_config.json" for s in model_info.siblings if hasattr(s, 'rfilename'))
+
+            # Try to get size from safetensors first
+            model_size = None
+
+            if is_adapter and base_model:
+                # For adapters, we need both adapter and base model sizes
+                adapter_meta = await self.get_safetensors_metadata(model_info.id, "adapter_model.safetensors")
+                base_meta = await self.get_safetensors_metadata(base_model)
+
+                if adapter_meta and base_meta:
+                    adapter_size = sum(int(v.split(',')[0]) for v in adapter_meta.get("tensor_metadata", {}).values())
+                    base_size = sum(int(v.split(',')[0]) for v in base_meta.get("tensor_metadata", {}).values())
+                    model_size = (adapter_size + base_size) / (2 * 1e9)  # Convert to billions, assuming float16
+            else:
+                # For regular models, just get the model size
+                meta = await self.get_safetensors_metadata(model_info.id)
+                if meta:
+                    total_params = sum(int(v.split(',')[0]) for v in meta.get("tensor_metadata", {}).values())
+                    model_size = total_params / (2 * 1e9)  # Convert to billions, assuming float16
+
+            if model_size is None:
+                # Fallback: Try to get size from model name
+                size_pattern = re.compile(r"(\d+\.?\d*)b")  # Matches patterns like "7b", "13b", "1.1b"
+                size_match = re.search(size_pattern, model_info.id.lower())
+
+                if size_match:
+                    size_str = size_match.group(1)
+                    model_size = float(size_str)
+                else:
+                    return None, "Could not determine model size from safetensors or model name"
+
+            # Adjust size for GPTQ models
+            size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1
+            model_size = round(size_factor * model_size, 3)
+
+            logger.info(LogFormatter.success(f"Model size: {model_size}B parameters"))
+            return model_size, None
+
+        except Exception as e:
+            error_msg = "Failed to get model size"
+            logger.error(LogFormatter.error(error_msg, e))
+            return None, str(e)
+
+    async def check_chat_template(
+        self,
+        model_id: str,
+        revision: str
+    ) -> Tuple[bool, Optional[str]]:
+        """Check if model has a valid chat template"""
+        try:
+            logger.info(LogFormatter.info(f"Checking chat template for {model_id}"))
+
+            try:
+                config_file = await asyncio.to_thread(
+                    hf_hub_download,
+                    repo_id=model_id,
+                    filename="tokenizer_config.json",
+                    revision=revision,
+                    repo_type="model"
+                )
+
+                with open(config_file, 'r') as f:
+                    tokenizer_config = json.load(f)
+
+                if 'chat_template' not in tokenizer_config:
+                    error_msg = f"The model {model_id} doesn't have a chat_template in its tokenizer_config.json. Please add a chat_template before submitting or submit without it."
+                    logger.error(LogFormatter.error(error_msg))
+                    return False, error_msg
+
+                logger.info(LogFormatter.success("Valid chat template found"))
+                return True, None
+
+            except Exception as e:
+                error_msg = f"Error checking chat_template: {str(e)}"
+                logger.error(LogFormatter.error(error_msg))
+                return False, error_msg
+
+        except Exception as e:
+            error_msg = "Failed to check chat template"
+            logger.error(LogFormatter.error(error_msg, e))
+            return False, str(e)
+
+    async def is_model_on_hub(
+        self,
+        model_name: str,
+        revision: str,
+        test_tokenizer: bool = False,
+        trust_remote_code: bool = False
+    ) -> Tuple[bool, Optional[str], Optional[Any]]:
+        """Check if model exists and is properly configured on the Hub"""
+        try:
+            config = await asyncio.to_thread(
+                AutoConfig.from_pretrained,
+                model_name,
+                revision=revision,
+                trust_remote_code=trust_remote_code,
+                token=self.token,
+                force_download=True
+            )
+
+            if test_tokenizer:
+                try:
+                    await asyncio.to_thread(
+                        AutoTokenizer.from_pretrained,
+                        model_name,
+                        revision=revision,
+                        trust_remote_code=trust_remote_code,
+                        token=self.token
+                    )
+                except ValueError as e:
+                    return False, f"uses a tokenizer which is not in a transformers release: {e}", None
+                except Exception:
+                    return False, "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?", None
+
+            return True, None, config
+
+        except ValueError:
+            return False, "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.", None
+        except Exception as e:
+            if "You are trying to access a gated repo." in str(e):
+                return True, "uses a gated model.", None
+            return False, f"was not found or misconfigured on the hub! Error raised was {e.args[0]}", None
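`get_safetensors_metadata` leans on the safetensors container layout: a file begins with an 8-byte little-endian unsigned integer giving the byte length of a JSON header, followed immediately by that header (tensor names mapped to dtype/shape/offset entries, plus an optional `__metadata__` block). A minimal local-file sketch of the same parse, for comparison with the HTTP variant above; `model.safetensors` is a placeholder path:

```python
import json
import struct

def read_safetensors_header(path: str) -> dict:
    """Read the JSON header at the start of a safetensors file."""
    with open(path, "rb") as f:
        # First 8 bytes: little-endian u64 header length
        (header_len,) = struct.unpack("<Q", f.read(8))
        # Next header_len bytes: the JSON header itself
        return json.loads(f.read(header_len))

# header = read_safetensors_header("model.safetensors")  # hypothetical local file
```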
backend/pyproject.toml ADDED
@@ -0,0 +1,30 @@
+[tool.poetry]
+name = "llm-leaderboard-backend"
+version = "0.1.0"
+description = "Backend for the Open LLM Leaderboard"
+authors = ["Your Name <[email protected]>"]
+
+[tool.poetry.dependencies]
+python = "^3.9"
+fastapi = "^0.104.1"
+uvicorn = {extras = ["standard"], version = "^0.24.0"}
+numpy = "1.24.3"
+pandas = "^2.0.0"
+datasets = "^2.0.0"
+pyarrow = "^14.0.1"
+python-multipart = "^0.0.6"
+huggingface-hub = "^0.19.0"
+transformers = "^4.35.0"
+safetensors = "^0.4.0"
+aiofiles = "^24.1.0"
+fastapi-cache2 = "^0.2.1"
+
+[tool.poetry.group.dev.dependencies]
+pytest = "^7.4.0"
+black = "^23.7.0"
+isort = "^5.12.0"
+flake8 = "^6.1.0"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
backend/utils/analyze_prod_datasets.py ADDED
@@ -0,0 +1,170 @@
+import os
+import json
+import logging
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, Any, List
+from huggingface_hub import HfApi
+from dotenv import load_dotenv
+from app.config.hf_config import HF_ORGANIZATION
+
+# Get the backend directory path
+BACKEND_DIR = Path(__file__).parent.parent
+ROOT_DIR = BACKEND_DIR.parent
+
+# Load environment variables from .env file in root directory
+load_dotenv(ROOT_DIR / ".env")
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# Initialize Hugging Face API
+HF_TOKEN = os.getenv("HF_TOKEN")
+if not HF_TOKEN:
+    raise ValueError("HF_TOKEN not found in environment variables")
+api = HfApi(token=HF_TOKEN)
+
+def analyze_dataset(repo_id: str) -> Dict[str, Any]:
+    """Analyze a dataset and return statistics"""
+    try:
+        # Get dataset info
+        dataset_info = api.dataset_info(repo_id=repo_id)
+
+        # Get file list
+        files = api.list_repo_files(repo_id, repo_type="dataset")
+
+        # Get last commit info
+        commits = api.list_repo_commits(repo_id, repo_type="dataset")
+        last_commit = next(iter(commits), None)
+
+        # Count lines in jsonl files
+        total_entries = 0
+        for file in files:
+            if file.endswith('.jsonl'):
+                try:
+                    # Download file content
+                    content = api.hf_hub_download(
+                        repo_id=repo_id,
+                        filename=file,
+                        repo_type="dataset"
+                    )
+
+                    # Count lines
+                    with open(content, 'r') as f:
+                        for _ in f:
+                            total_entries += 1
+
+                except Exception as e:
+                    logger.error(f"Error processing file {file}: {str(e)}")
+                    continue
+
+        # Special handling for requests dataset
+        if repo_id == f"{HF_ORGANIZATION}/requests":
+            pending_count = 0
+            completed_count = 0
+
+            try:
+                content = api.hf_hub_download(
+                    repo_id=repo_id,
+                    filename="eval_requests.jsonl",
+                    repo_type="dataset"
+                )
+
+                with open(content, 'r') as f:
+                    for line in f:
+                        try:
+                            entry = json.loads(line)
+                            if entry.get("status") == "pending":
+                                pending_count += 1
+                            elif entry.get("status") == "completed":
+                                completed_count += 1
+                        except json.JSONDecodeError:
+                            continue
+
+            except Exception as e:
+                logger.error(f"Error analyzing requests: {str(e)}")
+
+        # Build response
+        response = {
+            "id": repo_id,
+ "last_modified": last_commit.created_at if last_commit else None,
95
+ "total_entries": total_entries,
96
+ "file_count": len(files),
97
+ "size_bytes": dataset_info.size_in_bytes,
98
+ "downloads": dataset_info.downloads,
99
+ }
100
+
101
+ # Add request-specific info if applicable
102
+ if repo_id == f"{HF_ORGANIZATION}/requests":
103
+ response.update({
104
+ "pending_requests": pending_count,
105
+ "completed_requests": completed_count
106
+ })
107
+
108
+ return response
109
+
110
+ except Exception as e:
111
+ logger.error(f"Error analyzing dataset {repo_id}: {str(e)}")
112
+ return {
113
+ "id": repo_id,
114
+ "error": str(e)
115
+ }
116
+
117
+ def main():
118
+ """Main function to analyze all datasets"""
119
+ try:
120
+ # List of datasets to analyze
121
+ datasets = [
122
+ {
123
+ "id": f"{HF_ORGANIZATION}/contents",
124
+ "description": "Aggregated results"
125
+ },
126
+ {
127
+ "id": f"{HF_ORGANIZATION}/requests",
128
+ "description": "Evaluation requests"
129
+ },
130
+ {
131
+ "id": f"{HF_ORGANIZATION}/votes",
132
+ "description": "User votes"
133
+ },
134
+ {
135
+ "id": f"{HF_ORGANIZATION}/maintainers-highlight",
136
+ "description": "Highlighted models"
137
+ }
138
+ ]
139
+
140
+ # Analyze each dataset
141
+ results = []
142
+ for dataset in datasets:
143
+ logger.info(f"\nAnalyzing {dataset['description']} ({dataset['id']})...")
144
+ result = analyze_dataset(dataset['id'])
145
+ results.append(result)
146
+
147
+ if 'error' in result:
148
+ logger.error(f"❌ Error: {result['error']}")
149
+ else:
150
+ logger.info(f"✓ {result['total_entries']} entries")
151
+ logger.info(f"✓ {result['file_count']} files")
152
+ logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
153
+ logger.info(f"✓ {result['downloads']} downloads")
154
+
155
+ if 'pending_requests' in result:
156
+ logger.info(f"✓ {result['pending_requests']} pending requests")
157
+ logger.info(f"✓ {result['completed_requests']} completed requests")
158
+
159
+ if result['last_modified']:
160
+ last_modified = datetime.fromisoformat(result['last_modified'].replace('Z', '+00:00'))
161
+ logger.info(f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}")
162
+
163
+ return results
164
+
165
+ except Exception as e:
166
+ logger.error(f"Global error: {str(e)}")
167
+ return []
168
+
169
+ if __name__ == "__main__":
170
+ main()
backend/utils/analyze_prod_models.py ADDED
@@ -0,0 +1,106 @@
+import os
+import json
+import logging
+from datetime import datetime
+from pathlib import Path
+from huggingface_hub import HfApi
+from dotenv import load_dotenv
+from app.config.hf_config import HF_ORGANIZATION
+
+# Get the backend directory path
+BACKEND_DIR = Path(__file__).parent.parent
+ROOT_DIR = BACKEND_DIR.parent
+
+# Load environment variables from .env file in root directory
+load_dotenv(ROOT_DIR / ".env")
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# Initialize Hugging Face API
+HF_TOKEN = os.getenv("HF_TOKEN")
+if not HF_TOKEN:
+    raise ValueError("HF_TOKEN not found in environment variables")
+api = HfApi(token=HF_TOKEN)
+
+def count_evaluated_models():
+    """Count the number of evaluated models"""
+    try:
+        # Get dataset info
+        dataset_info = api.dataset_info(repo_id=f"{HF_ORGANIZATION}/contents", repo_type="dataset")
+
+        # Get file list
+        files = api.list_repo_files(f"{HF_ORGANIZATION}/contents", repo_type="dataset")
+
+        # Get last commit info
+        commits = api.list_repo_commits(f"{HF_ORGANIZATION}/contents", repo_type="dataset")
+        last_commit = next(iter(commits), None)
+
+        # Count lines in jsonl files
+        total_entries = 0
+        for file in files:
+            if file.endswith('.jsonl'):
+                try:
+                    # Download file content
+                    content = api.hf_hub_download(
+                        repo_id=f"{HF_ORGANIZATION}/contents",
+                        filename=file,
+                        repo_type="dataset"
+                    )
+
+                    # Count lines
+                    with open(content, 'r') as f:
+                        for _ in f:
+                            total_entries += 1
+
+                except Exception as e:
+                    logger.error(f"Error processing file {file}: {str(e)}")
+                    continue
+
+        # Build response
+        response = {
+            "total_models": total_entries,
+ "last_modified": last_commit.created_at if last_commit else None,
67
+ "file_count": len(files),
68
+ "size_bytes": dataset_info.size_in_bytes,
69
+ "downloads": dataset_info.downloads
70
+ }
71
+
72
+ return response
73
+
74
+ except Exception as e:
75
+ logger.error(f"Error counting evaluated models: {str(e)}")
76
+ return {
77
+ "error": str(e)
78
+ }
79
+
80
+ def main():
81
+ """Main function to count evaluated models"""
82
+ try:
83
+ logger.info("\nAnalyzing evaluated models...")
84
+ result = count_evaluated_models()
85
+
86
+ if 'error' in result:
87
+ logger.error(f"❌ Error: {result['error']}")
88
+ else:
89
+ logger.info(f"✓ {result['total_models']} models evaluated")
90
+ logger.info(f"✓ {result['file_count']} files")
91
+ logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
92
+ logger.info(f"✓ {result['downloads']} downloads")
93
+
94
+ if result['last_modified']:
95
+ last_modified = datetime.fromisoformat(result['last_modified'].replace('Z', '+00:00'))
96
+ logger.info(f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}")
97
+
98
+ return result
99
+
100
+ except Exception as e:
101
+ logger.error(f"Global error: {str(e)}")
102
+ return {"error": str(e)}
103
+
104
+ if __name__ == "__main__":
105
+ main()
backend/utils/last_activity.py ADDED
@@ -0,0 +1,164 @@
+import os
+import json
+import logging
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, Any, List, Tuple
+from huggingface_hub import HfApi
+from dotenv import load_dotenv
+
+# Get the backend directory path
+BACKEND_DIR = Path(__file__).parent.parent
+ROOT_DIR = BACKEND_DIR.parent
+
+# Load environment variables from .env file in root directory
+load_dotenv(ROOT_DIR / ".env")
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# Initialize Hugging Face API
+HF_TOKEN = os.getenv("HF_TOKEN")
+if not HF_TOKEN:
+    raise ValueError("HF_TOKEN not found in environment variables")
+api = HfApi(token=HF_TOKEN)
+
+# Default organization
+HF_ORGANIZATION = os.getenv('HF_ORGANIZATION', 'open-llm-leaderboard')
+
+def get_last_votes(limit: int = 5) -> List[Dict]:
+    """Get the last votes from the votes dataset"""
+    try:
+        logger.info("\nFetching last votes...")
+
+        # Download and read votes file
+        logger.info("Downloading votes file...")
+        votes_file = api.hf_hub_download(
+            repo_id=f"{HF_ORGANIZATION}/votes",
+            filename="votes_data.jsonl",
+            repo_type="dataset"
+        )
+
+        logger.info("Reading votes file...")
+        votes = []
+        with open(votes_file, 'r') as f:
+            for line in f:
+                try:
+                    vote = json.loads(line)
+                    votes.append(vote)
+                except json.JSONDecodeError:
+                    continue
+
+        # Sort by timestamp and get last n votes
+        logger.info("Sorting votes...")
+        votes.sort(key=lambda x: x.get('timestamp', ''), reverse=True)
+        last_votes = votes[:limit]
+
+        logger.info(f"✓ Found {len(last_votes)} recent votes")
+        return last_votes
+
+    except Exception as e:
+        logger.error(f"Error reading votes: {str(e)}")
+        return []
+
+def get_last_models(limit: int = 5) -> List[Dict]:
+    """Get the last models from the requests dataset using commit history"""
+    try:
+        logger.info("\nFetching last model submissions...")
+
+        # Get commit history
+        logger.info("Getting commit history...")
+        commits = list(api.list_repo_commits(
+            repo_id=f"{HF_ORGANIZATION}/requests",
+            repo_type="dataset"
+        ))
+        logger.info(f"Found {len(commits)} commits")
+
+        # Track processed files to avoid duplicates
+        processed_files = set()
+        models = []
+
+        # Process commits until we have enough models
+        for i, commit in enumerate(commits):
+            logger.info(f"Processing commit {i+1}/{len(commits)} ({commit.created_at})")
+
+            # Look at added/modified files in this commit
+            files_to_process = [f for f in (commit.added + commit.modified) if f.endswith('.json')]
+            if files_to_process:
+                logger.info(f"Found {len(files_to_process)} JSON files in commit")
+
+            for file in files_to_process:
+                if file in processed_files:
+                    continue
+
+                processed_files.add(file)
+                logger.info(f"Downloading {file}...")
+
+                try:
+                    # Download and read the file
+                    content = api.hf_hub_download(
+                        repo_id=f"{HF_ORGANIZATION}/requests",
+                        filename=file,
+                        repo_type="dataset"
+                    )
+
+                    with open(content, 'r') as f:
+                        model_data = json.load(f)
+                        models.append(model_data)
+                        logger.info(f"✓ Added model {model_data.get('model', 'Unknown')}")
+
+                        if len(models) >= limit:
+                            logger.info("Reached desired number of models")
+                            break
+
+                except Exception as e:
+                    logger.error(f"Error reading file {file}: {str(e)}")
+                    continue
+
+            if len(models) >= limit:
+                break
+
+        logger.info(f"✓ Found {len(models)} recent model submissions")
+        return models
+
+    except Exception as e:
+        logger.error(f"Error reading models: {str(e)}")
+        return []
+
+def main():
+    """Display last activities from the leaderboard"""
+    try:
+        # Get last votes
+        logger.info("\n=== Last Votes ===")
+        last_votes = get_last_votes()
+        if last_votes:
+            for vote in last_votes:
+                logger.info(f"\nModel: {vote.get('model')}")
+                logger.info(f"User: {vote.get('username')}")
+                logger.info(f"Timestamp: {vote.get('timestamp')}")
+        else:
+            logger.info("No votes found")
+
+        # Get last model submissions
+        logger.info("\n=== Last Model Submissions ===")
+        last_models = get_last_models()
+        if last_models:
+            for model in last_models:
+                logger.info(f"\nModel: {model.get('model')}")
+                logger.info(f"Submitter: {model.get('sender', 'Unknown')}")
+                logger.info(f"Status: {model.get('status', 'Unknown')}")
+                logger.info(f"Submission Time: {model.get('submitted_time', 'Unknown')}")
+                logger.info(f"Precision: {model.get('precision', 'Unknown')}")
+                logger.info(f"Weight Type: {model.get('weight_type', 'Unknown')}")
+        else:
+            logger.info("No models found")
+
+    except Exception as e:
+        logger.error(f"Global error: {str(e)}")
+
+if __name__ == "__main__":
+    main()
backend/utils/sync_datasets_locally.py ADDED
@@ -0,0 +1,130 @@
+import os
+import shutil
+import tempfile
+import logging
+from pathlib import Path
+from huggingface_hub import HfApi, snapshot_download, upload_folder, create_repo
+from dotenv import load_dotenv
+
+# Configure source and destination usernames
+SOURCE_USERNAME = "open-llm-leaderboard"
+DESTINATION_USERNAME = "tfrere"
+
+# Get the backend directory path
+BACKEND_DIR = Path(__file__).parent.parent
+ROOT_DIR = BACKEND_DIR.parent
+
+# Load environment variables from .env file in root directory
+load_dotenv(ROOT_DIR / ".env")
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# List of dataset names to sync
+DATASET_NAMES = [
+    "votes",
+    "results",
+    "requests",
+    "contents",
+    "maintainers-highlight",
+]
+
+# Build list of datasets with their source and destination paths
+DATASETS = [
+    (name, f"{SOURCE_USERNAME}/{name}", f"{DESTINATION_USERNAME}/{name}")
+    for name in DATASET_NAMES
+]
+
+# Initialize Hugging Face API
+api = HfApi()
+
+def ensure_repo_exists(repo_id, token):
+    """Ensure the repository exists, create it if it doesn't"""
+    try:
+        api.repo_info(repo_id=repo_id, repo_type="dataset")
+        logger.info(f"✓ Repository {repo_id} already exists")
+    except Exception:
+        logger.info(f"Creating repository {repo_id}...")
+        create_repo(
+            repo_id=repo_id,
+            repo_type="dataset",
+            token=token,
+            private=True
+        )
+        logger.info(f"✓ Repository {repo_id} created")
+
+def process_dataset(dataset_info, token):
+    """Process a single dataset"""
+    name, source_dataset, destination_dataset = dataset_info
+    try:
+        logger.info(f"\n📥 Processing dataset: {name}")
+
+        # Ensure destination repository exists
+        ensure_repo_exists(destination_dataset, token)
+
+        # Create a temporary directory for this dataset
+        with tempfile.TemporaryDirectory() as temp_dir:
+            try:
+                # List files in source dataset
+                logger.info(f"Listing files in {source_dataset}...")
+                files = api.list_repo_files(source_dataset, repo_type="dataset")
+                logger.info(f"Detected structure: {len(files)} files")
+
+                # Download dataset
+                logger.info(f"Downloading from {source_dataset}...")
+                local_dir = snapshot_download(
+                    repo_id=source_dataset,
+                    repo_type="dataset",
+                    local_dir=temp_dir,
+                    token=token
+                )
+                logger.info("✓ Download complete")
+
+                # Upload to destination while preserving structure
+                logger.info(f"📤 Uploading to {destination_dataset}...")
+                api.upload_folder(
+                    folder_path=local_dir,
+                    repo_id=destination_dataset,
+                    repo_type="dataset",
+                    token=token
+                )
+                logger.info(f"✅ {name} copied successfully!")
+                return True
+
+            except Exception as e:
+                logger.error(f"❌ Error processing {name}: {str(e)}")
+                return False
+
+    except Exception as e:
+        logger.error(f"❌ Error for {name}: {str(e)}")
+        return False
+
+def copy_datasets():
+    try:
+        logger.info("🔑 Checking authentication...")
+        # Get token from .env file
+        token = os.getenv("HF_TOKEN")
+        if not token:
+            raise ValueError("HF_TOKEN not found in .env file")
+
+        # Process datasets sequentially
+        results = []
+        for dataset_info in DATASETS:
+            success = process_dataset(dataset_info, token)
+            results.append((dataset_info[0], success))
+
+        # Print final summary
+        logger.info("\n📊 Final summary:")
+        for dataset, success in results:
+            status = "✅ Success" if success else "❌ Failure"
+            logger.info(f"{dataset}: {status}")
+
+    except Exception as e:
+        logger.error(f"❌ Global error: {str(e)}")
+
+if __name__ == "__main__":
+    copy_datasets()
docker-compose.yml ADDED
@@ -0,0 +1,35 @@
+version: '3.8'
+
+services:
+  backend:
+    build:
+      context: ./backend
+      dockerfile: Dockerfile.dev
+      args:
+        - HF_TOKEN=${HF_TOKEN}
+    ports:
+      - "${BACKEND_PORT:-8000}:8000"
+    volumes:
+      - ./backend:/app
+    environment:
+      - ENVIRONMENT=${ENVIRONMENT:-development}
+      - HF_TOKEN=${HF_TOKEN}
+      - HF_HOME=${HF_HOME:-/.cache}
+    command: uvicorn app.asgi:app --host 0.0.0.0 --port 8000 --reload
+
+  frontend:
+    build:
+      context: ./frontend
+      dockerfile: Dockerfile.dev
+    ports:
+      - "${FRONTEND_PORT:-7860}:7860"
+    volumes:
+      - ./frontend:/app
+      - /app/node_modules
+    environment:
+      - NODE_ENV=${ENVIRONMENT:-development}
+      - CHOKIDAR_USEPOLLING=true
+      - PORT=${FRONTEND_PORT:-7860}
+    command: npm start
+    stdin_open: true
+    tty: true
frontend/Dockerfile.dev ADDED
@@ -0,0 +1,15 @@
+FROM node:18
+
+WORKDIR /app
+
+# Install required global dependencies
+RUN npm install -g react-scripts
+
+# Copy package.json and package-lock.json
+COPY package*.json ./
+
+# Install project dependencies
+RUN npm install
+
+# Volume will be mounted here, no need for COPY
+CMD ["npm", "start"]
frontend/README.md ADDED
@@ -0,0 +1,80 @@
+# Frontend - Open LLM Leaderboard 🏆
+
+React interface for exploring and comparing open-source language models.
+
+## 🏗 Architecture
+
+```mermaid
+flowchart TD
+    Client(["User Browser"]) --> Components["React Components"]
+
+    subgraph Frontend
+        Components --> Context["Context Layer<br>• LeaderboardContext<br>• Global State"]
+
+        API["API Layer<br>• /api/leaderboard/formatted<br>• TanStack Query"] --> |Data Feed| Context
+
+        Context --> Hooks["Hooks Layer<br>• Data Processing<br>• Filtering<br>• Caching"]
+
+        Hooks --> Features["Features<br>• Table Management<br>• Search & Filters<br>• Display Options"]
+        Features --> Cache["Cache Layer<br>• LocalStorage<br>• URL State"]
+    end
+
+    API --> Backend["Backend Server"]
+
+    style Backend fill:#f96,stroke:#333,stroke-width:2px
+```
+
+## ✨ Core Features
+
+- 🔍 **Search & Filters**: Real-time filtering, regex search, advanced filters
+- 📊 **Data Visualization**: Interactive table, customizable columns, sorting
+- 🔄 **State Management**: URL sync, client-side caching (5min TTL)
+- 📱 **Responsive Design**: Mobile-friendly, dark/light themes
+
+## 🛠 Tech Stack
+
+- React 18 + Material-UI
+- TanStack Query & Table
+- React Router v6
+
+## 📁 Project Structure
+
+```
+src/
+├── pages/
+│   └── LeaderboardPage/
+│       ├── components/   # UI Components
+│       ├── context/      # Global State
+│       └── hooks/        # Data Processing
+├── components/           # Shared Components
+└── utils/                # Helper Functions
+```
+
+## 🚀 Development
+
+```bash
+# Install dependencies
+npm install
+
+# Start development server
+npm start
+
+# Production build
+npm run build
+```
+
+## 🔧 Environment Variables
+
+```env
+# API Configuration
+REACT_APP_API_URL=http://localhost:8000
+REACT_APP_CACHE_DURATION=300000  # 5 minutes
+```
+
+## 🔄 Data Flow
+
+1. The API layer fetches leaderboard data from the backend
+2. The context stores and manages global state
+3. Hooks handle data processing and filtering
+4. Components render based on the processed data
+5. The cache maintains user preferences and URL state
frontend/package.json ADDED
@@ -0,0 +1,55 @@
+{
+  "name": "open-llm-leaderboard",
+  "version": "0.1.0",
+  "private": true,
+  "dependencies": {
+    "@emotion/react": "^11.13.3",
+    "@emotion/styled": "^11.13.0",
+    "@huggingface/hub": "^0.14.0",
+    "@mui/icons-material": "^6.1.7",
+    "@mui/lab": "^6.0.0-beta.16",
+    "@mui/material": "^6.1.6",
+    "@mui/x-data-grid": "^7.22.2",
+    "@tanstack/react-query": "^5.62.2",
+    "@tanstack/react-table": "^8.20.5",
+    "@tanstack/react-virtual": "^3.10.9",
+    "@testing-library/jest-dom": "^5.17.0",
+    "@testing-library/react": "^13.4.0",
+    "@testing-library/user-event": "^13.5.0",
+    "compression": "^1.7.4",
+    "cors": "^2.8.5",
+ "express": "^4.18.2",
22
+ "react": "^18.3.1",
23
+ "react-dom": "^18.3.1",
24
+ "react-router-dom": "^6.28.0",
25
+ "react-scripts": "5.0.1",
26
+ "serve-static": "^1.15.0",
27
+ "web-vitals": "^2.1.4"
28
+ },
29
+ "scripts": {
30
+ "start": "react-scripts start",
31
+ "build": "react-scripts build",
32
+ "test": "react-scripts test",
33
+ "eject": "react-scripts eject",
34
+ "serve": "node server.js"
35
+ },
36
+ "eslintConfig": {
37
+ "extends": [
38
+ "react-app",
39
+ "react-app/jest"
40
+ ]
41
+ },
42
+ "browserslist": {
43
+ "production": [
44
+ ">0.2%",
45
+ "not dead",
46
+ "not op_mini all"
47
+ ],
48
+ "development": [
49
+ "last 1 chrome version",
50
+ "last 1 firefox version",
51
+ "last 1 safari version"
52
+ ]
53
+ },
54
+ "proxy": "http://backend:8000"
55
+ }
frontend/public/index.html ADDED
@@ -0,0 +1,74 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8" />
+    <link rel="icon" href="%PUBLIC_URL%/logo32.png" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <meta
+      name="description"
+      content="Interactive leaderboard tracking and comparing open-source Large Language Models across multiple benchmarks: IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
+    />
+
+    <!-- Open Graph / Facebook -->
+    <meta property="og:type" content="website" />
+    <meta
+      property="og:url"
+      content="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard"
+    />
+    <meta
+      property="og:title"
+      content="Open LLM Leaderboard - Compare Open Source Large Language Models"
+    />
+    <meta
+      property="og:description"
+      content="Interactive leaderboard for comparing LLM performance across multiple benchmarks. Features real-time filtering, community voting, and comprehensive model analysis with benchmarks like IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
+    />
+ <meta property="og:image" content="%PUBLIC_URL%/og-image.png" />
27
+
+    <!-- Twitter -->
+    <meta property="twitter:card" content="summary_large_image" />
+    <meta
+      property="twitter:url"
+      content="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard"
+    />
+    <meta
+      property="twitter:title"
+      content="Open LLM Leaderboard - Compare Open Source Large Language Models"
+    />
+    <meta
+      property="twitter:description"
+      content="Interactive leaderboard for comparing LLM performance across multiple benchmarks. Features real-time filtering, community voting, and comprehensive model analysis with benchmarks like IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
+    />
+ <meta property="twitter:image" content="%PUBLIC_URL%/og-image.png" />
43
+ <!--
44
+ Notice the use of %PUBLIC_URL% in the tags above.
45
+ It will be replaced with the URL of the `public` folder during the build.
46
+ Only files inside the `public` folder can be referenced from the HTML.
47
+
48
+ Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
49
+ work correctly both with client-side routing and a non-root public URL.
50
+ Learn how to configure a non-root public URL by running `npm run build`.
51
+ -->
52
+ <title>
53
+ Open LLM Leaderboard - Compare Open Source Large Language Models
54
+ </title>
55
+ <link
56
+ href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600;700&display=swap"
57
+ rel="stylesheet"
58
+ />
59
+ </head>
60
+ <body>
61
+ <noscript>You need to enable JavaScript to run this app.</noscript>
62
+ <div id="root"></div>
63
+ <!--
64
+ This HTML file is a template.
65
+ If you open it directly in the browser, you will see an empty page.
66
+
67
+ You can add webfonts, meta tags, or analytics to this file.
68
+ The build step will place the bundled scripts into the <body> tag.
69
+
70
+ To begin the development, run `npm start` or `yarn start`.
71
+ To create a production bundle, use `npm run build` or `yarn build`.
72
+ -->
73
+ </body>
74
+ </html>
frontend/public/logo256.png ADDED
frontend/public/logo32.png ADDED
frontend/public/og-image.jpg ADDED
frontend/public/robots.txt ADDED
@@ -0,0 +1,3 @@
+# https://www.robotstxt.org/robotstxt.html
+User-agent: *
+Disallow:
frontend/server.js ADDED
@@ -0,0 +1,85 @@
+const express = require("express");
+const cors = require("cors");
+const compression = require("compression");
+const path = require("path");
+const serveStatic = require("serve-static");
+const { createProxyMiddleware } = require("http-proxy-middleware");
+
+const app = express();
+const port = process.env.PORT || 7860;
+const apiPort = process.env.INTERNAL_API_PORT || 7861;
+
+// Enable CORS for all routes
+app.use(cors());
+
+// Enable GZIP compression
+app.use(compression());
+
+// Proxy all API requests to the Python backend
+app.use(
+  "/api",
+  createProxyMiddleware({
+    target: `http://127.0.0.1:${apiPort}`,
+    changeOrigin: true,
+    onError: (err, req, res) => {
+      console.error("Proxy Error:", err);
+      res.status(500).json({ error: "Proxy Error", details: err.message });
+    },
+  })
+);
+
+// Serve static files from the build directory
+app.use(
+  express.static(path.join(__dirname, "build"), {
+    // Don't cache HTML files
+    setHeaders: (res, path) => {
+      if (path.endsWith(".html")) {
+        res.setHeader("Cache-Control", "no-cache, no-store, must-revalidate");
+        res.setHeader("Pragma", "no-cache");
+        res.setHeader("Expires", "0");
+      } else {
+        // Cache other static resources for 1 year
+        res.setHeader("Cache-Control", "public, max-age=31536000");
+      }
+    },
+  })
+);
+
+// Middleware to preserve URL parameters
+app.use((req, res, next) => {
+  // Don't interfere with API requests
+  if (req.url.startsWith("/api")) {
+    return next();
+  }
+
+  // Preserve original URL parameters
+  req.originalUrl = req.url;
+  next();
+});
+
+// Handle all other routes by serving index.html
+ app.get("*", (req, res) => {
62
+  // Don't interfere with API requests
+  if (req.url.startsWith("/api")) {
+    return next();
+  }
+
+  // Headers for client-side routing
+  res.set({
+    "Cache-Control": "no-cache, no-store, must-revalidate",
+    Pragma: "no-cache",
+    Expires: "0",
+  });
+
+  // Send index.html for all other routes
+  res.sendFile(path.join(__dirname, "build", "index.html"));
+});
+
+app.listen(port, "0.0.0.0", () => {
+  console.log(
+    `Frontend server is running on port ${port} in ${
+      process.env.NODE_ENV || "development"
+    } mode`
+  );
+  console.log(`API proxy target: http://127.0.0.1:${apiPort}`);
+});
frontend/src/App.js ADDED
@@ -0,0 +1,115 @@
+import React, { useEffect } from "react";
+import {
+  HashRouter as Router,
+  Routes,
+  Route,
+  useSearchParams,
+  useLocation,
+} from "react-router-dom";
+import { ThemeProvider } from "@mui/material/styles";
+import { Box, CssBaseline } from "@mui/material";
+import Navigation from "./components/Navigation/Navigation";
+import LeaderboardPage from "./pages/LeaderboardPage/LeaderboardPage";
+import AddModelPage from "./pages/AddModelPage/AddModelPage";
+import QuotePage from "./pages/QuotePage/QuotePage";
+import VoteModelPage from "./pages/VoteModelPage/VoteModelPage";
+import Footer from "./components/Footer/Footer";
+import getTheme from "./config/theme";
+import { useThemeMode } from "./hooks/useThemeMode";
+import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
+import LeaderboardProvider from "./pages/LeaderboardPage/components/Leaderboard/context/LeaderboardContext";
+
+const queryClient = new QueryClient({
+  defaultOptions: {
+    queries: {
+      retry: 1,
+      refetchOnWindowFocus: false,
+    },
+  },
+});
+
+function UrlHandler() {
+  const location = useLocation();
+  const [searchParams] = useSearchParams();
+
+  // Sync the URL with the parent HF page
+  useEffect(() => {
+    // Check whether we are running inside an HF Space iframe
+    const isHFSpace = window.location !== window.parent.location;
+    if (!isHFSpace) return;
+
+    // Sync query and hash from this embedded app to the parent page URL
+    const queryString = window.location.search;
+    const hash = window.location.hash;
+
+    // HF Spaces' special message type to update the query string and the hash in the parent page URL
+    window.parent.postMessage(
+      {
+        queryString,
+        hash,
+      },
+      "https://huggingface.co"
+    );
+  }, [location, searchParams]);
+
+  // Read the updated hash reactively
+  useEffect(() => {
+    const handleHashChange = (event) => {
+      console.log("hash change event", event);
+    };
+
+    window.addEventListener("hashchange", handleHashChange);
+    return () => window.removeEventListener("hashchange", handleHashChange);
+  }, []);
+
+  return null;
+}
+
+function App() {
+  const { mode, toggleTheme } = useThemeMode();
+  const theme = getTheme(mode);
+
+  return (
+    <QueryClientProvider client={queryClient}>
+      <ThemeProvider theme={theme}>
+        <CssBaseline />
+        <Router>
+          <LeaderboardProvider>
+            <UrlHandler />
+            <Box
+              sx={{
+                minHeight: "100vh",
+                display: "flex",
+                flexDirection: "column",
+                bgcolor: "background.default",
+                color: "text.primary",
+              }}
+            >
+              <Navigation onToggleTheme={toggleTheme} mode={mode} />
+              <Box
+                sx={{
+                  flex: 1,
+                  display: "flex",
+                  flexDirection: "column",
+                  width: "100%",
+                  px: 4,
+                  pb: 4,
+                }}
+              >
+                <Routes>
+                  <Route path="/" element={<LeaderboardPage />} />
+                  <Route path="/add" element={<AddModelPage />} />
+                  <Route path="/quote" element={<QuotePage />} />
+                  <Route path="/vote" element={<VoteModelPage />} />
+                </Routes>
+              </Box>
+              <Footer />
+            </Box>
+          </LeaderboardProvider>
+        </Router>
+      </ThemeProvider>
+    </QueryClientProvider>
+  );
+}
+
+export default App;
frontend/src/components/Footer/Footer.js ADDED
@@ -0,0 +1,29 @@
+import React from 'react';
+import { Box, Typography, Link } from '@mui/material';
+
+const Footer = () => {
+  return (
+    <Box
+      component="footer"
+      sx={{
+        width: '100%',
+        py: 4,
+        textAlign: 'center',
+      }}
+    >
+      <Typography variant="body2" color="text.secondary">
+        © 2024 Hugging Face - Open LLM Leaderboard - Made with 🤗 by the HF team -{' '}
+        <Link
+          href="https://huggingface.co"
+          target="_blank"
+          rel="noopener noreferrer"
+          color="inherit"
+        >
+          huggingface.co
+        </Link>
+      </Typography>
+    </Box>
+  );
+};
+
+export default Footer;