Spaces · Runtime error
rusticluftig committed · Commit a2dcddd · Parent(s): 17bb6e0

First pass at a working leaderboard

Browse files:
- .gitignore +3 -0
- app.py +7 -9
- competitions.py +23 -0
- requirements.txt +2 -1
- utils.py +47 -18
.gitignore ADDED

@@ -0,0 +1,3 @@
+.venv
+__pycache__/
+.env
app.py CHANGED

@@ -9,6 +9,7 @@ from dotenv import load_dotenv
 from huggingface_hub import HfApi
 from apscheduler.schedulers.background import BackgroundScheduler
 
+import competitions
 import utils
 
 FONT = (
@@ -20,11 +21,6 @@ HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/ma
 EVALUATION_DETAILS = """<ul><li><b>Name:</b> the 🤗 Hugging Face model name (click to go to the model card)</li><li><b>Rewards / Day:</b> the expected rewards per day based on current ranking.</li><li><b>Last Average Loss:</b> the last loss value on the evaluation data for the model as calculated by a validator (lower is better)</li><li><b>UID:</b> the Bittensor UID of the miner</li><li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-9/" target="_blank">taostats</a>."""
 EVALUATION_HEADER = """<h3 align="center">Shows the latest internal evaluation statistics as calculated by the Opentensor validator</h3>"""
 
-# A map of competition IDs to HTML descriptions.
-COMPETITION_DETAILS: Dict[int, str] = {
-    1: """<b>Competition ID 1:</b> Produce the best fine-tuned model from a Subnet 9 pretrained model. Models are evaluated using synthetic prompt/response data from Subnet 18."""
-}
-
 HF_REPO_ID = "macrocosm-os/finetuning-leaderboard"
 SECONDS_PER_BLOCK = 12
 
@@ -93,8 +89,9 @@ def main():
         show_stale = gr.Checkbox(label="Show Stale", interactive=True)
         competition_leaderboards = []
         # TODO: Dynamically generate per-competition leaderboards based on model_data.
-
-
+        competition_details = competitions.COMPETITION_DETAILS[1]
+        with gr.Accordion(f"{competition_details.name} competition"):
+            gr.HTML(competition_details.html_description)
         competition_leaderboards.append(gr.components.Dataframe(
             value=utils.leaderboard_data(model_data, scores, show_stale.value),
             headers=["Name", "Win Rate", "Average Loss", "Weight", "UID", "Block"],
@@ -110,13 +107,14 @@ def main():
             outputs=competition_leaderboards,
         )
 
+        # TODO: Make this a multi-competition line plot
         gr.LinePlot(
             utils.get_losses_over_time(vali_runs),
             x="timestamp",
             x_title="Date",
-            y="
+            y="SN9_MODEL",
             y_title="Average Loss",
-            tooltip="
+            tooltip="SN9_MODEL",
             interactive=True,
             visible=True,
             width=1024,
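The TODO in the third hunk flags that these components should eventually be generated per competition rather than hard-coded to competition ID 1. A minimal sketch of that loop, assuming the `competitions.COMPETITION_DETAILS` registry and the Gradio components already present in this diff (a per-competition filter for `leaderboard_data` is hypothetical and not part of the current signature):

for comp_id, details in competitions.COMPETITION_DETAILS.items():
    with gr.Accordion(f"{details.name} competition"):
        gr.HTML(details.html_description)
        competition_leaderboards.append(
            gr.components.Dataframe(
                # Hypothetical: filtering rows per competition would need a new
                # parameter; leaderboard_data in this diff takes no such argument.
                value=utils.leaderboard_data(model_data, scores, show_stale.value),
                headers=["Name", "Win Rate", "Average Loss", "Weight", "UID", "Block"],
            )
        )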
competitions.py ADDED

@@ -0,0 +1,23 @@
+
+
+
+from dataclasses import dataclass
+from typing import Dict
+
+
+@dataclass(frozen=True)
+class CompetitionDetails:
+    # The display name of the competition.
+    name: str
+
+    # The HTML description of the competition.
+    html_description: str
+
+
+# A map of competition IDs to HTML descriptions.
+COMPETITION_DETAILS: Dict[int, CompetitionDetails] = {
+    1: CompetitionDetails(
+        name="SN9_MODEL",
+        html_description="""<b>Competition ID 1</b><br/>Produce the best fine-tuned model from a Subnet 9 pretrained model. Models are evaluated using synthetic prompt/response data from Subnet 18."""
+    )
+}
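For reference, this registry is consumed in two ways elsewhere in the commit: app.py looks up entry 1 directly, and utils.py iterates the whole dict. A quick, runnable sketch of both access patterns:

import competitions

# Direct lookup, as app.py does for competition ID 1.
details = competitions.COMPETITION_DETAILS[1]
print(details.name)              # SN9_MODEL
print(details.html_description)  # HTML later rendered via gr.HTML

# Iteration over all registered competitions, as utils.get_losses_over_time does.
for comp_id, comp in competitions.COMPETITION_DETAILS.items():
    print(comp_id, comp.name)

Because the dataclass is frozen, instances are immutable and safe to share as module-level constants.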
requirements.txt CHANGED

@@ -1,6 +1,7 @@
 bittensor
 requests
-wandb
+wandb==0.17.1
+numpy==1.26.4
 python-dotenv
 APScheduler
 huggingface-hub
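The new pins are presumably defensive: wandb==0.17.1 matches a release where the `wandb.apis.public.history.HistoryScan` import added in utils.py resolves, and numpy==1.26.4 stays on the 1.x series; treat both as this commit's snapshot rather than documented requirements.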
utils.py CHANGED

@@ -6,7 +6,9 @@ import math
 import os
 import time
 import traceback
+from collections import defaultdict
 from dataclasses import dataclass
+from email.policy import default
 from typing import Any, Dict, List, Optional, Tuple
 
 import bittensor as bt
@@ -15,6 +17,9 @@ import pandas as pd
 import wandb
 from bittensor.extrinsics.serving import get_metadata
 from dotenv import load_dotenv
+from wandb.apis.public.history import HistoryScan
+
+import competitions
 
 # TODO: Update once registered
 NETUID = 179
@@ -235,24 +240,48 @@ def get_validator_weights(
 def get_losses_over_time(wandb_runs: List) -> pd.DataFrame:
     """Returns a dataframe of the best average model loss over time."""
     timestamps = []
-
-
+    datapoints_per_comp_id = {id: [] for id in competitions.COMPETITION_DETAILS}
+
     for run in wandb_runs:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # For each run, check the 10 most recent steps.
+        best_loss_per_competition_id = defaultdict(lambda: math.inf)
+        should_add_datapoint = False
+        min_step = max(0, run.lastHistoryStep - 10)
+        history_scan = HistoryScan(
+            run.client, run, min_step, run.lastHistoryStep, page_size=10
+        )
+        max_timestamp = None
+        for step in history_scan:
+            if "original_format_json" not in step:
+                continue
+            data = json.loads(step["original_format_json"])
+            all_uid_data = data["uid_data"]
+            timestamp = datetime.datetime.fromtimestamp(data["timestamp"])
+            if max_timestamp is None:
+                max_timestamp = timestamp
+            max_timestamp = max(max_timestamp, timestamp)
+
+            for _, uid_data in all_uid_data.items():
+                loss = uid_data.get("average_loss", math.inf)
+                competition_id = uid_data.get("competition_id", None)
+                if not competition_id:
+                    continue
+
+                if loss < best_loss_per_competition_id[competition_id]:
+                    best_loss_per_competition_id[competition_id] = uid_data["average_loss"]
+                    should_add_datapoint = True
+        # Now that we've processed the run's most recent steps, check if we should add a datapoint.
+        if should_add_datapoint:
+            timestamps.append(max_timestamp)
+            # Iterate through all possible competitions and add the best loss for each.
+            # Set None for any that aren't active during this run.
+            for id, losses in datapoints_per_comp_id.items():
+                losses.append(best_loss_per_competition_id.get(id, None))
+
+    # Create a dictionary of competitions to lists of losses.
+    output_columns = {competitions.COMPETITION_DETAILS[id].name: losses for id, losses in datapoints_per_comp_id.items()}
+
+    return pd.DataFrame({"timestamp": timestamps, **output_columns})
 
 
 def next_epoch(subtensor: bt.subtensor, block: int) -> int:
@@ -384,7 +413,7 @@ def load_state_vars() -> dict[Any]:
     vali_runs = get_wandb_runs(
         project=VALIDATOR_WANDB_PROJECT,
         # TODO: Update to point to the OTF vali on finetuning
-        filters={"config.type": "validator", "config.uid":
+        filters={"config.type": "validator", "config.uid": 0},
     )
 
     scores = get_scores([x.uid for x in model_data], vali_runs)
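The column naming in `get_losses_over_time` is what ties utils.py back to app.py: each loss series is keyed by `CompetitionDetails.name`, which is why the LinePlot can use y="SN9_MODEL". An illustrative, runnable sketch of the returned frame's shape (the timestamps and loss values here are made up):

import datetime
import pandas as pd

# One row per qualifying validator run; one column per competition name.
df = pd.DataFrame(
    {
        "timestamp": [
            datetime.datetime(2024, 6, 1),
            datetime.datetime(2024, 6, 2),
        ],
        # Keyed by CompetitionDetails.name for competition ID 1.
        # None would mark runs where the competition had no reported loss.
        "SN9_MODEL": [2.31, 2.27],
    }
)
print(df)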