Spaces · Runtime error
rusticluftig committed · Commit a2dcddd · Parent(s): 17bb6e0

First pass at a working leaderboard

Browse files:
- .gitignore +3 -0
- app.py +7 -9
- competitions.py +23 -0
- requirements.txt +2 -1
- utils.py +47 -18
.gitignore ADDED

@@ -0,0 +1,3 @@
+.venv
+__pycache__/
+.env
app.py CHANGED

@@ -9,6 +9,7 @@ from dotenv import load_dotenv
 from huggingface_hub import HfApi
 from apscheduler.schedulers.background import BackgroundScheduler
 
+import competitions
 import utils
 
 FONT = (
@@ -20,11 +21,6 @@ HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/ma
 EVALUATION_DETAILS = """<ul><li><b>Name:</b> the 🤗 Hugging Face model name (click to go to the model card)</li><li><b>Rewards / Day:</b> the expected rewards per day based on current ranking.</li><li><b>Last Average Loss:</b> the last loss value on the evaluation data for the model as calculated by a validator (lower is better)</li><li><b>UID:</b> the Bittensor UID of the miner</li><li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-9/" target="_blank">taostats</a>."""
 EVALUATION_HEADER = """<h3 align="center">Shows the latest internal evaluation statistics as calculated by the Opentensor validator</h3>"""
 
-# A map of competition IDs to HTML descriptions.
-COMPETITION_DETAILS: Dict[int, str] = {
-    1: """<b>Competition ID 1:</b> Produce the best fine-tuned model from a Subnet 9 pretrained model. Models are evaluated using synthetic prompt/response data from Subnet 18."""
-}
-
 HF_REPO_ID = "macrocosm-os/finetuning-leaderboard"
 SECONDS_PER_BLOCK = 12
 
@@ -93,8 +89,9 @@ def main():
         show_stale = gr.Checkbox(label="Show Stale", interactive=True)
         competition_leaderboards = []
         # TODO: Dynamically generate per-competition leaderboards based on model_data.
-
-
+        competition_details = competitions.COMPETITION_DETAILS[1]
+        with gr.Accordion(f"{competition_details.name} competition"):
+            gr.HTML(competition_details.html_description)
         competition_leaderboards.append(gr.components.Dataframe(
             value=utils.leaderboard_data(model_data, scores, show_stale.value),
             headers=["Name", "Win Rate", "Average Loss", "Weight", "UID", "Block"],
@@ -110,13 +107,14 @@ def main():
             outputs=competition_leaderboards,
         )
 
+        # TODO: Make this a multi-competition line plot
         gr.LinePlot(
             utils.get_losses_over_time(vali_runs),
             x="timestamp",
             x_title="Date",
-            y="
+            y="SN9_MODEL",
             y_title="Average Loss",
-            tooltip="
+            tooltip="SN9_MODEL",
             interactive=True,
             visible=True,
             width=1024,
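The TODO in the third hunk flags that these components should eventually be generated per competition rather than hard-coded to competition ID 1. A minimal sketch of that loop, assuming the `competitions.COMPETITION_DETAILS` registry and the Gradio components already present in this diff (a per-competition filter for `leaderboard_data` is hypothetical and not part of the current signature):

for comp_id, details in competitions.COMPETITION_DETAILS.items():
    with gr.Accordion(f"{details.name} competition"):
        gr.HTML(details.html_description)
        competition_leaderboards.append(
            gr.components.Dataframe(
                # Hypothetical: filtering rows per competition would need a new
                # parameter; leaderboard_data in this diff takes no such argument.
                value=utils.leaderboard_data(model_data, scores, show_stale.value),
                headers=["Name", "Win Rate", "Average Loss", "Weight", "UID", "Block"],
            )
        )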
competitions.py ADDED

@@ -0,0 +1,23 @@
+
+
+
+from dataclasses import dataclass
+from typing import Dict
+
+
+@dataclass(frozen=True)
+class CompetitionDetails:
+    # The display name of the competition.
+    name: str
+
+    # The HTML description of the competition.
+    html_description: str
+
+
+# A map of competition IDs to HTML descriptions.
+COMPETITION_DETAILS: Dict[int, CompetitionDetails] = {
+    1: CompetitionDetails(
+        name="SN9_MODEL",
+        html_description="""<b>Competition ID 1</b><br/>Produce the best fine-tuned model from a Subnet 9 pretrained model. Models are evaluated using synthetic prompt/response data from Subnet 18."""
+    )
+}
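For reference, this registry is consumed in two ways elsewhere in the commit: app.py looks up entry 1 directly, and utils.py iterates the whole dict. A quick, runnable sketch of both access patterns:

import competitions

# Direct lookup, as app.py does for competition ID 1.
details = competitions.COMPETITION_DETAILS[1]
print(details.name)              # SN9_MODEL
print(details.html_description)  # HTML later rendered via gr.HTML

# Iteration over all registered competitions, as utils.get_losses_over_time does.
for comp_id, comp in competitions.COMPETITION_DETAILS.items():
    print(comp_id, comp.name)

Because the dataclass is frozen, instances are immutable and safe to share as module-level constants.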
requirements.txt CHANGED

@@ -1,6 +1,7 @@
 bittensor
 requests
-wandb
+wandb==0.17.1
+numpy==1.26.4
 python-dotenv
 APScheduler
 huggingface-hub
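The new pins are presumably defensive: wandb==0.17.1 matches a release where the `wandb.apis.public.history.HistoryScan` import added in utils.py resolves, and numpy==1.26.4 stays on the 1.x series; treat both as this commit's snapshot rather than documented requirements.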
utils.py CHANGED

@@ -6,7 +6,9 @@ import math
 import os
 import time
 import traceback
+from collections import defaultdict
 from dataclasses import dataclass
+from email.policy import default
 from typing import Any, Dict, List, Optional, Tuple
 
 import bittensor as bt
@@ -15,6 +17,9 @@ import pandas as pd
 import wandb
 from bittensor.extrinsics.serving import get_metadata
 from dotenv import load_dotenv
+from wandb.apis.public.history import HistoryScan
+
+import competitions
 
 # TODO: Update once registered
 NETUID = 179
@@ -235,24 +240,48 @@ def get_validator_weights(
 def get_losses_over_time(wandb_runs: List) -> pd.DataFrame:
     """Returns a dataframe of the best average model loss over time."""
     timestamps = []
-
-
+    datapoints_per_comp_id = {id: [] for id in competitions.COMPETITION_DETAILS}
+
     for run in wandb_runs:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # For each run, check the 10 most recent steps.
+        best_loss_per_competition_id = defaultdict(lambda: math.inf)
+        should_add_datapoint = False
+        min_step = max(0, run.lastHistoryStep - 10)
+        history_scan = HistoryScan(
+            run.client, run, min_step, run.lastHistoryStep, page_size=10
+        )
+        max_timestamp = None
+        for step in history_scan:
+            if "original_format_json" not in step:
+                continue
+            data = json.loads(step["original_format_json"])
+            all_uid_data = data["uid_data"]
+            timestamp = datetime.datetime.fromtimestamp(data["timestamp"])
+            if max_timestamp is None:
+                max_timestamp = timestamp
+            max_timestamp = max(max_timestamp, timestamp)
+
+            for _, uid_data in all_uid_data.items():
+                loss = uid_data.get("average_loss", math.inf)
+                competition_id = uid_data.get("competition_id", None)
+                if not competition_id:
+                    continue
+
+                if loss < best_loss_per_competition_id[competition_id]:
+                    best_loss_per_competition_id[competition_id] = uid_data["average_loss"]
+                    should_add_datapoint = True
+        # Now that we've processed the run's most recent steps, check if we should add a datapoint.
+        if should_add_datapoint:
+            timestamps.append(max_timestamp)
+            # Iterate through all possible competitions and add the best loss for each.
+            # Set None for any that aren't active during this run.
+            for id, losses in datapoints_per_comp_id.items():
+                losses.append(best_loss_per_competition_id.get(id, None))
+
+    # Create a dictionary of competitions to lists of losses.
+    output_columns = {competitions.COMPETITION_DETAILS[id].name: losses for id, losses in datapoints_per_comp_id.items()}
+
+    return pd.DataFrame({"timestamp": timestamps, **output_columns})
 
 
 def next_epoch(subtensor: bt.subtensor, block: int) -> int:
@@ -384,7 +413,7 @@ def load_state_vars() -> dict[Any]:
     vali_runs = get_wandb_runs(
         project=VALIDATOR_WANDB_PROJECT,
         # TODO: Update to point to the OTF vali on finetuning
-        filters={"config.type": "validator", "config.uid":
+        filters={"config.type": "validator", "config.uid": 0},
     )
 
     scores = get_scores([x.uid for x in model_data], vali_runs)
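The column naming in `get_losses_over_time` is what ties utils.py back to app.py: each loss series is keyed by `CompetitionDetails.name`, which is why the LinePlot can use y="SN9_MODEL". An illustrative, runnable sketch of the returned frame's shape (the timestamps and loss values here are made up):

import datetime
import pandas as pd

# One row per qualifying validator run; one column per competition name.
df = pd.DataFrame(
    {
        "timestamp": [
            datetime.datetime(2024, 6, 1),
            datetime.datetime(2024, 6, 2),
        ],
        # Keyed by CompetitionDetails.name for competition ID 1.
        # None would mark runs where the competition had no reported loss.
        "SN9_MODEL": [2.31, 2.27],
    }
)
print(df)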