import hashlib
from typing import Iterable, Union

import gradio as gr
import pandas as pd
from datasets import load_dataset

from constants import RESULTS_REPO, ASSAY_RENAME, LEADERBOARD_RESULTS_COLUMNS, BASELINE_USERNAMES

# Show all columns when DataFrames are printed (useful when debugging locally).
pd.set_option("display.max_columns", None)


def show_output_box(message):
    """Return a Gradio update that fills the output component with `message` and makes it visible."""
    return gr.update(value=message, visible=True)
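

# Minimal usage sketch (assumption: the app wires this helper to a hidden
# Textbox roughly as below; the component names here are illustrative):
#
#   with gr.Blocks() as demo:
#       submit_btn = gr.Button("Submit")
#       output_box = gr.Textbox(visible=False)
#       submit_btn.click(lambda: show_output_box("Submitted!"), outputs=output_box)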


def anonymize_user(username: str) -> str:
    """Map a username to a short, stable pseudonym: the first 8 hex digits of its SHA-256."""
    return hashlib.sha256(username.encode()).hexdigest()[:8]
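

# Example: anonymize_user("alice") yields the first 8 hex digits of
# SHA-256("alice"), so the same user always maps to the same 8-character ID.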


def fetch_hf_results():
    """Fetch leaderboard results from the HF results repo and tidy them for display."""
    df = load_dataset(
        RESULTS_REPO,
        data_files="auto_submissions/metrics_all.csv",
    )["train"].to_pandas()
    assert all(col in df.columns for col in LEADERBOARD_RESULTS_COLUMNS), (
        f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in {df.columns}. "
        f"Missing columns: {set(LEADERBOARD_RESULTS_COLUMNS) - set(df.columns)}"
    )

    df_baseline = df[df["user"].isin(BASELINE_USERNAMES)]
    df_non_baseline = df[~df["user"].isin(BASELINE_USERNAMES)]

    # Keep only the most recent submission per key. Baseline accounts may submit
    # several models, so "model" is part of their dedup key; other users keep a
    # single (latest) entry per assay/dataset.
    df_baseline = df_baseline.sort_values("submission_time", ascending=False).drop_duplicates(
        subset=["model", "assay", "dataset", "user"], keep="first"
    )
    df_non_baseline = df_non_baseline.sort_values("submission_time", ascending=False).drop_duplicates(
        subset=["assay", "dataset", "user"], keep="first"
    )
    df = pd.concat([df_baseline, df_non_baseline], ignore_index=True)
    df["property"] = df["assay"].map(ASSAY_RENAME)

    # Collapse all baseline accounts into a single display name.
    df.loc[df["user"].isin(BASELINE_USERNAMES), "user"] = "Baseline"

    # Pseudonymize everyone who did not explicitly opt out of anonymity; the
    # `!= False` comparison deliberately treats missing values as anonymous.
    anon_mask = df["anonymous"] != False  # noqa: E712
    df.loc[anon_mask, "user"] = "anon-" + df.loc[anon_mask, "user"].apply(readable_hash)

    return df
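
# Sketch of the dedup rule above on toy data (illustrative only, not app code):
#
#   toy = pd.DataFrame({
#       "user": ["u1", "u1"],
#       "assay": ["a", "a"],
#       "dataset": ["d", "d"],
#       "submission_time": ["2024-01-02", "2024-01-01"],
#   })
#   toy.sort_values("submission_time", ascending=False).drop_duplicates(
#       subset=["assay", "dataset", "user"], keep="first"
#   )
#   # -> only the 2024-01-02 row survives: the latest submission per key wins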


ADJECTIVES = [
    "ancient", "brave", "calm", "clever", "crimson", "curious", "dapper", "eager",
    "fuzzy", "gentle", "glowing", "golden", "happy", "icy", "jolly", "lucky",
    "magical", "mellow", "nimble", "peachy", "quick", "royal", "shiny", "silent",
    "sly", "sparkly", "spicy", "spry", "sturdy", "sunny", "swift", "tiny", "vivid",
    "witty",
]

ANIMALS = [
    "ant", "bat", "bear", "bee", "bison", "boar", "bug", "cat", "crab", "crow",
    "deer", "dog", "duck", "eel", "elk", "fox", "frog", "goat", "gull", "hare",
    "hawk", "hen", "horse", "ibis", "kid", "kiwi", "koala", "lamb", "lark", "lemur",
    "lion", "llama", "loon", "lynx", "mole", "moose", "mouse", "newt", "otter", "owl",
    "ox", "panda", "pig", "prawn", "puma", "quail", "quokka", "rabbit", "rat", "ray",
    "robin", "seal", "shark", "sheep", "shrew", "skunk", "slug", "snail", "snake",
    "swan", "toad", "trout", "turtle", "vole", "walrus", "wasp", "whale", "wolf",
    "worm", "yak", "zebra",
]

NOUNS = [
    "rock", "sand", "star", "tree", "leaf", "seed", "stone", "cloud", "rain", "snow",
    "wind", "fire", "ash", "dirt", "mud", "ice", "wave", "shell", "dust", "sun",
    "moon", "hill", "lake", "pond", "reef", "root", "twig", "wood",
]


def readable_hash(
    data: Union[str, bytes, Iterable[int]],
    *,
    salt: Union[str, bytes, None] = None,
    words: tuple[list[str], list[str]] = (ADJECTIVES, ANIMALS + NOUNS),
    sep: str = "-",
    checksum_len: int = 2,
    case: str = "lower",
) -> str:
    r"""
    Deterministically map input data to 'adjective-animal[-checksum]'. Generated using ChatGPT.

    Examples (outputs are illustrative, not verified doctest results)
    -----------------------------------------------------------------
    >>> readable_hash("hello world")
    'magical-panda-6h'

    >>> readable_hash("hello world", salt="my-app-v1", checksum_len=3)
    'royal-otter-1pz'

    >>> readable_hash(b"\x00\x01\x02\x03", case="title", checksum_len=0)
    'Fuzzy-Otter'

    Vocabulary
    ----------
    ADJECTIVES: 34 safe, descriptive words (e.g. "ancient", "brave", "silent", "swift")
    ANIMALS: 71 short, common animals (e.g. "dog", "owl", "whale", "panda")
    NOUNS: optional set of 28 neutral nouns (e.g. "rock", "star", "tree", "cloud")

    Combinations
    ------------
    - adjective + animal: ~2,400 unique names
    - adjective + noun: ~950 unique names
    - adjective + (animal or noun), the default pool: ~3,400 unique names

    Checksum
    --------
    An optional short base-36 suffix (e.g. "-6h" or "-1pz"). The checksum
    acts as a disambiguator in case two different inputs map to the same
    word combination; with 2-3 characters, such collisions become unlikely.
    If you only need fun, human-readable names, you can disable it by setting
    ``checksum_len=0``. If you need unique, stable identifiers, keep it enabled.
    """
    # Normalize the input to bytes.
    if isinstance(data, str):
        data = data.encode()
    elif isinstance(data, Iterable) and not isinstance(data, (bytes, bytearray)):
        data = bytes(data)

    # Hash (salt + NUL separator + data) into a fixed 8-byte digest.
    h = hashlib.blake2b(digest_size=8)
    if salt:
        h.update(salt.encode() if isinstance(salt, str) else salt)
        h.update(b"\x00")
    h.update(data)
    digest = h.digest()

    # Bytes 0-2 select the adjective, bytes 3-5 select the animal/noun.
    n1 = int.from_bytes(digest[0:3], "big")
    n2 = int.from_bytes(digest[3:6], "big")

    adj = words[0][n1 % len(words[0])]
    noun = words[1][n2 % len(words[1])]
    phrase = f"{adj}{sep}{noun}"

    if checksum_len > 0:
        # Bytes 6-7 become a short base-36 checksum suffix.
        cs = int.from_bytes(digest[6:], "big")
        base36 = ""
        alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
        while cs:
            cs, r = divmod(cs, 36)
            base36 = alphabet[r] + base36
        base36 = (base36 or "0")[:checksum_len]
        phrase = f"{phrase}{sep}{base36}"

    if case == "title":
        phrase = sep.join(p.capitalize() for p in phrase.split(sep))
    elif case == "upper":
        phrase = phrase.upper()

    return phrase
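

if __name__ == "__main__":
    # Quick offline smoke test of the naming helpers (no dataset download).
    # The salt value below is illustrative, not one the app actually uses.
    print(anonymize_user("example-user"))
    print(readable_hash("example-user"))
    print(readable_hash("example-user", salt="leaderboard-v1", checksum_len=3))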