import hashlib
from typing import Iterable, Union

import gradio as gr
import pandas as pd
from datasets import load_dataset

from constants import RESULTS_REPO, ASSAY_RENAME, LEADERBOARD_RESULTS_COLUMNS, BASELINE_USERNAMES

# Show all columns when DataFrames are printed (useful when debugging locally).
pd.set_option("display.max_columns", None)


def show_output_box(message):
    """Return a Gradio update that fills the output component with `message` and makes it visible."""
    return gr.update(value=message, visible=True)
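

# Minimal usage sketch (assumption: the app wires this helper to a hidden
# Textbox roughly as below; the component names here are illustrative):
#
#   with gr.Blocks() as demo:
#       submit_btn = gr.Button("Submit")
#       output_box = gr.Textbox(visible=False)
#       submit_btn.click(lambda: show_output_box("Submitted!"), outputs=output_box)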


def anonymize_user(username: str) -> str:
    """Map a username to a short, stable pseudonym: the first 8 hex digits of its SHA-256."""
    return hashlib.sha256(username.encode()).hexdigest()[:8]
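

# Example: anonymize_user("alice") yields the first 8 hex digits of
# SHA-256("alice"), so the same user always maps to the same 8-character ID.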


def fetch_hf_results():
    """Fetch leaderboard results from the HF results repo and tidy them for display."""
    df = load_dataset(
        RESULTS_REPO,
        data_files="auto_submissions/metrics_all.csv",
    )["train"].to_pandas()
    assert all(col in df.columns for col in LEADERBOARD_RESULTS_COLUMNS), (
        f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in {df.columns}. "
        f"Missing columns: {set(LEADERBOARD_RESULTS_COLUMNS) - set(df.columns)}"
    )

    df_baseline = df[df["user"].isin(BASELINE_USERNAMES)]
    df_non_baseline = df[~df["user"].isin(BASELINE_USERNAMES)]

    # Keep only the most recent submission per key. Baseline accounts may submit
    # several models, so "model" is part of their dedup key; other users keep a
    # single (latest) entry per assay/dataset.
    df_baseline = df_baseline.sort_values("submission_time", ascending=False).drop_duplicates(
        subset=["model", "assay", "dataset", "user"], keep="first"
    )
    df_non_baseline = df_non_baseline.sort_values("submission_time", ascending=False).drop_duplicates(
        subset=["assay", "dataset", "user"], keep="first"
    )
    df = pd.concat([df_baseline, df_non_baseline], ignore_index=True)
    df["property"] = df["assay"].map(ASSAY_RENAME)

    # Collapse all baseline accounts into a single display name.
    df.loc[df["user"].isin(BASELINE_USERNAMES), "user"] = "Baseline"

    # Pseudonymize everyone who did not explicitly opt out of anonymity; the
    # `!= False` comparison deliberately treats missing values as anonymous.
    anon_mask = df["anonymous"] != False  # noqa: E712
    df.loc[anon_mask, "user"] = "anon-" + df.loc[anon_mask, "user"].apply(readable_hash)

    return df
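
# Sketch of the dedup rule above on toy data (illustrative only, not app code):
#
#   toy = pd.DataFrame({
#       "user": ["u1", "u1"],
#       "assay": ["a", "a"],
#       "dataset": ["d", "d"],
#       "submission_time": ["2024-01-02", "2024-01-01"],
#   })
#   toy.sort_values("submission_time", ascending=False).drop_duplicates(
#       subset=["assay", "dataset", "user"], keep="first"
#   )
#   # -> only the 2024-01-02 row survives: the latest submission per key wins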


ADJECTIVES = [
    "ancient", "brave", "calm", "clever", "crimson", "curious", "dapper", "eager",
    "fuzzy", "gentle", "glowing", "golden", "happy", "icy", "jolly", "lucky",
    "magical", "mellow", "nimble", "peachy", "quick", "royal", "shiny", "silent",
    "sly", "sparkly", "spicy", "spry", "sturdy", "sunny", "swift", "tiny", "vivid",
    "witty",
]

ANIMALS = [
    "ant", "bat", "bear", "bee", "bison", "boar", "bug", "cat", "crab", "crow",
    "deer", "dog", "duck", "eel", "elk", "fox", "frog", "goat", "gull", "hare",
    "hawk", "hen", "horse", "ibis", "kid", "kiwi", "koala", "lamb", "lark", "lemur",
    "lion", "llama", "loon", "lynx", "mole", "moose", "mouse", "newt", "otter", "owl",
    "ox", "panda", "pig", "prawn", "puma", "quail", "quokka", "rabbit", "rat", "ray",
    "robin", "seal", "shark", "sheep", "shrew", "skunk", "slug", "snail", "snake",
    "swan", "toad", "trout", "turtle", "vole", "walrus", "wasp", "whale", "wolf",
    "worm", "yak", "zebra",
]

NOUNS = [
    "rock", "sand", "star", "tree", "leaf", "seed", "stone", "cloud", "rain", "snow",
    "wind", "fire", "ash", "dirt", "mud", "ice", "wave", "shell", "dust", "sun",
    "moon", "hill", "lake", "pond", "reef", "root", "twig", "wood",
]


def readable_hash(
    data: Union[str, bytes, Iterable[int]],
    *,
    salt: Union[str, bytes, None] = None,
    words: tuple[list[str], list[str]] = (ADJECTIVES, ANIMALS + NOUNS),
    sep: str = "-",
    checksum_len: int = 2,
    case: str = "lower",
) -> str:
    r"""
    Deterministically map input data to 'adjective-animal[-checksum]'. Generated using ChatGPT.

    Examples (outputs are illustrative, not verified doctest results)
    -----------------------------------------------------------------
    >>> readable_hash("hello world")
    'magical-panda-6h'

    >>> readable_hash("hello world", salt="my-app-v1", checksum_len=3)
    'royal-otter-1pz'

    >>> readable_hash(b"\x00\x01\x02\x03", case="title", checksum_len=0)
    'Fuzzy-Otter'

    Vocabulary
    ----------
    ADJECTIVES: 34 safe, descriptive words (e.g. "ancient", "brave", "silent", "swift")
    ANIMALS: 71 short, common animals (e.g. "dog", "owl", "whale", "panda")
    NOUNS: optional set of 28 neutral nouns (e.g. "rock", "star", "tree", "cloud")

    Combinations
    ------------
    - adjective + animal: ~2,400 unique names
    - adjective + noun: ~950 unique names
    - adjective + (animal or noun), the default pool: ~3,400 unique names

    Checksum
    --------
    An optional short base-36 suffix (e.g. "-6h" or "-1pz"). The checksum
    acts as a disambiguator in case two different inputs map to the same
    word combination; with 2-3 characters, such collisions become unlikely.
    If you only need fun, human-readable names, you can disable it by setting
    ``checksum_len=0``. If you need unique, stable identifiers, keep it enabled.
    """
    # Normalize the input to bytes.
    if isinstance(data, str):
        data = data.encode()
    elif isinstance(data, Iterable) and not isinstance(data, (bytes, bytearray)):
        data = bytes(data)

    # Hash (salt + NUL separator + data) into a fixed 8-byte digest.
    h = hashlib.blake2b(digest_size=8)
    if salt:
        h.update(salt.encode() if isinstance(salt, str) else salt)
        h.update(b"\x00")
    h.update(data)
    digest = h.digest()

    # Bytes 0-2 select the adjective, bytes 3-5 select the animal/noun.
    n1 = int.from_bytes(digest[0:3], "big")
    n2 = int.from_bytes(digest[3:6], "big")

    adj = words[0][n1 % len(words[0])]
    noun = words[1][n2 % len(words[1])]
    phrase = f"{adj}{sep}{noun}"

    if checksum_len > 0:
        # Bytes 6-7 become a short base-36 checksum suffix.
        cs = int.from_bytes(digest[6:], "big")
        base36 = ""
        alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
        while cs:
            cs, r = divmod(cs, 36)
            base36 = alphabet[r] + base36
        base36 = (base36 or "0")[:checksum_len]
        phrase = f"{phrase}{sep}{base36}"

    if case == "title":
        phrase = sep.join(p.capitalize() for p in phrase.split(sep))
    elif case == "upper":
        phrase = phrase.upper()

    return phrase
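

if __name__ == "__main__":
    # Quick offline smoke test of the naming helpers (no dataset download).
    # The salt value below is illustrative, not one the app actually uses.
    print(anonymize_user("example-user"))
    print(readable_hash("example-user"))
    print(readable_hash("example-user", salt="leaderboard-v1", checksum_len=3))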