loodvanniekerkginkgo committed
Commit 3e8741e · 1 Parent(s): 0768e70

Using readable hashes

Files changed (1):
  utils.py +118 -1
utils.py CHANGED
@@ -1,7 +1,8 @@
 import pandas as pd
 from datasets import load_dataset
 import gradio as gr
-
+import hashlib
+from typing import Iterable, Union
 from constants import RESULTS_REPO, ASSAY_RENAME, LEADERBOARD_RESULTS_COLUMNS
 
 pd.set_option('display.max_columns', None)
@@ -10,6 +11,10 @@ pd.set_option('display.max_columns', None)
 def show_output_box(message):
     return gr.update(value=message, visible=True)
 
+def anonymize_user(username: str) -> str:
+    # Anonymize using a hash of the username
+    return hashlib.sha256(username.encode()).hexdigest()[:8]
+
 
 def fetch_hf_results():
     # For debugging
@@ -24,4 +29,116 @@ def fetch_hf_results():
 
29
  # Show latest submission only
30
  df = df.sort_values("submission_time", ascending=False).drop_duplicates(subset=["model", "assay", "user"], keep="first")
31
  df["property"] = df["assay"].map(ASSAY_RENAME)
32
+
33
+ # Anonymize the user column at this point
34
+ df.loc[df["anonymous"] != False, "user"] = df.loc[df["anonymous"] != False, "user"].apply(readable_hash)
35
+
36
  return df
+
+
+# Readable hashing function similar to coolname or codenamize
+ADJECTIVES = [
+    "ancient","brave","calm","clever","crimson","curious","dapper","eager",
+    "fuzzy","gentle","glowing","golden","happy","icy","jolly","lucky",
+    "magical","mellow","nimble","peachy","quick","royal","shiny","silent",
+    "sly","sparkly","spicy","spry","sturdy","sunny","swift","tiny","vivid",
+    "witty"
+]
+
+ANIMALS = [
+    "ant","bat","bear","bee","bison","boar","bug","cat","crab","crow",
+    "deer","dog","duck","eel","elk","fox","frog","goat","gull","hare",
+    "hawk","hen","horse","ibis","kid","kiwi","koala","lamb","lark","lemur",
+    "lion","llama","loon","lynx","mole","moose","mouse","newt","otter","owl",
+    "ox","panda","pig","prawn","puma","quail","quokka","rabbit","rat","ray",
+    "robin","seal","shark","sheep","shrew","skunk","slug","snail","snake",
+    "swan","toad","trout","turtle","vole","walrus","wasp","whale","wolf",
+    "worm","yak","zebra"
+]
+NOUNS = [
+    "rock","sand","star","tree","leaf","seed","stone","cloud","rain","snow",
+    "wind","fire","ash","dirt","mud","ice","wave","shell","dust","sun",
+    "moon","hill","lake","pond","reef","root","twig","wood"
+]
+
+
+def readable_hash(
+    data: Union[str, bytes, Iterable[int]],
+    *,
+    salt: Union[str, bytes, None] = None,
+    words: tuple[list[str], list[str]] = (ADJECTIVES, ANIMALS + NOUNS),
+    sep: str = "-",
+    checksum_len: int = 2,  # 0 to disable; 2-3 is plenty
+    case: str = "lower"  # "lower" | "title" | "upper"
+) -> str:
+    """
+    Deterministically map input data to 'adjective-animal[-checksum]'. Generated using ChatGPT; example outputs below are illustrative.
+
+    Examples
+    --------
+    >>> readable_hash("hello world")  # doctest: +SKIP
+    'magical-panda-6h'
+
+    >>> readable_hash("hello world", salt="my-app-v1", checksum_len=3)  # doctest: +SKIP
+    'royal-otter-1pz'
+
+    >>> readable_hash(b"\\x00\\x01\\x02\\x03", case="title", checksum_len=0)  # doctest: +SKIP
+    'Fuzzy-Otter'
+
+    Vocabulary
+    ----------
+    ADJECTIVES: 34 safe, descriptive words (e.g. "ancient", "brave", "silent", "swift")
+    ANIMALS: 71 short, common animals (e.g. "dog", "owl", "whale", "zebra")
+    NOUNS: optional set of 28 neutral nouns (e.g. "rock", "star", "tree", "cloud")
+
+    Combinations
+    ------------
+    - adjective + animal: ~2,400 unique names
+    - adjective + noun: ~950 unique names
+    - adjective + (animal or noun), the default here: ~3,400 unique names
+
+    Checksum
+    --------
+    An optional short base-36 suffix (e.g. "-6h" or "-1pz"). The checksum
+    acts as a disambiguator in case two different inputs map to the same
+    word combination. With 2-3 characters, collisions become much rarer.
+    If you only need fun, human-readable names, you can disable it by setting
+    ``checksum_len=0``. If you need unique, stable identifiers, keep it enabled.
+    """
+    if isinstance(data, str):
+        data = data.encode()
+    elif isinstance(data, Iterable) and not isinstance(data, (bytes, bytearray)):
+        data = bytes(data)
+
+    h = hashlib.blake2b(digest_size=8)  # fast, stable, short digest
+    if salt:
+        h.update(salt.encode() if isinstance(salt, str) else salt)
+        h.update(b"\x00")  # domain-separate salt from data
+    h.update(data)
+    digest = h.digest()
+
+    # Use the first 6 bytes to index words; last bytes for checksum
+    n1 = int.from_bytes(digest[0:3], "big")
+    n2 = int.from_bytes(digest[3:6], "big")
+
+    adj = words[0][n1 % len(words[0])]
+    noun = words[1][n2 % len(words[1])]
+    phrase = f"{adj}{sep}{noun}"
+
+    if checksum_len > 0:
+        # Short base36 checksum for collision visibility
+        cs = int.from_bytes(digest[6:], "big")
+        base36 = ""
+        alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
+        while cs:
+            cs, r = divmod(cs, 36)
+            base36 = alphabet[r] + base36
+        base36 = (base36 or "0")[:checksum_len]
+        phrase = f"{phrase}{sep}{base36}"
+
+    if case == "title":
+        phrase = sep.join(p.capitalize() for p in phrase.split(sep))
+    elif case == "upper":
+        phrase = phrase.upper()
+
+    return phrase
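
To put the docstring's checksum note in numbers: as committed, the vocabulary has 34 adjectives and 99 second words (71 animals + 28 nouns), so there are only about 3,400 two-word phrases, and the 2-character suffix does real disambiguation work. A rough birthday-bound sketch (a standard approximation; it treats the truncated checksum as a uniform 36**2 space, which is only roughly true):

import math

ADJ, SECOND = 34, 71 + 28            # len(ADJECTIVES), len(ANIMALS + NOUNS)
PHRASES = ADJ * SECOND               # 3,366 two-word phrases
WITH_CS = PHRASES * 36 ** 2          # ~4.4M names with a 2-char base36 suffix

def p_collision(n: int, space: int) -> float:
    # Probability that any two of n users share a name (birthday approximation).
    return 1.0 - math.exp(-n * (n - 1) / (2 * space))

print(f"{p_collision(100, PHRASES):.2f}")   # ~0.77: bare phrases collide often
print(f"{p_collision(100, WITH_CS):.4f}")   # ~0.0011: checksum keeps 100 users apart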
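
For a quick sanity check of the new helper, an interactive session might look like this (a sketch: it assumes utils.py is importable and uses a made-up username; concrete outputs depend on the word lists above, so none are hard-coded):

from utils import readable_hash

user = "alice@example.com"                          # hypothetical username

name = readable_hash(user)
assert name == readable_hash(user)                  # deterministic: same input, same name

print(name)                                         # e.g. 'sunny-otter-4k'
print(readable_hash(user, salt="leaderboard-v1"))   # salt separates deployments
print(readable_hash(user, checksum_len=0))          # bare 'adjective-animal'
print(readable_hash(user, case="title"))            # e.g. 'Sunny-Otter-4k'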