Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- __init__.py +138 -0
- __pycache__/__init__.cpython-310.pyc +0 -0
- __pycache__/context_vars.cpython-310.pyc +0 -0
- __pycache__/deploy.cpython-310.pyc +0 -0
- __pycache__/dummy_commit_scheduler.cpython-310.pyc +0 -0
- __pycache__/imports.cpython-310.pyc +0 -0
- __pycache__/run.cpython-310.pyc +0 -0
- __pycache__/sqlite_storage.cpython-310.pyc +0 -0
- __pycache__/ui.cpython-310.pyc +0 -0
- __pycache__/utils.cpython-310.pyc +0 -0
- cli.py +26 -0
- context_vars.py +19 -0
- deploy.py +138 -0
- dummy_commit_scheduler.py +12 -0
- imports.py +118 -0
- run.py +36 -0
- sqlite_storage.py +241 -0
- trackio_logo.png +3 -0
- ui.py +503 -0
- utils.py +278 -0
- version.txt +1 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
trackio_logo.png filter=lfs diff=lfs merge=lfs -text
|
__init__.py
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import webbrowser
|
3 |
+
from pathlib import Path
|
4 |
+
|
5 |
+
from gradio_client import Client
|
6 |
+
|
7 |
+
from trackio import context_vars, deploy, utils
|
8 |
+
from trackio.imports import import_csv
|
9 |
+
from trackio.run import Run
|
10 |
+
from trackio.sqlite_storage import SQLiteStorage
|
11 |
+
from trackio.ui import demo
|
12 |
+
from trackio.utils import TRACKIO_DIR, TRACKIO_LOGO_PATH
|
13 |
+
|
14 |
+
__version__ = Path(__file__).parent.joinpath("version.txt").read_text().strip()
|
15 |
+
|
16 |
+
__all__ = ["init", "log", "finish", "show", "import_csv"]
|
17 |
+
|
18 |
+
|
19 |
+
config = {}
|
20 |
+
|
21 |
+
|
22 |
+
def init(
    project: str,
    name: str | None = None,
    space_id: str | None = None,
    dataset_id: str | None = None,
    config: dict | None = None,
    resume: str = "never",
) -> Run:
    """
    Start (or resume) a run inside a Trackio project and make it the current run.

    Args:
        project: Name of the project to log to. May be an existing project or a new one.
        name: Name of the run. When omitted, the Run generates a name itself.
        space_id: Hugging Face Space to log to instead of the local dashboard. The value is
            normalized by `utils.preprocess_space_and_dataset_ids`, and the Space is created
            if it does not exist yet.
        dataset_id: Hugging Face Dataset that metrics are synced to. Also normalized by
            `utils.preprocess_space_and_dataset_ids`; when set, it is exported through the
            `TRACKIO_DATASET_ID` environment variable.
        config: Dictionary of configuration options, kept for compatibility with wandb.init().
        resume: How to treat an already existing run of the same name:
            - "must": the run has to exist, otherwise a ValueError is raised
            - "allow": reuse the run if it exists, otherwise start a new one
            - "never": never reuse; an existing name is discarded so a new one is generated

    Returns:
        The newly created Run, which is also stored as the current run.

    Raises:
        ValueError: If `resume` is not one of the accepted values, or if resume="must" and
            the run name is missing or unknown.
    """
    server_url = context_vars.current_server.get()
    if not server_url and space_id is None:
        # No local dashboard yet and no remote Space requested: start one in the background.
        _, url, _ = demo.launch(
            show_api=False, inline=False, quiet=True, prevent_thread_lock=True
        )
        context_vars.current_server.set(url)
    else:
        url = server_url

    space_id, dataset_id = utils.preprocess_space_and_dataset_ids(space_id, dataset_id)

    # Only announce and provision when the project differs from the one already active.
    active_project = context_vars.current_project.get()
    if active_project is None or active_project != project:
        print(f"* Trackio project initialized: {project}")

        if dataset_id is not None:
            os.environ["TRACKIO_DATASET_ID"] = dataset_id
            print(
                f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}"
            )
        if space_id is not None:
            deploy.create_space_if_not_exists(space_id, dataset_id)
            print(
                f"* View dashboard by going to: {deploy.SPACE_URL.format(space_id=space_id)}"
            )
        else:
            print(f"* Trackio metrics logged to: {TRACKIO_DIR}")
            utils.print_dashboard_instructions(project)
    context_vars.current_project.set(project)

    client = Client(space_id if space_id else url, verbose=False)

    if resume not in ("must", "allow", "never"):
        raise ValueError("resume must be one of: 'must', 'allow', or 'never'")

    # The storage lookup is only performed when a run name was actually supplied.
    run_exists = name is not None and name in SQLiteStorage.get_runs(project)
    if resume == "must":
        if name is None:
            raise ValueError("Must provide a run name when resume='must'")
        if not run_exists:
            raise ValueError(f"Run '{name}' does not exist in project '{project}'")
    elif resume == "allow":
        if run_exists:
            print(f"* Resuming existing run: {name}")
    elif run_exists:
        # resume == "never": drop the clashing name so a fresh one is generated.
        name = None

    run = Run(project=project, client=client, name=name, config=config)
    context_vars.current_run.set(run)
    # Expose the run's config as the module-level `config` attribute.
    globals()["config"] = run.config
    return run
|
96 |
+
|
97 |
+
|
98 |
+
def log(metrics: dict) -> None:
    """
    Forward a dictionary of metrics to the current run.

    Args:
        metrics: A dictionary of metrics to log.

    Raises:
        RuntimeError: If no run has been set as the current run yet.
    """
    active_run = context_vars.current_run.get()
    if active_run is None:
        raise RuntimeError("Call trackio.init() before log().")
    active_run.log(metrics)
|
108 |
+
|
109 |
+
|
110 |
+
def finish():
    """
    Finish the current run by delegating to its `finish()` method.

    Raises:
        RuntimeError: If no run has been set as the current run yet.
    """
    active_run = context_vars.current_run.get()
    if active_run is None:
        raise RuntimeError("Call trackio.init() before finish().")
    active_run.finish()
|
117 |
+
|
118 |
+
|
119 |
+
def show(project: str | None = None):
    """
    Launches the Trackio dashboard and opens it in the web browser.

    Args:
        project: The name of the project whose runs to show. If not provided, the
            dashboard URL is opened without a `project` query parameter.
    """
    # Imported locally so this function does not depend on a module-level import.
    from urllib.parse import quote

    _, url, share_url = demo.launch(
        show_api=False,
        quiet=True,
        inline=False,
        prevent_thread_lock=True,
        favicon_path=TRACKIO_LOGO_PATH,
        allowed_paths=[TRACKIO_LOGO_PATH],
    )
    # A "/" is appended to the share URL; the local URL is used exactly as returned.
    base_url = share_url + "/" if share_url else url
    if project:
        # Percent-encode the name so spaces, "&", "#", "?" etc. cannot corrupt the
        # query string or be misread as extra parameters.
        dashboard_url = f"{base_url}?project={quote(project, safe='')}"
    else:
        dashboard_url = base_url
    print(f"* Trackio UI launched at: {dashboard_url}")
    webbrowser.open(dashboard_url)
    utils.block_except_in_notebook()
|
__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (5.4 kB). View file
|
|
__pycache__/context_vars.cpython-310.pyc
ADDED
Binary file (695 Bytes). View file
|
|
__pycache__/deploy.cpython-310.pyc
ADDED
Binary file (3.72 kB). View file
|
|
__pycache__/dummy_commit_scheduler.cpython-310.pyc
ADDED
Binary file (945 Bytes). View file
|
|
__pycache__/imports.cpython-310.pyc
ADDED
Binary file (4.17 kB). View file
|
|
__pycache__/run.cpython-310.pyc
ADDED
Binary file (1.3 kB). View file
|
|
__pycache__/sqlite_storage.cpython-310.pyc
ADDED
Binary file (7.35 kB). View file
|
|
__pycache__/ui.cpython-310.pyc
ADDED
Binary file (11.8 kB). View file
|
|
__pycache__/utils.cpython-310.pyc
ADDED
Binary file (4.28 kB). View file
|
|
cli.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
|
3 |
+
from trackio import show
|
4 |
+
|
5 |
+
|
6 |
+
def main():
    """Parse the command line and dispatch to the requested Trackio sub-command.

    The only sub-command is ``show``, which accepts an optional ``--project``.
    Anything else (including no sub-command at all) prints the usage help.
    """
    cli = argparse.ArgumentParser(description="Trackio CLI")
    commands = cli.add_subparsers(dest="command")

    show_cmd = commands.add_parser(
        "show", help="Show the Trackio dashboard UI for a project"
    )
    show_cmd.add_argument(
        "--project", required=False, help="Project name to show in the dashboard"
    )

    parsed = cli.parse_args()

    if parsed.command != "show":
        cli.print_help()
        return
    show(parsed.project)


if __name__ == "__main__":
    main()
|
context_vars.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import contextvars
|
2 |
+
from typing import TYPE_CHECKING
|
3 |
+
|
4 |
+
if TYPE_CHECKING:
|
5 |
+
from trackio.run import Run
|
6 |
+
from trackio.sqlite_storage import CommitScheduler, DummyCommitScheduler
|
7 |
+
|
8 |
+
# Context-local state shared across the trackio package. Every variable
# defaults to None until something sets it.

# The Run object most recently created by init(); read by log() and finish().
current_run: contextvars.ContextVar["Run | None"] = contextvars.ContextVar(
    "current_run", default=None
)
# Name of the project passed to the most recent init() call.
current_project: contextvars.ContextVar[str | None] = contextvars.ContextVar(
    "current_project", default=None
)
# URL of the dashboard server that init() launched, so later calls can reuse it.
current_server: contextvars.ContextVar[str | None] = contextvars.ContextVar(
    "current_server", default=None
)
# Scheduler cached by SQLiteStorage.get_scheduler(); its `lock` guards database writes.
current_scheduler: contextvars.ContextVar[
    "CommitScheduler | DummyCommitScheduler | None"
] = contextvars.ContextVar("current_scheduler", default=None)
|
deploy.py
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import io
|
2 |
+
import os
|
3 |
+
import time
|
4 |
+
from importlib.resources import files
|
5 |
+
from pathlib import Path
|
6 |
+
|
7 |
+
import gradio
|
8 |
+
import huggingface_hub
|
9 |
+
from gradio_client import Client, handle_file
|
10 |
+
from httpx import ReadTimeout
|
11 |
+
from huggingface_hub.errors import RepositoryNotFoundError
|
12 |
+
|
13 |
+
from trackio.sqlite_storage import SQLiteStorage
|
14 |
+
|
15 |
+
SPACE_URL = "https://huggingface.co/spaces/{space_id}"
|
16 |
+
|
17 |
+
|
18 |
+
def deploy_as_space(
    space_id: str,
    dataset_id: str | None = None,
):
    """
    Uploads the installed `trackio` package to a Hugging Face Space as a Gradio app.

    Steps: make sure a token with the "write" role is available (prompting for a
    login otherwise), create the Space repo if needed, upload a README whose
    `{GRADIO_VERSION}` placeholder is replaced with the installed Gradio version,
    upload the rest of the package folder, then store the token as the `HF_TOKEN`
    secret and, when given, the dataset as the `TRACKIO_DATASET_ID` variable.

    Args:
        space_id: The ID of the Space to deploy to.
        dataset_id: Optional ID of the Dataset to register on the Space.
    """
    if (
        os.getenv("SYSTEM") == "spaces"
    ):  # in case a repo with this function is uploaded to spaces
        return

    trackio_path = files("trackio")

    hf_api = huggingface_hub.HfApi()
    login = False
    try:
        whoami = hf_api.whoami()
        if whoami["auth"]["accessToken"]["role"] != "write":
            login = True
    except OSError:
        login = True
    if login:
        print("Need 'write' access token to create a Spaces repo.")
        huggingface_hub.login(add_to_git_credential=False)
        # Called again after logging in; the result itself is not needed.
        hf_api.whoami()

    huggingface_hub.create_repo(
        space_id,
        space_sdk="gradio",
        repo_type="space",
        exist_ok=True,
    )

    # Read the template as UTF-8 explicitly: it is re-encoded as UTF-8 below, and
    # the platform default encoding is not UTF-8 everywhere.
    with open(Path(trackio_path, "README.md"), "r", encoding="utf-8") as f:
        readme_content = f.read()
    readme_content = readme_content.replace("{GRADIO_VERSION}", gradio.__version__)
    readme_buffer = io.BytesIO(readme_content.encode("utf-8"))
    hf_api.upload_file(
        path_or_fileobj=readme_buffer,
        path_in_repo="README.md",
        repo_id=space_id,
        repo_type="space",
    )

    huggingface_hub.utils.disable_progress_bars()
    # README.md is excluded here because the rendered version was uploaded above.
    hf_api.upload_folder(
        repo_id=space_id,
        repo_type="space",
        folder_path=trackio_path,
        ignore_patterns=["README.md"],
    )

    hf_token = huggingface_hub.utils.get_token()
    if hf_token is not None:
        huggingface_hub.add_space_secret(space_id, "HF_TOKEN", hf_token)
    if dataset_id is not None:
        huggingface_hub.add_space_variable(space_id, "TRACKIO_DATASET_ID", dataset_id)
|
74 |
+
|
75 |
+
|
76 |
+
def create_space_if_not_exists(
|
77 |
+
space_id: str,
|
78 |
+
dataset_id: str | None = None,
|
79 |
+
) -> None:
|
80 |
+
"""
|
81 |
+
Creates a new Hugging Face Space if it does not exist. If a dataset_id is provided, it will be added as a space variable.
|
82 |
+
|
83 |
+
Args:
|
84 |
+
space_id: The ID of the Space to create.
|
85 |
+
dataset_id: The ID of the Dataset to add to the Space.
|
86 |
+
"""
|
87 |
+
if "/" not in space_id:
|
88 |
+
raise ValueError(
|
89 |
+
f"Invalid space ID: {space_id}. Must be in the format: username/reponame or orgname/reponame."
|
90 |
+
)
|
91 |
+
if dataset_id is not None and "/" not in dataset_id:
|
92 |
+
raise ValueError(
|
93 |
+
f"Invalid dataset ID: {dataset_id}. Must be in the format: username/datasetname or orgname/datasetname."
|
94 |
+
)
|
95 |
+
try:
|
96 |
+
huggingface_hub.repo_info(space_id, repo_type="space")
|
97 |
+
print(f"* Found existing space: {SPACE_URL.format(space_id=space_id)}")
|
98 |
+
if dataset_id is not None:
|
99 |
+
huggingface_hub.add_space_variable(
|
100 |
+
space_id, "TRACKIO_DATASET_ID", dataset_id
|
101 |
+
)
|
102 |
+
return
|
103 |
+
except RepositoryNotFoundError:
|
104 |
+
pass
|
105 |
+
|
106 |
+
print(f"* Creating new space: {SPACE_URL.format(space_id=space_id)}")
|
107 |
+
deploy_as_space(space_id, dataset_id)
|
108 |
+
|
109 |
+
client = None
|
110 |
+
for _ in range(30):
|
111 |
+
try:
|
112 |
+
client = Client(space_id, verbose=False)
|
113 |
+
if client:
|
114 |
+
break
|
115 |
+
except ReadTimeout:
|
116 |
+
print("* Space is not yet ready. Waiting 5 seconds...")
|
117 |
+
time.sleep(5)
|
118 |
+
except ValueError as e:
|
119 |
+
print(f"* Space gave error {e}. Trying again in 5 seconds...")
|
120 |
+
time.sleep(5)
|
121 |
+
|
122 |
+
|
123 |
+
def upload_db_to_space(project: str, space_id: str) -> None:
    """
    Send a local project's SQLite database file to a Hugging Face Space.

    The file is passed to the Space's "/upload_db_to_space" endpoint together
    with the locally stored Hugging Face token.

    Args:
        project: The name of the project whose database is uploaded.
        space_id: The ID of the Space that receives the database.
    """
    local_db = SQLiteStorage.get_project_db_path(project)
    space_client = Client(space_id, verbose=False)
    db_payload = handle_file(local_db)
    token = huggingface_hub.utils.get_token()
    space_client.predict(
        api_name="/upload_db_to_space",
        project=project,
        uploaded_db=db_payload,
        hf_token=token,
    )
|
dummy_commit_scheduler.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# A dummy object to fit the interface of huggingface_hub's CommitScheduler
|
2 |
+
class DummyCommitSchedulerLock:
    """Context manager that does nothing on entry or exit."""

    def __enter__(self):
        # Nothing is acquired, so nothing is handed to the `with` body.
        return None

    def __exit__(self, exception_type, exception_value, exception_traceback):
        # A falsy result means an exception raised in the body is not suppressed.
        return None
|
8 |
+
|
9 |
+
|
10 |
+
class DummyCommitScheduler:
    """Scheduler stand-in whose only attribute is a no-op `lock`."""

    def __init__(self):
        # Callers use `with scheduler.lock:`; here that guards nothing.
        noop_lock = DummyCommitSchedulerLock()
        self.lock = noop_lock
|
imports.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from pathlib import Path
|
3 |
+
|
4 |
+
import pandas as pd
|
5 |
+
|
6 |
+
from trackio import deploy, utils
|
7 |
+
from trackio.sqlite_storage import SQLiteStorage
|
8 |
+
|
9 |
+
|
10 |
+
def import_csv(
    csv_path: str,
    project: str,
    name: str | None = None,
    space_id: str | None = None,
    dataset_id: str | None = None,
) -> None:
    """
    Imports a CSV file into a Trackio project. The CSV file must contain a "step" column
    (matched case-insensitively), may optionally contain a "timestamp" column, and every
    other column that parses as numeric is treated as a metric. It should also include a
    header row with the column names.

    TODO: call init() and return a Run object so that the user can continue to log metrics to it.

    Args:
        csv_path: The str or Path to the CSV file to import.
        project: The name of the project to import the CSV file into. Must not be an existing project.
        name: The name of the Run to import the CSV file into. If not provided, the CSV
            file name without its extension is used.
        space_id: If provided, the imported database is uploaded to this Hugging Face Space
            (created if it does not exist) instead of only being kept locally.
        dataset_id: If provided, it is exported through the `TRACKIO_DATASET_ID` environment
            variable so that metrics are synced to that Hugging Face Dataset.

    Raises:
        ValueError: If the project already has runs, the CSV has no rows, or it has no step column.
        FileNotFoundError: If `csv_path` does not exist.
    """
    if SQLiteStorage.get_runs(project):
        raise ValueError(
            f"Project '{project}' already exists. Cannot import CSV into existing project."
        )

    csv_path = Path(csv_path)
    if not csv_path.exists():
        raise FileNotFoundError(f"CSV file not found: {csv_path}")

    df = pd.read_csv(csv_path)
    if df.empty:
        raise ValueError("CSV file is empty")

    column_mapping = utils.simplify_column_names(df.columns.tolist())
    df = df.rename(columns=column_mapping)

    step_column = next((col for col in df.columns if col.lower() == "step"), None)
    if step_column is None:
        raise ValueError("CSV file must contain a 'step' or 'Step' column")

    if name is None:
        name = csv_path.stem

    # Keep only columns that convert to numbers as a whole; others are skipped.
    numeric_columns = []
    for column in df.columns:
        if column == step_column or column == "timestamp":
            continue
        try:
            pd.to_numeric(df[column], errors="raise")
        except (ValueError, TypeError):
            continue
        numeric_columns.append(column)

    metrics_list = []
    steps = []
    timestamps = []
    has_timestamp = "timestamp" in df.columns  # invariant across rows

    for _, row in df.iterrows():
        metrics = {
            column: float(row[column])
            for column in numeric_columns
            if pd.notna(row[column])
        }
        # Rows without a single metric value are not imported at all.
        if not metrics:
            continue
        metrics_list.append(metrics)
        steps.append(int(row[step_column]))
        if has_timestamp and pd.notna(row["timestamp"]):
            timestamps.append(str(row["timestamp"]))
        else:
            timestamps.append("")

    if metrics_list:
        SQLiteStorage.bulk_log(
            project=project,
            run=name,
            metrics_list=metrics_list,
            steps=steps,
            timestamps=timestamps,
        )

    print(
        f"* Imported {len(metrics_list)} rows from {csv_path} into project '{project}' as run '{name}'"
    )
    # Collect names over all imported rows (first-seen order). Indexing the first
    # row would raise IndexError when nothing was imported and would miss metrics
    # that are blank in that row.
    found_metrics = list(dict.fromkeys(key for row in metrics_list for key in row))
    print(f"* Metrics found: {', '.join(found_metrics) if found_metrics else 'none'}")

    space_id, dataset_id = utils.preprocess_space_and_dataset_ids(space_id, dataset_id)
    if dataset_id is not None:
        os.environ["TRACKIO_DATASET_ID"] = dataset_id
        print(f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}")

    if space_id is None:
        utils.print_dashboard_instructions(project)
    else:
        deploy.create_space_if_not_exists(space_id, dataset_id)
        deploy.upload_db_to_space(project, space_id)
        print(
            f"* View dashboard by going to: {deploy.SPACE_URL.format(space_id=space_id)}"
        )
|
run.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import huggingface_hub
|
2 |
+
from gradio_client import Client
|
3 |
+
|
4 |
+
from trackio.utils import RESERVED_KEYS, generate_readable_name
|
5 |
+
|
6 |
+
|
7 |
+
class Run:
    """A single named run of a project that sends its metrics through a Gradio client."""

    def __init__(
        self,
        project: str,
        client: Client,
        name: str | None = None,
        config: dict | None = None,
    ):
        """
        Args:
            project: Name of the project this run belongs to.
            client: Client used to call the "/log" endpoint.
            name: Run name; a readable name is generated when it is missing or empty.
            config: Configuration dictionary; an empty dict is used when missing or empty.
        """
        self.project = project
        self.client = client
        self.name = name if name else generate_readable_name()
        self.config = config if config else {}

    def log(self, metrics: dict):
        """Validate the metric names, then send the metrics to the "/log" endpoint.

        Raises:
            ValueError: If a key is in RESERVED_KEYS or starts with a double underscore.
        """
        for key in metrics:
            is_reserved = key in RESERVED_KEYS or key.startswith("__")
            if is_reserved:
                raise ValueError(
                    f"Please do not use this reserved key as a metric: {key}"
                )
        token = huggingface_hub.utils.get_token()
        self.client.predict(
            api_name="/log",
            project=self.project,
            run=self.name,
            metrics=metrics,
            hf_token=token,
        )

    def finish(self):
        """Hook called when the run is finished; currently does nothing."""
        return None
|
sqlite_storage.py
ADDED
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import glob
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
import sqlite3
|
5 |
+
from datetime import datetime
|
6 |
+
|
7 |
+
from huggingface_hub import CommitScheduler
|
8 |
+
|
9 |
+
try:
|
10 |
+
from trackio.context_vars import current_scheduler
|
11 |
+
from trackio.dummy_commit_scheduler import DummyCommitScheduler
|
12 |
+
from trackio.utils import TRACKIO_DIR
|
13 |
+
except: # noqa: E722
|
14 |
+
from context_vars import current_scheduler
|
15 |
+
from dummy_commit_scheduler import DummyCommitScheduler
|
16 |
+
from utils import TRACKIO_DIR
|
17 |
+
|
18 |
+
|
19 |
+
class SQLiteStorage:
|
20 |
+
@staticmethod
|
21 |
+
def get_project_db_path(project: str) -> str:
|
22 |
+
"""Get the database path for a specific project."""
|
23 |
+
safe_project_name = "".join(
|
24 |
+
c for c in project if c.isalnum() or c in ("-", "_")
|
25 |
+
).rstrip()
|
26 |
+
if not safe_project_name:
|
27 |
+
safe_project_name = "default"
|
28 |
+
return os.path.join(TRACKIO_DIR, f"{safe_project_name}.db")
|
29 |
+
|
30 |
+
@staticmethod
|
31 |
+
def init_db(project: str) -> str:
|
32 |
+
"""
|
33 |
+
Initialize the SQLite database with required tables.
|
34 |
+
Returns the database path.
|
35 |
+
"""
|
36 |
+
db_path = SQLiteStorage.get_project_db_path(project)
|
37 |
+
os.makedirs(os.path.dirname(db_path), exist_ok=True)
|
38 |
+
with SQLiteStorage.get_scheduler().lock:
|
39 |
+
with sqlite3.connect(db_path) as conn:
|
40 |
+
cursor = conn.cursor()
|
41 |
+
cursor.execute("""
|
42 |
+
CREATE TABLE IF NOT EXISTS metrics (
|
43 |
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
44 |
+
timestamp TEXT NOT NULL,
|
45 |
+
project_name TEXT NOT NULL,
|
46 |
+
run_name TEXT NOT NULL,
|
47 |
+
step INTEGER NOT NULL,
|
48 |
+
metrics TEXT NOT NULL
|
49 |
+
)
|
50 |
+
""")
|
51 |
+
conn.commit()
|
52 |
+
return db_path
|
53 |
+
|
54 |
+
@staticmethod
|
55 |
+
def get_scheduler():
|
56 |
+
"""
|
57 |
+
Get the scheduler for the database based on the environment variables.
|
58 |
+
This applies to both local and Spaces.
|
59 |
+
"""
|
60 |
+
if current_scheduler.get() is not None:
|
61 |
+
return current_scheduler.get()
|
62 |
+
hf_token = os.environ.get("HF_TOKEN")
|
63 |
+
dataset_id = os.environ.get("TRACKIO_DATASET_ID")
|
64 |
+
if dataset_id is None:
|
65 |
+
scheduler = DummyCommitScheduler()
|
66 |
+
else:
|
67 |
+
scheduler = CommitScheduler(
|
68 |
+
repo_id=dataset_id,
|
69 |
+
repo_type="dataset",
|
70 |
+
folder_path=TRACKIO_DIR,
|
71 |
+
private=True,
|
72 |
+
squash_history=True,
|
73 |
+
token=hf_token,
|
74 |
+
)
|
75 |
+
current_scheduler.set(scheduler)
|
76 |
+
return scheduler
|
77 |
+
|
78 |
+
@staticmethod
|
79 |
+
def log(project: str, run: str, metrics: dict):
|
80 |
+
"""
|
81 |
+
Safely log metrics to the database. Before logging, this method will ensure the database exists
|
82 |
+
and is set up with the correct tables. It also uses the scheduler to lock the database so
|
83 |
+
that there is no race condition when logging / syncing to the Hugging Face Dataset.
|
84 |
+
"""
|
85 |
+
db_path = SQLiteStorage.init_db(project)
|
86 |
+
|
87 |
+
with SQLiteStorage.get_scheduler().lock:
|
88 |
+
with sqlite3.connect(db_path) as conn:
|
89 |
+
cursor = conn.cursor()
|
90 |
+
|
91 |
+
cursor.execute(
|
92 |
+
"""
|
93 |
+
SELECT MAX(step)
|
94 |
+
FROM metrics
|
95 |
+
WHERE project_name = ? AND run_name = ?
|
96 |
+
""",
|
97 |
+
(project, run),
|
98 |
+
)
|
99 |
+
last_step = cursor.fetchone()[0]
|
100 |
+
current_step = 0 if last_step is None else last_step + 1
|
101 |
+
|
102 |
+
current_timestamp = datetime.now().isoformat()
|
103 |
+
|
104 |
+
cursor.execute(
|
105 |
+
"""
|
106 |
+
INSERT INTO metrics
|
107 |
+
(timestamp, project_name, run_name, step, metrics)
|
108 |
+
VALUES (?, ?, ?, ?, ?)
|
109 |
+
""",
|
110 |
+
(
|
111 |
+
current_timestamp,
|
112 |
+
project,
|
113 |
+
run,
|
114 |
+
current_step,
|
115 |
+
json.dumps(metrics),
|
116 |
+
),
|
117 |
+
)
|
118 |
+
conn.commit()
|
119 |
+
|
120 |
+
@staticmethod
|
121 |
+
def bulk_log(
|
122 |
+
project: str,
|
123 |
+
run: str,
|
124 |
+
metrics_list: list[dict],
|
125 |
+
steps: list[int] | None = None,
|
126 |
+
timestamps: list[str] | None = None,
|
127 |
+
):
|
128 |
+
"""Bulk log metrics to the database with specified steps and timestamps."""
|
129 |
+
if not metrics_list:
|
130 |
+
return
|
131 |
+
|
132 |
+
if steps is None:
|
133 |
+
steps = list(range(len(metrics_list)))
|
134 |
+
|
135 |
+
if timestamps is None:
|
136 |
+
timestamps = [datetime.now().isoformat()] * len(metrics_list)
|
137 |
+
|
138 |
+
if len(metrics_list) != len(steps) or len(metrics_list) != len(timestamps):
|
139 |
+
raise ValueError(
|
140 |
+
"metrics_list, steps, and timestamps must have the same length"
|
141 |
+
)
|
142 |
+
|
143 |
+
db_path = SQLiteStorage.init_db(project)
|
144 |
+
with SQLiteStorage.get_scheduler().lock:
|
145 |
+
with sqlite3.connect(db_path) as conn:
|
146 |
+
cursor = conn.cursor()
|
147 |
+
|
148 |
+
data = []
|
149 |
+
for i, metrics in enumerate(metrics_list):
|
150 |
+
data.append(
|
151 |
+
(
|
152 |
+
timestamps[i],
|
153 |
+
project,
|
154 |
+
run,
|
155 |
+
steps[i],
|
156 |
+
json.dumps(metrics),
|
157 |
+
)
|
158 |
+
)
|
159 |
+
|
160 |
+
cursor.executemany(
|
161 |
+
"""
|
162 |
+
INSERT INTO metrics
|
163 |
+
(timestamp, project_name, run_name, step, metrics)
|
164 |
+
VALUES (?, ?, ?, ?, ?)
|
165 |
+
""",
|
166 |
+
data,
|
167 |
+
)
|
168 |
+
conn.commit()
|
169 |
+
|
170 |
+
@staticmethod
|
171 |
+
def get_metrics(project: str, run: str) -> list[dict]:
|
172 |
+
"""Retrieve metrics for a specific run. The metrics also include the step count (int) and the timestamp (datetime object)."""
|
173 |
+
db_path = SQLiteStorage.get_project_db_path(project)
|
174 |
+
if not os.path.exists(db_path):
|
175 |
+
return []
|
176 |
+
|
177 |
+
with sqlite3.connect(db_path) as conn:
|
178 |
+
cursor = conn.cursor()
|
179 |
+
cursor.execute(
|
180 |
+
"""
|
181 |
+
SELECT timestamp, step, metrics
|
182 |
+
FROM metrics
|
183 |
+
WHERE project_name = ? AND run_name = ?
|
184 |
+
ORDER BY timestamp
|
185 |
+
""",
|
186 |
+
(project, run),
|
187 |
+
)
|
188 |
+
rows = cursor.fetchall()
|
189 |
+
|
190 |
+
results = []
|
191 |
+
for row in rows:
|
192 |
+
timestamp, step, metrics_json = row
|
193 |
+
metrics = json.loads(metrics_json)
|
194 |
+
metrics["timestamp"] = timestamp
|
195 |
+
metrics["step"] = step
|
196 |
+
results.append(metrics)
|
197 |
+
return results
|
198 |
+
|
199 |
+
@staticmethod
|
200 |
+
def get_projects() -> list[str]:
|
201 |
+
"""Get list of all projects by scanning database files."""
|
202 |
+
projects = []
|
203 |
+
if not os.path.exists(TRACKIO_DIR):
|
204 |
+
return projects
|
205 |
+
|
206 |
+
db_files = glob.glob(os.path.join(TRACKIO_DIR, "*.db"))
|
207 |
+
|
208 |
+
for db_file in db_files:
|
209 |
+
try:
|
210 |
+
with sqlite3.connect(db_file) as conn:
|
211 |
+
cursor = conn.cursor()
|
212 |
+
cursor.execute(
|
213 |
+
"SELECT name FROM sqlite_master WHERE type='table' AND name='metrics'"
|
214 |
+
)
|
215 |
+
if cursor.fetchone():
|
216 |
+
cursor.execute("SELECT DISTINCT project_name FROM metrics")
|
217 |
+
project_names = [row[0] for row in cursor.fetchall()]
|
218 |
+
projects.extend(project_names)
|
219 |
+
except sqlite3.Error:
|
220 |
+
continue
|
221 |
+
|
222 |
+
return list(set(projects))
|
223 |
+
|
224 |
+
@staticmethod
def get_runs(project: str) -> list[str]:
    """List the distinct run names recorded for a project.

    Args:
        project: Name of the project whose database file is queried.

    Returns:
        The distinct run_name values stored for the project, or an empty
        list when the project's database file does not exist.
    """
    # Function-scope import so this method does not depend on file-level imports.
    from contextlib import closing

    db_path = SQLiteStorage.get_project_db_path(project)
    if not os.path.exists(db_path):
        return []

    # sqlite3's context manager does not close the connection; closing() does.
    with closing(sqlite3.connect(db_path)) as conn:
        rows = conn.execute(
            "SELECT DISTINCT run_name FROM metrics WHERE project_name = ?",
            (project,),
        ).fetchall()
    return [row[0] for row in rows]
|
238 |
+
|
239 |
+
def finish(self):
    """Hook called when a run is finished; there is currently nothing to clean up."""
    return None
|
trackio_logo.png
ADDED
![]() |
Git LFS Details
|
ui.py
ADDED
@@ -0,0 +1,503 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import shutil
|
3 |
+
from typing import Any
|
4 |
+
|
5 |
+
import gradio as gr
|
6 |
+
import huggingface_hub as hf
|
7 |
+
import pandas as pd
|
8 |
+
|
9 |
+
# Shared Hugging Face Hub client instance; check_auth() calls whoami() on it.
HfApi = hf.HfApi()

# Prefer the installed `trackio` package and fall back to sibling modules when
# that import fails. The bare except is deliberate (see the noqa marker).
try:
    from trackio.sqlite_storage import SQLiteStorage
    from trackio.utils import RESERVED_KEYS, TRACKIO_LOGO_PATH
except:  # noqa: E722
    from sqlite_storage import SQLiteStorage
    from utils import RESERVED_KEYS, TRACKIO_LOGO_PATH

# Custom CSS handed to gr.Blocks below; it targets the element whose id is
# "run-cb" (the runs checkbox group).
css = """
#run-cb .wrap {
    gap: 2px;
}
#run-cb .wrap label {
    line-height: 1;
    padding: 6px;
}
"""
|
27 |
+
|
28 |
+
# Hex colors assigned to runs by position; reused cyclically when there are
# more runs than entries.
COLOR_PALETTE = [
    "#3B82F6",
    "#EF4444",
    "#10B981",
    "#F59E0B",
    "#8B5CF6",
    "#EC4899",
    "#06B6D4",
    "#84CC16",
    "#F97316",
    "#6366F1",
]


def get_color_mapping(runs: list[str], smoothing: bool) -> dict[str, str]:
    """Assign a palette color to each run series name.

    Without smoothing each run maps to its palette color. With smoothing,
    "<run>_smoothed" gets the palette color and "<run>_original" gets the
    same color with the "4D" alpha suffix appended.
    """
    mapping: dict[str, str] = {}
    palette_size = len(COLOR_PALETTE)

    for position, run_name in enumerate(runs):
        hue = COLOR_PALETTE[position % palette_size]
        if not smoothing:
            mapping[run_name] = hue
            continue
        mapping[f"{run_name}_smoothed"] = hue
        mapping[f"{run_name}_original"] = hue + "4D"

    return mapping
|
56 |
+
|
57 |
+
|
58 |
+
def get_projects(request: gr.Request):
    """Build the project dropdown for the current page request.

    A "project" query parameter pins the dropdown to that project and makes
    it non-interactive; otherwise the first stored project (if any) is
    preselected and the dropdown stays interactive. When the
    TRACKIO_DATASET_ID environment variable is set, the dropdown's info text
    links to that dataset.
    """
    dataset_id = os.environ.get("TRACKIO_DATASET_ID")
    projects = SQLiteStorage.get_projects()

    project = request.query_params.get("project")
    interactive = not project
    if interactive:
        project = projects[0] if projects else None

    if dataset_id:
        info = f"&#x21bb; Synced to <a href='https://huggingface.co/datasets/{dataset_id}' target='_blank'>{dataset_id}</a> every 5 min"
    else:
        info = None

    return gr.Dropdown(
        label="Project",
        choices=projects,
        value=project,
        allow_custom_value=True,
        interactive=interactive,
        info=info,
    )
|
76 |
+
|
77 |
+
|
78 |
+
def get_runs(project) -> list[str]:
    """Return the run names stored for `project`, or [] when no project is given."""
    return SQLiteStorage.get_runs(project) if project else []
|
82 |
+
|
83 |
+
|
84 |
+
def get_available_metrics(project: str, runs: list[str]) -> list[str]:
    """Collect the metric names that can be offered as an x-axis.

    Returns "step" and "time" first, followed by every numeric,
    non-reserved column found in any of the given runs, ordered by
    sort_metrics_by_prefix. With no project or no runs only
    ["step", "time"] is returned.
    """
    leading = ["step", "time"]
    if not (project and runs):
        return leading

    discovered = set()
    for run_name in runs:
        records = SQLiteStorage.get_metrics(project, run_name)
        if not records:
            continue
        frame = pd.DataFrame(records)
        discovered.update(
            column
            for column in frame.select_dtypes(include="number").columns
            if column not in RESERVED_KEYS
        )

    # step and time are always offered, whatever the runs contain.
    discovered |= {"step", "time"}
    ordered = sort_metrics_by_prefix(list(discovered))

    # Keep step and time at the front and append the rest in sorted order.
    return leading + [name for name in ordered if name not in leading]
|
112 |
+
|
113 |
+
|
114 |
+
def load_run_data(project: str | None, run: str | None, smoothing: bool, x_axis: str):
|
115 |
+
if not project or not run:
|
116 |
+
return None
|
117 |
+
metrics = SQLiteStorage.get_metrics(project, run)
|
118 |
+
if not metrics:
|
119 |
+
return None
|
120 |
+
df = pd.DataFrame(metrics)
|
121 |
+
|
122 |
+
if "step" not in df.columns:
|
123 |
+
df["step"] = range(len(df))
|
124 |
+
|
125 |
+
if x_axis == "time" and "timestamp" in df.columns:
|
126 |
+
df["timestamp"] = pd.to_datetime(df["timestamp"])
|
127 |
+
first_timestamp = df["timestamp"].min()
|
128 |
+
df["time"] = (df["timestamp"] - first_timestamp).dt.total_seconds()
|
129 |
+
x_column = "time"
|
130 |
+
elif x_axis == "step":
|
131 |
+
x_column = "step"
|
132 |
+
else:
|
133 |
+
x_column = x_axis
|
134 |
+
|
135 |
+
if smoothing:
|
136 |
+
numeric_cols = df.select_dtypes(include="number").columns
|
137 |
+
numeric_cols = [c for c in numeric_cols if c not in RESERVED_KEYS]
|
138 |
+
|
139 |
+
df_original = df.copy()
|
140 |
+
df_original["run"] = f"{run}_original"
|
141 |
+
df_original["data_type"] = "original"
|
142 |
+
|
143 |
+
df_smoothed = df.copy()
|
144 |
+
window_size = max(3, min(10, len(df) // 10)) # Adaptive window size
|
145 |
+
df_smoothed[numeric_cols] = (
|
146 |
+
df_smoothed[numeric_cols]
|
147 |
+
.rolling(window=window_size, center=True, min_periods=1)
|
148 |
+
.mean()
|
149 |
+
)
|
150 |
+
df_smoothed["run"] = f"{run}_smoothed"
|
151 |
+
df_smoothed["data_type"] = "smoothed"
|
152 |
+
|
153 |
+
combined_df = pd.concat([df_original, df_smoothed], ignore_index=True)
|
154 |
+
combined_df["x_axis"] = x_column
|
155 |
+
return combined_df
|
156 |
+
else:
|
157 |
+
df["run"] = run
|
158 |
+
df["data_type"] = "original"
|
159 |
+
df["x_axis"] = x_column
|
160 |
+
return df
|
161 |
+
|
162 |
+
|
163 |
+
def update_runs(project, filter_text, user_interacted_with_runs=False):
    """Refresh the runs checkbox group and the label of the filter textbox.

    The textbox label shows the total number of runs in the project (before
    filtering). The checkbox choices are the runs containing `filter_text`.
    All of them are selected unless the user has already interacted with
    the checkbox group, in which case the selection is left untouched.
    """
    runs = [] if project is None else get_runs(project)
    num_runs = len(runs)

    if filter_text:
        runs = [name for name in runs if filter_text in name]

    checkbox_kwargs = {"choices": runs}
    if not user_interacted_with_runs:
        checkbox_kwargs["value"] = runs

    return gr.CheckboxGroup(**checkbox_kwargs), gr.Textbox(label=f"Runs ({num_runs})")
|
178 |
+
|
179 |
+
|
180 |
+
def filter_runs(project, filter_text):
    """Return a checkbox group restricted to, and selecting, runs containing `filter_text`."""
    matching = [name for name in get_runs(project) if filter_text in name]
    return gr.CheckboxGroup(choices=matching, value=matching)
|
184 |
+
|
185 |
+
|
186 |
+
def update_x_axis_choices(project, runs):
    """Rebuild the x-axis dropdown from the metrics available in the given runs.

    The selected value is always reset to "step".
    """
    choices = get_available_metrics(project, runs)
    return gr.Dropdown(label="X-axis", choices=choices, value="step")
|
194 |
+
|
195 |
+
|
196 |
+
def toggle_timer(cb_value):
    """Return a timer update that is active exactly when the checkbox value is truthy."""
    return gr.Timer(active=bool(cb_value))
|
201 |
+
|
202 |
+
|
203 |
+
def check_auth(hf_token: str | None) -> None:
    """Verify that `hf_token` may write to the Space this app is running in.

    The check only runs when the SYSTEM environment variable equals
    "spaces"; otherwise the function returns without doing anything.

    Args:
        hf_token: Hugging Face token supplied by the caller, or None.

    Raises:
        PermissionError: if no token is given, if the token's user is neither
            the Space author nor a member of the authoring org, if a
            fine-grained token carries no matching "repo.write" scope, or if
            a non-fine-grained token's role is not "write".
    """
    if os.getenv("SYSTEM") == "spaces":  # if we are running in Spaces
        # check auth token passed in
        if hf_token is None:
            raise PermissionError(
                "Expected a HF_TOKEN to be provided when logging to a Space"
            )
        # Resolve the token to its user/org/permission description.
        who = HfApi.whoami(hf_token)
        access_token = who["auth"]["accessToken"]
        owner_name = os.getenv("SPACE_AUTHOR_NAME")
        repo_name = os.getenv("SPACE_REPO_NAME")
        # make sure the token user is either the author of the space,
        # or is a member of an org that is the author.
        orgs = [o["name"] for o in who["orgs"]]
        if owner_name != who["name"] and owner_name not in orgs:
            raise PermissionError(
                "Expected the provided hf_token to be the user owner of the space, or be a member of the org owner of the space"
            )
        # reject fine-grained tokens without specific repo access
        if access_token["role"] == "fineGrained":
            matched = False
            for item in access_token["fineGrained"]["scoped"]:
                # Scope granted directly on this Space with write permission.
                if (
                    item["entity"]["type"] == "space"
                    and item["entity"]["name"] == f"{owner_name}/{repo_name}"
                    and "repo.write" in item["permissions"]
                ):
                    matched = True
                    break
                # Scope granted on the owning user with write permission.
                # NOTE(review): only entity type "user" is accepted here; confirm
                # whether org-owned Spaces need an equivalent org-scoped check.
                if (
                    item["entity"]["type"] == "user"
                    and item["entity"]["name"] == owner_name
                    and "repo.write" in item["permissions"]
                ):
                    matched = True
                    break
            if not matched:
                raise PermissionError(
                    "Expected the provided hf_token with fine grained permissions to provide write access to the space"
                )
        # reject read-only tokens
        elif access_token["role"] != "write":
            raise PermissionError(
                "Expected the provided hf_token to provide write permissions"
            )
|
248 |
+
|
249 |
+
|
250 |
+
def upload_db_to_space(
    project: str, uploaded_db: gr.FileData, hf_token: str | None
) -> None:
    """Store an uploaded database file as the database of `project`.

    The token is checked first via check_auth. An existing database for the
    project is never overwritten: gr.Error is raised instead.
    """
    check_auth(hf_token)

    target_path = SQLiteStorage.get_project_db_path(project)
    if os.path.exists(target_path):
        raise gr.Error(
            f"Trackio database file already exists for project {project}, cannot overwrite."
        )

    target_dir = os.path.dirname(target_path)
    os.makedirs(target_dir, exist_ok=True)
    shutil.copy(uploaded_db["path"], target_path)
|
261 |
+
|
262 |
+
|
263 |
+
def log(
    project: str,
    run: str,
    metrics: dict[str, Any],
    hf_token: str | None,
) -> None:
    """Authorize the caller via check_auth, then record `metrics` for the given run."""
    check_auth(hf_token)
    SQLiteStorage.log(project=project, run=run, metrics=metrics)
|
271 |
+
|
272 |
+
|
273 |
+
def sort_metrics_by_prefix(metrics: list[str]) -> list[str]:
    """
    Order metric names so that names sharing a prefix sit together.

    Names without a "/" come first, sorted alphabetically. The remaining
    names are ordered by the text before their first "/", then by full name.

    Example:
    Input: ["train/loss", "loss", "train/acc", "val/loss"]
    Output: ["loss", "train/acc", "train/loss", "val/loss"]
    """
    plain = sorted(name for name in metrics if "/" not in name)
    # Sorting on (prefix, name) groups by prefix first; sorting on the name
    # alone would not, e.g. "a-b/x" < "a/x" although prefix "a" < "a-b".
    prefixed = sorted(
        (name for name in metrics if "/" in name),
        key=lambda name: (name.split("/")[0], name),
    )
    return plain + prefixed
|
305 |
+
|
306 |
+
|
307 |
+
def configure(request: gr.Request):
    """Translate URL query parameters into dashboard settings.

    "sidebar=collapsed" yields a visible but closed sidebar, "sidebar=hidden"
    an invisible one, and anything else a visible one. "metrics" is a
    comma-separated list of metric names.

    Returns:
        A tuple of (list of requested metric names, sidebar update).
    """
    sidebar_mode = request.query_params.get("sidebar")
    if sidebar_mode == "collapsed":
        sidebar = gr.Sidebar(open=False, visible=True)
    elif sidebar_mode == "hidden":
        sidebar = gr.Sidebar(visible=False)
    else:
        sidebar = gr.Sidebar(visible=True)

    requested = request.query_params.get("metrics")
    selected = requested.split(",") if requested else []
    return selected, sidebar
|
321 |
+
|
322 |
+
|
323 |
+
# Dashboard layout and event wiring. Component creation order and event
# registration order are significant, so statements are kept in place.
with gr.Blocks(theme="citrus", title="Trackio Dashboard", css=css) as demo:
    # The sidebar starts hidden; configure() sets its final state on page load.
    with gr.Sidebar(visible=False) as sidebar:
        gr.Markdown(
            f"<div style='display: flex; align-items: center; gap: 8px;'><img src='/gradio_api/file={TRACKIO_LOGO_PATH}' width='32' height='32'><span style='font-size: 2em; font-weight: bold;'>Trackio</span></div>"
        )
        project_dd = gr.Dropdown(label="Project", allow_custom_value=True)
        run_tb = gr.Textbox(label="Runs", placeholder="Type to filter...")
        run_cb = gr.CheckboxGroup(
            label="Runs", choices=[], interactive=True, elem_id="run-cb"
        )
        gr.HTML("<hr>")
        realtime_cb = gr.Checkbox(label="Refresh metrics realtime", value=True)
        smoothing_cb = gr.Checkbox(label="Smooth metrics", value=True)
        x_axis_dd = gr.Dropdown(
            label="X-axis",
            choices=["step", "time"],
            value="step",
        )

    # Timer driving the periodic refreshes (value=1; presumably seconds).
    timer = gr.Timer(value=1)
    # Metric names requested through the "metrics" query parameter.
    metrics_subset = gr.State([])
    # Becomes True once the user edits the run selection (see run_cb.input).
    user_interacted_with_run_cb = gr.State(False)

    # On page load: apply query parameters and populate the project dropdown.
    gr.on([demo.load], fn=configure, outputs=[metrics_subset, sidebar])
    gr.on(
        [demo.load],
        fn=get_projects,
        outputs=project_dd,
        show_progress="hidden",
    )
    # On every tick: refresh the run list, passing the interaction flag so
    # update_runs can leave the user's selection alone.
    gr.on(
        [timer.tick],
        fn=update_runs,
        inputs=[project_dd, run_tb, user_interacted_with_run_cb],
        outputs=[run_cb, run_tb],
        show_progress="hidden",
    )
    # On load or project change: refresh the run list without the flag, so
    # update_runs uses its default (False) and selects every listed run.
    gr.on(
        [demo.load, project_dd.change],
        fn=update_runs,
        inputs=[project_dd, run_tb],
        outputs=[run_cb, run_tb],
        show_progress="hidden",
    )
    # Keep the x-axis choices in sync with the selected project and runs.
    gr.on(
        [demo.load, project_dd.change, run_cb.change],
        fn=update_x_axis_choices,
        inputs=[project_dd, run_cb],
        outputs=x_axis_dd,
        show_progress="hidden",
    )

    # The "realtime" checkbox switches the timer on and off.
    realtime_cb.change(
        fn=toggle_timer,
        inputs=realtime_cb,
        outputs=timer,
        api_name="toggle_timer",
    )
    # Record that the user has changed the run selection.
    run_cb.input(
        fn=lambda: True,
        outputs=user_interacted_with_run_cb,
    )
    # Typing in the filter box narrows (and re-selects) the listed runs.
    run_tb.input(
        fn=filter_runs,
        inputs=[project_dd, run_tb],
        outputs=run_cb,
    )

    # API-only endpoints; both call check_auth before doing any work.
    gr.api(
        fn=upload_db_to_space,
        api_name="upload_db_to_space",
    )
    gr.api(
        fn=log,
        api_name="log",
    )

    # Current x-range selection shared by all plots; None means no selection.
    x_lim = gr.State(None)
    # Last seen step per run; a change triggers update_dashboard below.
    last_steps = gr.State({})

    def update_x_lim(select_data: gr.SelectData):
        """Return the index of a plot selection, stored as the shared x-range."""
        return select_data.index

    def update_last_steps(project, runs):
        """Update the last step from all runs to detect when new data is available."""
        if not project or not runs:
            return {}

        last_steps = {}
        for run in runs:
            metrics = SQLiteStorage.get_metrics(project, run)
            if metrics:
                df = pd.DataFrame(metrics)
                # Fall back to row positions when no "step" column exists.
                if "step" not in df.columns:
                    df["step"] = range(len(df))
                if not df.empty:
                    last_steps[run] = df["step"].max().item()
                else:
                    last_steps[run] = 0
            else:
                last_steps[run] = 0

        return last_steps

    timer.tick(
        fn=update_last_steps,
        inputs=[project_dd, run_cb],
        outputs=last_steps,
        show_progress="hidden",
    )

    @gr.render(
        triggers=[
            demo.load,
            run_cb.change,
            last_steps.change,
            smoothing_cb.change,
            x_lim.change,
            x_axis_dd.change,
        ],
        inputs=[project_dd, run_cb, smoothing_cb, metrics_subset, x_lim, x_axis_dd],
        show_progress="hidden",
    )
    def update_dashboard(project, runs, smoothing, metrics_subset, x_lim_value, x_axis):
        """Render one line plot per numeric metric across the selected runs."""
        dfs = []
        original_runs = runs.copy()

        for run in runs:
            df = load_run_data(project, run, smoothing, x_axis)
            if df is not None:
                dfs.append(df)

        if dfs:
            master_df = pd.concat(dfs, ignore_index=True)
        else:
            master_df = pd.DataFrame()

        # Nothing to plot: render no components.
        if master_df.empty:
            return

        # load_run_data stores the chosen x column name in the "x_axis" column.
        x_column = "step"
        if dfs and not dfs[0].empty and "x_axis" in dfs[0].columns:
            x_column = dfs[0]["x_axis"].iloc[0]

        numeric_cols = master_df.select_dtypes(include="number").columns
        numeric_cols = [c for c in numeric_cols if c not in RESERVED_KEYS]
        # Restrict to the metrics requested in the URL, when any were given.
        if metrics_subset:
            numeric_cols = [c for c in numeric_cols if c in metrics_subset]

        numeric_cols = sort_metrics_by_prefix(list(numeric_cols))
        color_map = get_color_mapping(original_runs, smoothing)

        with gr.Row(key="row"):
            for metric_idx, metric_name in enumerate(numeric_cols):
                # Only rows that actually logged this metric are plotted.
                metric_df = master_df.dropna(subset=[metric_name])
                if not metric_df.empty:
                    plot = gr.LinePlot(
                        metric_df,
                        x=x_column,
                        y=metric_name,
                        color="run" if "run" in metric_df.columns else None,
                        color_map=color_map,
                        title=metric_name,
                        key=f"plot-{metric_idx}",
                        preserved_by_key=None,
                        x_lim=x_lim_value,
                        y_lim=[
                            metric_df[metric_name].min(),
                            metric_df[metric_name].max(),
                        ],
                        show_fullscreen_button=True,
                        min_width=400,
                    )
                    # Selecting a range stores it in x_lim; double-click resets
                    # x_lim to None.
                    plot.select(update_x_lim, outputs=x_lim, key=f"select-{metric_idx}")
                    plot.double_click(
                        lambda: None, outputs=x_lim, key=f"double-{metric_idx}"
                    )
|
500 |
+
|
501 |
+
|
502 |
+
# Launch the dashboard when this file is run as a script. The logo path is
# added to allowed_paths because the sidebar markup loads the logo by path.
if __name__ == "__main__":
    demo.launch(allowed_paths=[TRACKIO_LOGO_PATH], show_api=False, show_error=True)
|
utils.py
ADDED
@@ -0,0 +1,278 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import random
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
import time
|
6 |
+
from pathlib import Path
|
7 |
+
|
8 |
+
import huggingface_hub
|
9 |
+
from huggingface_hub.constants import HF_HOME
|
10 |
+
|
11 |
+
# Bookkeeping column names, listed separately from user-logged metrics.
RESERVED_KEYS = ["project", "run", "timestamp", "step", "time"]
# "trackio" directory inside the Hugging Face home directory (HF_HOME).
TRACKIO_DIR = os.path.join(HF_HOME, "trackio")

# Path of the logo image that sits next to this module.
TRACKIO_LOGO_PATH = str(Path(__file__).parent.joinpath("trackio_logo.png"))
|
15 |
+
|
16 |
+
|
17 |
+
def generate_readable_name():
    """
    Build a random "<adjective>-<noun>-<number>" name such as "dainty-sunset-1".

    The number is drawn from 1 to 99 inclusive. Draw order is adjective,
    noun, number.
    """
    # Word order (including the repeated "lively") is kept as-is so that a
    # seeded random generator keeps producing the same names.
    adjectives = (
        "dainty", "brave", "calm", "eager", "fancy", "gentle",
        "happy", "jolly", "kind", "lively", "merry", "nice",
        "proud", "quick", "silly", "tidy", "witty", "zealous",
        "bright", "shy", "bold", "clever", "daring", "elegant",
        "faithful", "graceful", "honest", "inventive", "jovial", "keen",
        "lucky", "modest", "noble", "optimistic", "patient", "quirky",
        "resourceful", "sincere", "thoughtful", "upbeat", "valiant", "warm",
        "youthful", "zesty", "adventurous", "breezy", "cheerful", "delightful",
        "energetic", "fearless", "glad", "hopeful", "imaginative", "joyful",
        "kindly", "luminous", "mysterious", "neat", "outgoing", "playful",
        "radiant", "spirited", "tranquil", "unique", "vivid", "wise",
        "zany", "artful", "bubbly", "charming", "dazzling", "earnest",
        "festive", "gentlemanly", "hearty", "intrepid", "jubilant", "knightly",
        "lively", "magnetic", "nimble", "orderly", "peaceful", "quick-witted",
        "robust", "sturdy", "trusty", "upstanding", "vibrant", "whimsical",
    )
    nouns = (
        "sunset", "forest", "river", "mountain", "breeze", "meadow",
        "ocean", "valley", "sky", "field", "cloud", "star",
        "rain", "leaf", "stone", "flower", "bird", "tree",
        "wave", "trail", "island", "desert", "hill", "lake",
        "pond", "grove", "canyon", "reef", "bay", "peak",
        "glade", "marsh", "cliff", "dune", "spring", "brook",
        "cave", "plain", "ridge", "wood", "blossom", "petal",
        "root", "branch", "seed", "acorn", "pine", "willow",
        "cedar", "elm", "falcon", "eagle", "sparrow", "robin",
        "owl", "finch", "heron", "crane", "duck", "swan",
        "fox", "wolf", "bear", "deer", "moose", "otter",
        "beaver", "lynx", "hare", "badger", "butterfly", "bee",
        "ant", "beetle", "dragonfly", "firefly", "ladybug", "moth",
        "spider", "worm", "coral", "kelp", "shell", "pebble",
        "boulder", "cobble", "sand", "wavelet", "tide", "current",
    )
    # f-string fields are evaluated left to right, which fixes the draw order.
    return f"{random.choice(adjectives)}-{random.choice(nouns)}-{random.randint(1, 99)}"
|
209 |
+
|
210 |
+
|
211 |
+
def block_except_in_notebook():
    """Block the calling thread until interrupted, unless running interactively.

    The session counts as interactive when sys.ps1 exists and is truthy or,
    if it does not exist, when the interpreter's interactive flag is set.
    In that case the function returns immediately. Otherwise it sleeps in a
    loop until a KeyboardInterrupt or OSError occurs, then prints a notice.
    """
    interactive_marker = getattr(sys, "ps1", sys.flags.interactive)
    if interactive_marker:
        return

    try:
        while True:
            time.sleep(0.1)
    except (KeyboardInterrupt, OSError):
        print("Keyboard interruption in main thread... closing dashboard.")
|
220 |
+
|
221 |
+
|
222 |
+
def simplify_column_names(columns: list[str]) -> dict[str, str]:
|
223 |
+
"""
|
224 |
+
Simplifies column names to first 10 alphanumeric or "/" characters with unique suffixes.
|
225 |
+
|
226 |
+
Args:
|
227 |
+
columns: List of original column names
|
228 |
+
|
229 |
+
Returns:
|
230 |
+
Dictionary mapping original column names to simplified names
|
231 |
+
"""
|
232 |
+
simplified_names = {}
|
233 |
+
used_names = set()
|
234 |
+
|
235 |
+
for col in columns:
|
236 |
+
alphanumeric = re.sub(r"[^a-zA-Z0-9/]", "", col)
|
237 |
+
base_name = alphanumeric[:10] if alphanumeric else f"col_{len(used_names)}"
|
238 |
+
|
239 |
+
final_name = base_name
|
240 |
+
suffix = 1
|
241 |
+
while final_name in used_names:
|
242 |
+
final_name = f"{base_name}_{suffix}"
|
243 |
+
suffix += 1
|
244 |
+
|
245 |
+
simplified_names[col] = final_name
|
246 |
+
used_names.add(final_name)
|
247 |
+
|
248 |
+
return simplified_names
|
249 |
+
|
250 |
+
|
251 |
+
def print_dashboard_instructions(project: str) -> None:
    """
    Print how to open the Trackio dashboard for a project.

    Three lines are written to stdout: a heading, the CLI command (wrapped
    in ANSI bold and yellow escape codes) and the equivalent Python call.

    Args:
        project: The name of the project to show dashboard for.
    """
    highlight = "\033[1m" + "\033[93m"  # ANSI bold followed by bright yellow
    reset = "\033[0m"

    lines = (
        "* View dashboard by running in your terminal:",
        f'{highlight}trackio show --project "{project}"{reset}',
        f'* or by running in Python: trackio.show(project="{project}")',
    )
    for line in lines:
        print(line)
|
265 |
+
|
266 |
+
|
267 |
+
def preprocess_space_and_dataset_ids(
|
268 |
+
space_id: str | None, dataset_id: str | None
|
269 |
+
) -> tuple[str | None, str | None]:
|
270 |
+
if space_id is not None and "/" not in space_id:
|
271 |
+
username = huggingface_hub.whoami()["name"]
|
272 |
+
space_id = f"{username}/{space_id}"
|
273 |
+
if dataset_id is not None and "/" not in dataset_id:
|
274 |
+
username = huggingface_hub.whoami()["name"]
|
275 |
+
dataset_id = f"{username}/{dataset_id}"
|
276 |
+
if space_id is not None and dataset_id is None:
|
277 |
+
dataset_id = f"{space_id}_dataset"
|
278 |
+
return space_id, dataset_id
|
version.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0.0.17
|