Spaces:
Running
on
Zero
Running
on
Zero
setup cache
Browse files- app.py +44 -11
- build_cache.py +107 -0
- modular_graph_and_candidates.py +2 -33
app.py
CHANGED
@@ -9,12 +9,47 @@ import tempfile
|
|
9 |
from datetime import datetime, timedelta
|
10 |
from functools import lru_cache
|
11 |
from pathlib import Path
|
12 |
-
|
|
|
|
|
13 |
import gradio as gr
|
14 |
|
15 |
# ── refactored helpers ──
|
16 |
from modular_graph_and_candidates import build_graph_json, generate_html, build_timeline_json, generate_timeline_html
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
HF_MAIN_REPO = "https://github.com/huggingface/transformers"
|
19 |
|
20 |
# ───────────────────────────── cache repo once per 24 h ───────────────────────────
|
@@ -40,19 +75,13 @@ def clone_or_cache(repo_url: str) -> Path:
|
|
40 |
|
41 |
# ───────────────────────────── main callback ─────────────────────────────────────
|
42 |
|
43 |
-
def _escape_srcdoc(text: str) -> str:
|
44 |
-
"""Escape for inclusion inside an <iframe srcdoc="β¦"> attribute."""
|
45 |
-
return (
|
46 |
-
text.replace("&", "&")
|
47 |
-
.replace("\"", """)
|
48 |
-
.replace("'", "'")
|
49 |
-
.replace("<", "<")
|
50 |
-
.replace(">", ">")
|
51 |
-
)
|
52 |
-
|
53 |
|
54 |
def run_graph(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
|
55 |
"""Generate the dependency graph visualization."""
|
|
|
|
|
|
|
|
|
56 |
repo_path = clone_or_cache(repo_url)
|
57 |
|
58 |
graph = build_graph_json(
|
@@ -75,6 +104,10 @@ def run_graph(repo_url: str, threshold: float, multimodal: bool, sim_method: str
|
|
75 |
|
76 |
def run_timeline(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
|
77 |
"""Generate the chronological timeline visualization."""
|
|
|
|
|
|
|
|
|
78 |
repo_path = clone_or_cache(repo_url)
|
79 |
|
80 |
timeline = build_timeline_json(
|
|
|
9 |
from datetime import datetime, timedelta
|
10 |
from functools import lru_cache
|
11 |
from pathlib import Path
|
12 |
+
import os, json, tempfile
|
13 |
+
from pathlib import Path
|
14 |
+
from huggingface_hub import hf_hub_download
|
15 |
import gradio as gr
|
16 |
|
17 |
# ── refactored helpers ──
|
18 |
from modular_graph_and_candidates import build_graph_json, generate_html, build_timeline_json, generate_timeline_html
|
19 |
|
20 |
+
def _escape_srcdoc(text: str) -> str:
|
21 |
+
"""Escape for inclusion inside an <iframe srcdoc="β¦"> attribute."""
|
22 |
+
return (
|
23 |
+
text.replace("&", "&")
|
24 |
+
.replace("\"", """)
|
25 |
+
.replace("'", "'")
|
26 |
+
.replace("<", "<")
|
27 |
+
.replace(">", ">")
|
28 |
+
)
|
29 |
+
|
30 |
+
def _fetch_from_cache_repo(kind: str, sim_method: str, threshold: float, multimodal: bool):
|
31 |
+
"""Fetch cached data from Molbap/hf_cached_embeds_log repo."""
|
32 |
+
|
33 |
+
repo_id = "Molbap/hf_cached_embeds_log"
|
34 |
+
try:
|
35 |
+
latest_fp = hf_hub_download(repo_id=repo_id, filename="latest.json")
|
36 |
+
info = json.loads(Path(latest_fp).read_text(encoding="utf-8"))
|
37 |
+
sha = info.get("sha")
|
38 |
+
key = f"{sha}/{sim_method}-{threshold:.2f}-m{int(multimodal)}"
|
39 |
+
|
40 |
+
html_fp = hf_hub_download(repo_id=repo_id, filename=f"{kind}/{key}.html")
|
41 |
+
json_fp = hf_hub_download(repo_id=repo_id, filename=f"{kind}/{key}.json")
|
42 |
+
|
43 |
+
raw_html = Path(html_fp).read_text(encoding="utf-8")
|
44 |
+
json_text = Path(json_fp).read_text(encoding="utf-8")
|
45 |
+
|
46 |
+
iframe_html = f'<iframe style="width:100%;height:85vh;border:none;" srcdoc="{_escape_srcdoc(raw_html)}"></iframe>'
|
47 |
+
tmp = Path(tempfile.mkstemp(suffix=("_timeline.json" if kind == "timeline" else ".json"))[1])
|
48 |
+
tmp.write_text(json_text, encoding="utf-8")
|
49 |
+
return iframe_html, str(tmp)
|
50 |
+
except Exception:
|
51 |
+
return None
|
52 |
+
|
53 |
HF_MAIN_REPO = "https://github.com/huggingface/transformers"
|
54 |
|
55 |
# ───────────────────────────── cache repo once per 24 h ───────────────────────────
|
|
|
75 |
|
76 |
# ───────────────────────────── main callback ─────────────────────────────────────
|
77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
def run_graph(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
|
80 |
"""Generate the dependency graph visualization."""
|
81 |
+
hit = _fetch_from_cache_repo("graph", sim_method, threshold, multimodal)
|
82 |
+
if hit:
|
83 |
+
return hit
|
84 |
+
|
85 |
repo_path = clone_or_cache(repo_url)
|
86 |
|
87 |
graph = build_graph_json(
|
|
|
104 |
|
105 |
def run_timeline(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
|
106 |
"""Generate the chronological timeline visualization."""
|
107 |
+
hit = _fetch_from_cache_repo("timeline", sim_method, threshold, multimodal)
|
108 |
+
if hit:
|
109 |
+
return hit
|
110 |
+
|
111 |
repo_path = clone_or_cache(repo_url)
|
112 |
|
113 |
timeline = build_timeline_json(
|
build_cache.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
import subprocess
|
4 |
+
import tempfile
|
5 |
+
from pathlib import Path
|
6 |
+
from datetime import datetime, timezone
|
7 |
+
from huggingface_hub import HfApi
|
8 |
+
|
9 |
+
from modular_graph_and_candidates import (
|
10 |
+
build_graph_json,
|
11 |
+
generate_html,
|
12 |
+
build_timeline_json,
|
13 |
+
generate_timeline_html
|
14 |
+
)
|
15 |
+
|
16 |
+
# Build configuration; every value except CACHE_REPO can be overridden through
# an environment variable of the name passed to os.getenv.
REPO_URL = os.getenv("REPO_URL", "https://github.com/huggingface/transformers")
CACHE_REPO = "Molbap/hf_cached_embeds_log"
THRESH = float(os.getenv("SIM_THRESHOLD", "0.50"))
# Compare case-insensitively so that "TRUE" or "Yes" behave like the
# "True" / "YES" spellings that were already accepted.
MULTIMODAL = os.getenv("MULTIMODAL", "0").strip().lower() in {"1", "true", "yes"}
SIM_METHOD = os.getenv("SIM_METHOD", "jaccard")
|
21 |
+
|
22 |
+
def main():
    """Build the graph and timeline artifacts for REPO_URL and upload them to CACHE_REPO.

    Steps: shallow-clone REPO_URL into a scratch directory, read its HEAD SHA,
    build the graph and timeline JSON and HTML with the helpers imported from
    ``modular_graph_and_candidates``, then upload the four artifacts plus a
    ``latest.json`` index under the key
    ``{sha}/{SIM_METHOD}-{THRESH:.2f}-m{0|1}``.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` or ``git rev-parse`` fails.
    """
    import shutil  # local import: only needed to clean up the scratch directory

    print(f"Building cache for {REPO_URL}")
    print(f"Config: threshold={THRESH}, multimodal={MULTIMODAL}, method={SIM_METHOD}")

    tmp = Path(tempfile.mkdtemp())
    print(f"Working in {tmp}")

    try:
        repo_path = tmp / "repo"

        print("Cloning repository...")
        subprocess.check_call([
            "git", "clone", "--depth", "1", REPO_URL, str(repo_path)
        ])

        sha = subprocess.check_output([
            "git", "rev-parse", "HEAD"
        ], cwd=repo_path, text=True).strip()

        print(f"Repository SHA: {sha}")

        print("Building graph...")
        graph = build_graph_json(
            transformers_dir=repo_path,
            threshold=THRESH,
            multimodal=MULTIMODAL,
            sim_method=SIM_METHOD,
        )

        print("Building timeline...")
        timeline = build_timeline_json(
            transformers_dir=repo_path,
            threshold=THRESH,
            multimodal=MULTIMODAL,
            sim_method=SIM_METHOD,
        )

        print("Generating HTML...")
        graph_html = generate_html(graph)
        timeline_html = generate_timeline_html(timeline)

        print(f"Uploading to {CACHE_REPO}...")

        api = HfApi()

        key = f"{sha}/{SIM_METHOD}-{THRESH:.2f}-m{int(MULTIMODAL)}"

        latest = {
            "sha": sha,
            "updated_utc": datetime.now(timezone.utc).isoformat(),
            "defaults": {
                "sim_method": SIM_METHOD,
                "threshold": THRESH,
                "multimodal": MULTIMODAL,
            },
            "paths": {
                "graph_json": f"graph/{key}.json",
                "graph_html": f"graph/{key}.html",
                "timeline_json": f"timeline/{key}.json",
                "timeline_html": f"timeline/{key}.html",
            },
        }

        # latest.json is last in this list, so it is uploaded only after the
        # artifacts it points to.
        files_to_upload = [
            (f"graph/{key}.json", json.dumps(graph, separators=(',', ':'))),
            (f"graph/{key}.html", graph_html),
            (f"timeline/{key}.json", json.dumps(timeline, separators=(',', ':'))),
            (f"timeline/{key}.html", timeline_html),
            ("latest.json", json.dumps(latest, separators=(',', ':'))),
        ]

        commit_message = f"Cache update {sha[:7]} - {SIM_METHOD} t={THRESH} m={int(MULTIMODAL)}"
        for path_in_repo, content in files_to_upload:
            # upload_file accepts raw bytes, so no intermediate file is needed.
            api.upload_file(
                path_or_fileobj=content.encode("utf-8"),
                path_in_repo=path_in_repo,
                repo_id=CACHE_REPO,
                commit_message=commit_message,
            )
            print(f"Uploaded {path_in_repo}")

        print(f"Successfully uploaded cache for {key}")
    finally:
        # The scratch directory holds a full clone; remove it on success and
        # on failure alike.
        shutil.rmtree(tmp, ignore_errors=True)


if __name__ == "__main__":
    main()
|
modular_graph_and_candidates.py
CHANGED
@@ -685,7 +685,6 @@ function updateVisibility() {
|
|
685 |
}
|
686 |
document.getElementById('toggleRed').addEventListener('change', updateVisibility);
|
687 |
|
688 |
-
const HF_LOGO_URI = "./static/hf-logo.png";
|
689 |
const graph = __GRAPH_DATA__;
|
690 |
const W = innerWidth, H = innerHeight;
|
691 |
const svg = d3.select('#dependency').call(d3.zoom().on('zoom', e => g.attr('transform', e.transform)));
|
@@ -709,23 +708,7 @@ const node = g.selectAll('g.node')
|
|
709 |
.call(d3.drag().on('start', dragStart).on('drag', dragged).on('end', dragEnd));
|
710 |
|
711 |
const baseSel = node.filter(d => d.cls === 'base');
|
712 |
-
|
713 |
-
baseSel.append('image')
|
714 |
-
.attr('href', HF_LOGO_URI)
|
715 |
-
.attr('width', 40)
|
716 |
-
.attr('height', 40)
|
717 |
-
.attr('x', -20)
|
718 |
-
.attr('y', -20)
|
719 |
-
.on('error', function() {
|
720 |
-
console.log('Image failed to load:', HF_LOGO_URI);
|
721 |
-
// Fallback to circle
|
722 |
-
d3.select(this.parentNode).append('circle')
|
723 |
-
.attr('r', 22).attr('fill', '#ffbe0b');
|
724 |
-
});
|
725 |
-
console.log('Loading logo from:', HF_LOGO_URI);
|
726 |
-
}else{
|
727 |
-
baseSel.append('circle').attr('r', d => 22*d.sz).attr('fill', '#ffbe0b');
|
728 |
-
}
|
729 |
node.filter(d => d.cls !== 'base').append('circle').attr('r', d => 20*d.sz);
|
730 |
|
731 |
node.append('text')
|
@@ -922,7 +905,6 @@ function updateVisibility() {
|
|
922 |
}
|
923 |
document.getElementById('toggleRed').addEventListener('change', updateVisibility);
|
924 |
|
925 |
-
const HF_LOGO_URI = "./static/hf-logo.png";
|
926 |
const timeline = __TIMELINE_DATA__;
|
927 |
const W = innerWidth, H = innerHeight;
|
928 |
|
@@ -1003,20 +985,7 @@ const node = g.selectAll('g.node')
|
|
1003 |
.call(d3.drag().on('start', dragStart).on('drag', dragged).on('end', dragEnd));
|
1004 |
|
1005 |
const baseSel = node.filter(d => d.cls === 'base');
|
1006 |
-
|
1007 |
-
baseSel.append('image')
|
1008 |
-
.attr('href', HF_LOGO_URI)
|
1009 |
-
.attr('width', 35)
|
1010 |
-
.attr('height', 35)
|
1011 |
-
.attr('x', -17.5)
|
1012 |
-
.attr('y', -17.5)
|
1013 |
-
.on('error', function() {
|
1014 |
-
d3.select(this.parentNode).append('circle')
|
1015 |
-
.attr('r', 20).attr('fill', '#ffbe0b');
|
1016 |
-
});
|
1017 |
-
} else {
|
1018 |
-
baseSel.append('circle').attr('r', 20).attr('fill', '#ffbe0b');
|
1019 |
-
}
|
1020 |
node.filter(d => d.cls !== 'base').append('circle').attr('r', 18);
|
1021 |
|
1022 |
node.append('text')
|
|
|
685 |
}
|
686 |
document.getElementById('toggleRed').addEventListener('change', updateVisibility);
|
687 |
|
|
|
688 |
const graph = __GRAPH_DATA__;
|
689 |
const W = innerWidth, H = innerHeight;
|
690 |
const svg = d3.select('#dependency').call(d3.zoom().on('zoom', e => g.attr('transform', e.transform)));
|
|
|
708 |
.call(d3.drag().on('start', dragStart).on('drag', dragged).on('end', dragEnd));
|
709 |
|
710 |
const baseSel = node.filter(d => d.cls === 'base');
|
711 |
+
baseSel.append('circle').attr('r', d => 22*d.sz).attr('fill', '#ffbe0b');
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
712 |
node.filter(d => d.cls !== 'base').append('circle').attr('r', d => 20*d.sz);
|
713 |
|
714 |
node.append('text')
|
|
|
905 |
}
|
906 |
document.getElementById('toggleRed').addEventListener('change', updateVisibility);
|
907 |
|
|
|
908 |
const timeline = __TIMELINE_DATA__;
|
909 |
const W = innerWidth, H = innerHeight;
|
910 |
|
|
|
985 |
.call(d3.drag().on('start', dragStart).on('drag', dragged).on('end', dragEnd));
|
986 |
|
987 |
const baseSel = node.filter(d => d.cls === 'base');
|
988 |
+
baseSel.append('circle').attr('r', 20).attr('fill', '#ffbe0b');
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
989 |
node.filter(d => d.cls !== 'base').append('circle').attr('r', 18);
|
990 |
|
991 |
node.append('text')
|