|
from __future__ import annotations |
|
|
|
import json |
|
import tempfile |
|
from pathlib import Path |
|
|
|
import gradio as gr |
|
from huggingface_hub import hf_hub_download |
|
|
|
from modular_graph_and_candidates import ( |
|
build_graph_json, |
|
generate_html, |
|
build_timeline_json, |
|
generate_timeline_html, |
|
filter_graph_by_threshold, |
|
) |
|
|
|
def _escape_srcdoc(text: str) -> str: |
|
return ( |
|
text.replace("&", "&") |
|
.replace("\"", """) |
|
.replace("'", "'") |
|
.replace("<", "<") |
|
.replace(">", ">") |
|
) |
|
|
|
# Default value shown in the "Repo / fork URL" text boxes of the UI.
HF_MAIN_REPO = "https://github.com/huggingface/transformers"

# Hub repository (queried with repo_type="dataset") that the app downloads
# ``latest.json`` and the per-sha graph / timeline / LOC artifacts from.
CACHE_REPO = "Molbap/hf_cached_embeds_log"
|
|
|
def _fetch_from_cache_repo(
    kind: str,
    sim_method: str,
    threshold: float,
    multimodal: bool,
    *,
    height_vh: int = 85,
) -> tuple[str, str]:
    """Render a cached graph/timeline payload as an iframe plus a JSON download.

    Steps:

    1. Download ``latest.json`` from the ``CACHE_REPO`` dataset and read its
       ``sha`` entry.
    2. Download ``{kind}/{sha}/{sim_method}-m{0|1}.json`` from the same repo.
    3. Pass the parsed payload through ``filter_graph_by_threshold``.
    4. Render it with ``generate_timeline_html`` when ``kind == "timeline"``,
       otherwise with ``generate_html``.
    5. Write the filtered payload to a temporary JSON file.

    Args:
        kind: Artifact folder in the cache repo; ``"timeline"`` selects the
            timeline renderer, any other value selects the graph renderer.
        sim_method: Similarity method name used in the cached file name.
        threshold: Value forwarded to ``filter_graph_by_threshold``.
        multimodal: Selects the ``-m1`` (True) or ``-m0`` (False) artifact.
        height_vh: Height of the generated iframe, in ``vh`` units.

    Returns:
        ``(iframe_html, json_path)`` where ``iframe_html`` is an ``<iframe>``
        element whose ``srcdoc`` holds the escaped rendered page and
        ``json_path`` is the path of the temporary JSON file.

    Raises:
        KeyError: If ``latest.json`` has no usable ``sha`` entry.
    """
    pointer_fp = hf_hub_download(repo_id=CACHE_REPO, filename="latest.json", repo_type="dataset")
    info = json.loads(Path(pointer_fp).read_text(encoding="utf-8"))

    # Fail loudly instead of formatting a missing sha as the literal "None"
    # and requesting a path that cannot exist.
    sha = info.get("sha")
    if not sha:
        raise KeyError("sha")

    key = f"{sha}/{sim_method}-m{int(multimodal)}"
    json_fp = hf_hub_download(repo_id=CACHE_REPO, filename=f"{kind}/{key}.json", repo_type="dataset")

    raw_data = json.loads(Path(json_fp).read_text(encoding="utf-8"))
    filtered_data = filter_graph_by_threshold(raw_data, threshold)

    is_timeline = kind == "timeline"
    raw_html = generate_timeline_html(filtered_data) if is_timeline else generate_html(filtered_data)

    iframe_html = f'<iframe style="width:100%;height:{height_vh}vh;border:none;" srcdoc="{_escape_srcdoc(raw_html)}"></iframe>'

    # delete=False: the path is returned to the caller and must outlive this
    # function. The context manager closes the handle, so no file descriptor
    # is leaked (a bare mkstemp() leaves its descriptor open).
    suffix = "_timeline.json" if is_timeline else ".json"
    with tempfile.NamedTemporaryFile("w", suffix=suffix, delete=False, encoding="utf-8") as handle:
        json.dump(filtered_data, handle)
        json_path = handle.name

    return iframe_html, json_path
|
|
|
def run_loc(sim_method: str, multimodal: bool, *, height_vh: int = 85):
    """Return an iframe embedding the cached LOC page.

    Reads the ``sha`` entry of ``latest.json`` in the ``CACHE_REPO`` dataset,
    downloads ``loc/{sha}/{sim_method}-m{0|1}.html`` and wraps its escaped
    content in an ``<iframe srcdoc=...>`` that is ``height_vh`` vh tall.

    Raises:
        KeyError: If ``latest.json`` has no ``sha`` entry.
    """
    pointer_path = hf_hub_download(repo_id=CACHE_REPO, filename="latest.json", repo_type="dataset")
    pointer = json.loads(Path(pointer_path).read_text(encoding="utf-8"))
    commit_sha = pointer["sha"]

    artifact_key = f"{commit_sha}/{sim_method}-m{int(multimodal)}"
    page_path = hf_hub_download(
        repo_id=CACHE_REPO,
        filename=f"loc/{artifact_key}.html",
        repo_type="dataset",
    )

    page = Path(page_path).read_text(encoding="utf-8")
    srcdoc = _escape_srcdoc(page)
    return f'<iframe style="width:100%;height:{height_vh}vh;border:none;" srcdoc="{srcdoc}"></iframe>'
|
|
|
def run_graph(repo_url: str, threshold: float, multimodal: bool, sim_method: str, *, height_vh: int = 85):
    """Return ``(iframe_html, json_path)`` for the cached dependency graph.

    Thin wrapper around ``_fetch_from_cache_repo`` with ``kind="graph"``.
    ``repo_url`` is accepted but not used by this function.
    """
    return _fetch_from_cache_repo(
        "graph",
        sim_method,
        threshold,
        multimodal,
        height_vh=height_vh,
    )
|
|
|
def run_timeline(repo_url: str, threshold: float, multimodal: bool, sim_method: str, *, height_vh: int = 85):
    """Return ``(iframe_html, json_path)`` for the cached timeline.

    Thin wrapper around ``_fetch_from_cache_repo`` with ``kind="timeline"``.
    ``repo_url`` is accepted but not used by this function.
    """
    return _fetch_from_cache_repo(
        "timeline",
        sim_method,
        threshold,
        multimodal,
        height_vh=height_vh,
    )
|
|
|
|
|
|
|
# Stylesheet handed to gr.Blocks: sizes the iframes rendered inside the
# elements with ids "graph_html" and "timeline_html".
CUSTOM_CSS = (
    "\n"
    "#graph_html iframe, #timeline_html iframe "
    "{height:85vh !important; width:100% !important; border:none;}"
    "\n"
)

# Recognised values of the "tab" query parameter and their tab positions.
TAB_INDEX = dict(timeline=0, loc=1, graph=2)
|
|
|
# UI definition: a header, three tabs (timeline / LOC / graph) and a hidden
# HTML slot used for the "embed" mode that is resolved on page load.
with gr.Blocks(css=CUSTOM_CSS) as demo:
    # NOTE(review): the leading "π" looks like a mis-decoded emoji whose
    # original code point cannot be recovered from this file; restore it from
    # version control if possible.
    header = gr.Markdown("## π Modular-candidate explorer for 🤗 Transformers")

    with gr.Tabs() as tabs:
        with gr.Tab("Chronological Timeline", id="timeline"):
            # NOTE(review): which controls sit inside the Row was reconstructed
            # (the source had lost its indentation) — confirm the layout.
            with gr.Row():
                timeline_repo_in = gr.Text(value=HF_MAIN_REPO, label="Repo / fork URL")
                timeline_thresh = gr.Slider(0.50, 0.95, value=0.5, step=0.01, label="Similarity ≥")
                timeline_multi_cb = gr.Checkbox(label="Only multimodal models")
                gr.Markdown("**Embedding method:** TBD")
                timeline_btn = gr.Button("Build timeline")

            timeline_html_out = gr.HTML(elem_id="timeline_html", show_label=False)
            timeline_json_out = gr.File(label="Download timeline.json")

            # The similarity method is fixed to "jaccard" for this tab.
            timeline_btn.click(
                lambda repo, threshold, multi: run_timeline(repo, threshold, multi, "jaccard"),
                [timeline_repo_in, timeline_thresh, timeline_multi_cb],
                [timeline_html_out, timeline_json_out],
            )

        with gr.Tab("LOC Growth", id="loc"):
            sim_radio2 = gr.Radio(["jaccard", "embedding"], value="jaccard", label="Similarity metric")
            multi_cb2 = gr.Checkbox(label="Only multimodal models")
            go_loc = gr.Button("Show LOC growth")
            loc_html = gr.HTML(show_label=False)
            go_loc.click(run_loc, [sim_radio2, multi_cb2], loc_html)

        with gr.Tab("Dependency Graph", id="graph"):
            # NOTE(review): Row membership reconstructed, as in the timeline tab.
            with gr.Row():
                repo_in = gr.Text(value=HF_MAIN_REPO, label="Repo / fork URL")
                thresh = gr.Slider(0.50, 0.95, value=0.5, step=0.01, label="Similarity ≥")
                multi_cb = gr.Checkbox(label="Only multimodal models")
                gr.Markdown("**Embedding method:** TBD")
                go_btn = gr.Button("Build graph")

            graph_html_out = gr.HTML(elem_id="graph_html", show_label=False)
            graph_json_out = gr.File(label="Download graph.json")

            # The similarity method is fixed to "jaccard" for this tab.
            go_btn.click(
                lambda repo, threshold, multi: run_graph(repo, threshold, multi, "jaccard"),
                [repo_in, thresh, multi_cb],
                [graph_html_out, graph_json_out],
            )

    # Only made visible in embed mode, where it replaces the header and tabs.
    embed_html = gr.HTML(visible=False)

    def _on_load(req: gr.Request):
        """Configure the page from the URL query string when it loads.

        Query parameters read:

        * ``tab``: ``timeline``, ``loc`` or ``graph`` (case-insensitive).
        * ``embed``: when equal to ``"1"``, the header and tabs are hidden and
          a single visualisation is rendered into ``embed_html`` at 60vh,
          using threshold 0.7, the "jaccard" method and multimodal off.
          ``graph`` and ``timeline`` pick their view; any other ``tab`` value
          (including none) shows the LOC view.

        Without ``embed=1`` the normal UI is shown with the requested tab
        selected; an unknown or missing ``tab`` selects the timeline tab.

        Returns:
            Updates for ``header``, ``tabs`` and ``embed_html``, in that order.
        """
        qp = req.query_params or {}
        tab_key = (qp.get("tab") or "").lower()
        embed = qp.get("embed") == "1"

        if embed:
            if tab_key == "graph":
                html, _ = run_graph(HF_MAIN_REPO, 0.7, False, "jaccard", height_vh=60)
            elif tab_key == "timeline":
                html, _ = run_timeline(HF_MAIN_REPO, 0.7, False, "jaccard", height_vh=60)
            else:
                html = run_loc("jaccard", False, height_vh=60)

            # gr.update() is the component-agnostic form; the per-class
            # ``Component.update`` helpers are not available in Gradio >= 4.
            return (
                gr.update(visible=False),
                gr.update(visible=False),
                gr.update(value=html, visible=True),
            )

        # ``selected`` must equal the ``id`` given to a gr.Tab. The tabs above
        # use string ids, so pass the key itself rather than a numeric index.
        selected_tab = tab_key if tab_key in TAB_INDEX else "timeline"
        return (
            gr.update(visible=True),
            gr.update(visible=True, selected=selected_tab),
            gr.update(visible=False),
        )

    demo.load(_on_load, outputs=[header, tabs, embed_html])
|
|
|
if __name__ == "__main__":
    # Script entry point: start the Gradio server, passing "static" as the
    # only entry of ``allowed_paths``.
    demo.launch(allowed_paths=["static"])
|
|