File size: 6,733 Bytes
53c0cc8
 
 
 
 
 
 
f62613d
53c0cc8
f62613d
 
 
 
 
 
 
53c0cc8
6d106b8
 
 
 
 
 
 
 
 
53c0cc8
6641fa8
53c0cc8
f62613d
6641fa8
 
 
 
31b1b7e
6641fa8
31b1b7e
 
 
 
 
 
 
 
 
f62613d
6641fa8
31b1b7e
6641fa8
53c0cc8
f62613d
c7100d5
 
 
 
 
 
f62613d
c7100d5
 
f62613d
 
c7100d5
f62613d
 
49600c8
c410e03
ceffe7d
 
49600c8
ceffe7d
f62613d
5ff759b
ceffe7d
 
5163a1c
f62613d
a2b0a06
 
49600c8
 
 
 
ae4e744
49600c8
53c0cc8
49600c8
 
53c0cc8
a2b0a06
 
 
 
 
 
 
c7100d5
 
 
 
 
53c0cc8
a2b0a06
d77dd13
 
 
 
 
 
 
 
 
 
a2b0a06
 
 
 
 
f62613d
 
 
a2b0a06
a83bf3e
 
 
 
 
f62613d
a83bf3e
f62613d
a83bf3e
f62613d
a83bf3e
f62613d
 
 
 
a83bf3e
624b0a0
 
 
a83bf3e
f62613d
2c2f9f8
624b0a0
 
 
2c2f9f8
f62613d
a83bf3e
f62613d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
from __future__ import annotations

import json
import tempfile
from pathlib import Path

import gradio as gr
from huggingface_hub import hf_hub_download

from modular_graph_and_candidates import (
    build_graph_json,
    generate_html,
    build_timeline_json,
    generate_timeline_html,
    filter_graph_by_threshold,
)

def _escape_srcdoc(text: str) -> str:
    """Escape *text* for use inside a quoted HTML attribute such as ``srcdoc``.

    The five characters that are significant in HTML markup and attribute
    values (``&``, ``"``, ``'``, ``<``, ``>``) are replaced by character
    references so the browser decodes the attribute back to the original text.

    Args:
        text: Raw HTML document to embed.

    Returns:
        The escaped string; an empty input yields an empty string.
    """
    return (
        # "&" has to be handled first, otherwise the ampersands introduced by
        # the following replacements would themselves be escaped again.
        text.replace("&", "&amp;")
            .replace("\"", "&quot;")
            .replace("'", "&#x27;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
    )

# Default value of the "Repo / fork URL" text boxes; also passed by the embed path in `_on_load`.
HF_MAIN_REPO = "https://github.com/huggingface/transformers"
# Repo id handed to `hf_hub_download(..., repo_type="dataset")` for `latest.json` and the cached artifacts.
CACHE_REPO = "Molbap/hf_cached_embeds_log"

def _fetch_from_cache_repo(kind: str, sim_method: str, threshold: float, multimodal: bool, *, height_vh: int = 85):
    """Render cached graph/timeline data as an iframe plus a downloadable JSON file.

    Reads ``latest.json`` from the ``CACHE_REPO`` dataset to obtain the current
    ``sha``, downloads ``{kind}/{sha}/{sim_method}-m{0|1}.json``, filters it
    with ``filter_graph_by_threshold`` and renders the result to HTML.

    Args:
        kind: Folder of the cache repo to read. ``"timeline"`` selects
            ``generate_timeline_html``; any other value uses ``generate_html``.
        sim_method: Similarity method name used in the cached file name.
        threshold: Value forwarded to ``filter_graph_by_threshold``.
        multimodal: Encoded as ``m1`` / ``m0`` in the cached file name.
        height_vh: Height of the generated iframe, in ``vh`` units.

    Returns:
        A ``(iframe_html, json_path)`` tuple: the iframe markup embedding the
        rendered page via ``srcdoc``, and the path of a temporary file holding
        the filtered data as JSON.

    Raises:
        KeyError: If ``latest.json`` does not contain a ``"sha"`` entry.
    """
    repo_id = CACHE_REPO
    latest_fp = hf_hub_download(repo_id=repo_id, filename="latest.json", repo_type="dataset")
    info = json.loads(Path(latest_fp).read_text(encoding="utf-8"))
    # Fail fast (as run_loc does) instead of building a bogus "None/..." key
    # that would only surface later as a confusing download error.
    sha = info["sha"]
    key = f"{sha}/{sim_method}-m{int(multimodal)}"
    json_fp = hf_hub_download(repo_id=repo_id, filename=f"{kind}/{key}.json", repo_type="dataset")

    raw_data = json.loads(Path(json_fp).read_text(encoding="utf-8"))
    filtered_data = filter_graph_by_threshold(raw_data, threshold)

    if kind == "timeline":
        raw_html = generate_timeline_html(filtered_data)
    else:
        raw_html = generate_html(filtered_data)

    iframe_html = f'<iframe style="width:100%;height:{height_vh}vh;border:none;" srcdoc="{_escape_srcdoc(raw_html)}"></iframe>'

    # mkstemp() hands back an already-open descriptor that was never closed,
    # leaking one fd per request. NamedTemporaryFile closes the handle on exit
    # of the `with` block; delete=False keeps the file for the download widget.
    suffix = "_timeline.json" if kind == "timeline" else ".json"
    with tempfile.NamedTemporaryFile("w", suffix=suffix, delete=False, encoding="utf-8") as tmp:
        tmp.write(json.dumps(filtered_data))
    return iframe_html, tmp.name

def run_loc(sim_method: str, multimodal: bool, *, height_vh: int = 85):
    """Return iframe markup embedding the cached LOC-growth page.

    The current ``sha`` is read from ``latest.json`` in the ``CACHE_REPO``
    dataset, then ``loc/{sha}/{sim_method}-m{0|1}.html`` is downloaded and
    inlined into an iframe through its ``srcdoc`` attribute.

    Args:
        sim_method: Similarity method name used in the cached file name.
        multimodal: Encoded as ``m1`` / ``m0`` in the cached file name.
        height_vh: Height of the generated iframe, in ``vh`` units.

    Returns:
        The ``<iframe>`` markup as a string.
    """
    pointer_path = Path(
        hf_hub_download(repo_id=CACHE_REPO, filename="latest.json", repo_type="dataset")
    )
    sha = json.loads(pointer_path.read_text(encoding="utf-8"))["sha"]
    key = f"{sha}/{sim_method}-m{int(multimodal)}"

    page_path = Path(
        hf_hub_download(repo_id=CACHE_REPO, filename=f"loc/{key}.html", repo_type="dataset")
    )
    page_source = page_path.read_text(encoding="utf-8")

    return f'<iframe style="width:100%;height:{height_vh}vh;border:none;" srcdoc="{_escape_srcdoc(page_source)}"></iframe>'

def run_graph(repo_url: str, threshold: float, multimodal: bool, sim_method: str, *, height_vh: int = 85):
    """Return ``(iframe_html, json_path)`` for the cached dependency graph.

    Delegates to ``_fetch_from_cache_repo`` with ``kind="graph"``.
    ``repo_url`` is accepted for the UI callback signature but is not used.
    """
    return _fetch_from_cache_repo(
        kind="graph",
        sim_method=sim_method,
        threshold=threshold,
        multimodal=multimodal,
        height_vh=height_vh,
    )

def run_timeline(repo_url: str, threshold: float, multimodal: bool, sim_method: str, *, height_vh: int = 85):
    """Return ``(iframe_html, json_path)`` for the cached chronological timeline.

    Delegates to ``_fetch_from_cache_repo`` with ``kind="timeline"``.
    ``repo_url`` is accepted for the UI callback signature but is not used.
    """
    return _fetch_from_cache_repo(
        kind="timeline",
        sim_method=sim_method,
        threshold=threshold,
        multimodal=multimodal,
        height_vh=height_vh,
    )

# ───────────────────────────── UI ────────────────────────────────────────────────

# Stylesheet passed to gr.Blocks: sizes the iframes rendered inside the
# #graph_html and #timeline_html outputs to 85vh tall and full width.
# NOTE(review): the `!important` height presumably wins over the inline
# `height:{height_vh}vh` those iframes carry — confirm that is intended.
CUSTOM_CSS = """
#graph_html iframe, #timeline_html iframe {height:85vh !important; width:100% !important; border:none;}
"""

# Known values of the `tab` query parameter, mapped to the tab's position; consulted by `_on_load`.
TAB_INDEX = {"timeline": 0, "loc": 1, "graph": 2}

with gr.Blocks(css=CUSTOM_CSS) as demo:
    # Page title. The emoji were stored as mojibake (UTF-8 bytes decoded with
    # a legacy code page); they are restored to the intended characters here.
    header = gr.Markdown("## 🔍 Modular-candidate explorer for 🤗 Transformers")

    with gr.Tabs() as tabs:
        # Each tab gets a string id so it can be targeted from `_on_load`.
        with gr.Tab("Chronological Timeline", id="timeline"):
            with gr.Row():
                # The repo URL is forwarded to run_timeline, which ignores it:
                # data always comes from the cache repo.
                timeline_repo_in = gr.Text(value=HF_MAIN_REPO, label="Repo / fork URL")
                timeline_thresh = gr.Slider(0.50, 0.95, value=0.5, step=0.01, label="Similarity ≥")
                timeline_multi_cb = gr.Checkbox(label="Only multimodal models")
                gr.Markdown("**Embedding method:** TBD")
                timeline_btn = gr.Button("Build timeline")

            timeline_html_out = gr.HTML(elem_id="timeline_html", show_label=False)
            timeline_json_out = gr.File(label="Download timeline.json")

            # The similarity method is fixed to "jaccard" for this tab.
            timeline_btn.click(
                lambda repo, thresh, multi: run_timeline(repo, thresh, multi, "jaccard"),
                [timeline_repo_in, timeline_thresh, timeline_multi_cb],
                [timeline_html_out, timeline_json_out],
            )

        with gr.Tab("LOC Growth", id="loc"):
            sim_radio2 = gr.Radio(["jaccard", "embedding"], value="jaccard", label="Similarity metric")
            multi_cb2 = gr.Checkbox(label="Only multimodal models")
            go_loc = gr.Button("Show LOC growth")
            loc_html = gr.HTML(show_label=False)
            go_loc.click(run_loc, [sim_radio2, multi_cb2], loc_html)

        with gr.Tab("Dependency Graph", id="graph"):
            with gr.Row():
                # As on the timeline tab, the repo URL is forwarded but unused by run_graph.
                repo_in = gr.Text(value=HF_MAIN_REPO, label="Repo / fork URL")
                thresh = gr.Slider(0.50, 0.95, value=0.5, step=0.01, label="Similarity ≥")
                multi_cb = gr.Checkbox(label="Only multimodal models")
                gr.Markdown("**Embedding method:** TBD")
                go_btn = gr.Button("Build graph")

            graph_html_out = gr.HTML(elem_id="graph_html", show_label=False)
            graph_json_out = gr.File(label="Download graph.json")

            # The similarity method is fixed to "jaccard" for this tab.
            go_btn.click(
                lambda repo, thresh, multi: run_graph(repo, thresh, multi, "jaccard"),
                [repo_in, thresh, multi_cb],
                [graph_html_out, graph_json_out],
            )

    # make embed_html a sibling of Tabs (not a child), so we can hide Tabs but show this
    embed_html = gr.HTML(visible=False)

    def _on_load(req: gr.Request):
        """Configure the page from the URL query string when the app loads.

        Recognised query parameters:
            ``tab``: ``timeline``, ``loc`` or ``graph`` (case-insensitive).
            ``embed``: when equal to ``"1"``, the header and tabs are hidden
                and only the requested view is rendered, in a 60vh iframe.

        Returns:
            Updates for ``(header, tabs, embed_html)``, in that order.
        """
        params = req.query_params or {}
        tab_key = (params.get("tab") or "").lower()
        embed = params.get("embed") == "1"

        if embed:
            # shorter iframe inside article view
            if tab_key == "graph":
                html, _ = run_graph(HF_MAIN_REPO, 0.7, False, "jaccard", height_vh=60)
            elif tab_key == "timeline":
                html, _ = run_timeline(HF_MAIN_REPO, 0.7, False, "jaccard", height_vh=60)
            else:
                # Any other (or missing) tab key falls back to the LOC view.
                html = run_loc("jaccard", False, height_vh=60)

            return (
                gr.update(visible=False),               # header
                gr.update(visible=False),               # tabs
                gr.update(value=html, visible=True),    # embed_html
            )

        # The tabs are declared with string ids ("timeline", "loc", "graph"),
        # and Tabs.selected must match a tab id. The integer positions stored
        # in TAB_INDEX match none of them, so the key itself is passed;
        # unknown keys fall back to the first tab.
        selected_tab = tab_key if tab_key in TAB_INDEX else "timeline"

        return (
            gr.update(visible=True),
            gr.update(visible=True, selected=selected_tab),
            gr.update(visible=False),
        )

    demo.load(_on_load, outputs=[header, tabs, embed_html])

if __name__ == "__main__":
    # Start the app only when executed as a script; "static" is passed as an allowed path.
    demo.launch(allowed_paths=["static"])