# app.py – Gradio Space wrapper for modular_graph_and_candidates from __future__ import annotations import json import shutil import subprocess import tempfile from datetime import datetime, timedelta from functools import lru_cache from pathlib import Path import os, json, tempfile from pathlib import Path from huggingface_hub import hf_hub_download import gradio as gr # —— refactored helpers —— from modular_graph_and_candidates import build_graph_json, generate_html, build_timeline_json, generate_timeline_html def _escape_srcdoc(text: str) -> str: """Escape for inclusion inside an ' tmp = Path(tempfile.mkstemp(suffix=("_timeline.json" if kind == "timeline" else ".json"))[1]) tmp.write_text(json_text, encoding="utf-8") return iframe_html, str(tmp) except Exception: return None HF_MAIN_REPO = "https://github.com/huggingface/transformers" # ───────────────────────────── cache repo once per 24 h ─────────────────────────── @lru_cache(maxsize=4) def clone_or_cache(repo_url: str) -> Path: """Shallow‑clone *repo_url* and reuse it for 24 h.""" tmp_root = Path(tempfile.gettempdir()) cache_dir = tmp_root / f"repo_{abs(hash(repo_url))}" stamp = cache_dir / ".cloned_at" if cache_dir.exists() and stamp.exists(): try: if datetime.utcnow() - datetime.fromisoformat(stamp.read_text().strip()) < timedelta(days=1): return cache_dir except Exception: pass # fall through → reclone shutil.rmtree(cache_dir, ignore_errors=True) subprocess.check_call(["git", "clone", "--depth", "1", repo_url, str(cache_dir)]) stamp.write_text(datetime.utcnow().isoformat()) return cache_dir # ───────────────────────────── main callback ───────────────────────────────────── def run_graph(repo_url: str, threshold: float, multimodal: bool, sim_method: str): """Generate the dependency graph visualization.""" hit = _fetch_from_cache_repo("graph", sim_method, threshold, multimodal) if hit: return hit repo_path = clone_or_cache(repo_url) graph = build_graph_json( transformers_dir=repo_path, threshold=threshold, multimodal=multimodal, sim_method=sim_method, ) raw_html = generate_html(graph) iframe_html = ( f'' ) tmp_json = Path(tempfile.mktemp(suffix=".json")) tmp_json.write_text(json.dumps(graph), encoding="utf-8") return iframe_html, str(tmp_json) def run_timeline(repo_url: str, threshold: float, multimodal: bool, sim_method: str): """Generate the chronological timeline visualization.""" hit = _fetch_from_cache_repo("timeline", sim_method, threshold, multimodal) if hit: return hit repo_path = clone_or_cache(repo_url) timeline = build_timeline_json( transformers_dir=repo_path, threshold=threshold, multimodal=multimodal, sim_method=sim_method, ) raw_html = generate_timeline_html(timeline) iframe_html = ( f'' ) tmp_json = Path(tempfile.mktemp(suffix="_timeline.json")) tmp_json.write_text(json.dumps(timeline), encoding="utf-8") return iframe_html, str(tmp_json) # ───────────────────────────── UI ──────────────────────────────────────────────── CUSTOM_CSS = """ #graph_html iframe, #timeline_html iframe {height:85vh !important; width:100% !important; border:none;} """ with gr.Blocks(css=CUSTOM_CSS) as demo: gr.Markdown("## 🔍 Modular‑candidate explorer for 🤗 Transformers") with gr.Tabs(): with gr.Tab("Dependency Graph"): with gr.Row(): repo_in = gr.Text(value=HF_MAIN_REPO, label="Repo / fork URL") thresh = gr.Slider(0.50, 0.95, value=0.5, step=0.01, label="Similarity ≥") multi_cb = gr.Checkbox(label="Only multimodal models") sim_radio = gr.Radio(["jaccard", "embedding"], value="jaccard", label="Similarity metric") go_btn = gr.Button("Build graph") graph_html_out = gr.HTML(elem_id="graph_html", show_label=False) graph_json_out = gr.File(label="Download graph.json") go_btn.click(run_graph, [repo_in, thresh, multi_cb, sim_radio], [graph_html_out, graph_json_out]) with gr.Tab("Chronological Timeline"): with gr.Row(): timeline_repo_in = gr.Text(value=HF_MAIN_REPO, label="Repo / fork URL") timeline_thresh = gr.Slider(0.50, 0.95, value=0.5, step=0.01, label="Similarity ≥") timeline_multi_cb = gr.Checkbox(label="Only multimodal models") timeline_sim_radio = gr.Radio(["jaccard", "embedding"], value="jaccard", label="Similarity metric") timeline_btn = gr.Button("Build timeline") timeline_html_out = gr.HTML(elem_id="timeline_html", show_label=False) timeline_json_out = gr.File(label="Download timeline.json") timeline_btn.click(run_timeline, [timeline_repo_in, timeline_thresh, timeline_multi_cb, timeline_sim_radio], [timeline_html_out, timeline_json_out]) if __name__ == "__main__": demo.launch(allowed_paths=["static"])