Molbap HF Staff committed on
Commit
f62613d
·
verified ·
1 Parent(s): a83bf3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -42
app.py CHANGED
@@ -1,20 +1,21 @@
1
  from __future__ import annotations
2
 
3
  import json
4
- import shutil
5
- import subprocess
6
  import tempfile
7
- from datetime import datetime, timedelta
8
- from functools import lru_cache
9
  from pathlib import Path
10
- from huggingface_hub import hf_hub_download
11
 
12
  import gradio as gr
 
13
 
14
- from modular_graph_and_candidates import build_graph_json, generate_html, build_timeline_json, generate_timeline_html, filter_graph_by_threshold
 
 
 
 
 
 
15
 
16
  def _escape_srcdoc(text: str) -> str:
17
- """Escape for inclusion inside an <iframe srcdoc="…"> attribute."""
18
  return (
19
  text.replace("&", "&amp;")
20
  .replace("\"", "&quot;")
@@ -23,12 +24,10 @@ def _escape_srcdoc(text: str) -> str:
23
  .replace(">", "&gt;")
24
  )
25
 
26
-
27
  HF_MAIN_REPO = "https://github.com/huggingface/transformers"
28
-
29
  CACHE_REPO = "Molbap/hf_cached_embeds_log"
30
 
31
- def _fetch_from_cache_repo(kind: str, sim_method: str, threshold: float, multimodal: bool):
32
  repo_id = CACHE_REPO
33
  latest_fp = hf_hub_download(repo_id=repo_id, filename="latest.json", repo_type="dataset")
34
  info = json.loads(Path(latest_fp).read_text(encoding="utf-8"))
@@ -40,49 +39,42 @@ def _fetch_from_cache_repo(kind: str, sim_method: str, threshold: float, multimo
40
  filtered_data = filter_graph_by_threshold(raw_data, threshold)
41
 
42
  if kind == "timeline":
43
- from modular_graph_and_candidates import generate_timeline_html
44
  raw_html = generate_timeline_html(filtered_data)
45
  else:
46
  raw_html = generate_html(filtered_data)
47
 
48
- iframe_html = f'<iframe style="width:100%;height:85vh;border:none;" srcdoc="{_escape_srcdoc(raw_html)}"></iframe>'
49
  tmp = Path(tempfile.mkstemp(suffix=("_timeline.json" if kind == "timeline" else ".json"))[1])
50
  tmp.write_text(json.dumps(filtered_data), encoding="utf-8")
51
  return iframe_html, str(tmp)
52
 
53
-
54
-
55
- def run_loc(sim_method: str, multimodal: bool):
56
  latest_fp = hf_hub_download(repo_id=CACHE_REPO, filename="latest.json", repo_type="dataset")
57
  info = json.loads(Path(latest_fp).read_text(encoding="utf-8"))
58
  sha = info["sha"]
59
  key = f"{sha}/{sim_method}-m{int(multimodal)}"
60
  html_fp = hf_hub_download(repo_id=CACHE_REPO, filename=f"loc/{key}.html", repo_type="dataset")
61
  raw_html = Path(html_fp).read_text(encoding="utf-8")
62
- iframe_html = f'<iframe style="width:100%;height:85vh;border:none;" srcdoc="{_escape_srcdoc(raw_html)}"></iframe>'
63
  return iframe_html
64
 
 
 
65
 
66
- def run_graph(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
67
- return _fetch_from_cache_repo("graph", sim_method, threshold, multimodal)
68
-
69
-
70
- def run_timeline(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
71
- return _fetch_from_cache_repo("timeline", sim_method, threshold, multimodal)
72
-
73
-
74
 
75
  # ───────────────────────────── UI ────────────────────────────────────────────────
76
 
77
-
78
  CUSTOM_CSS = """
79
  #graph_html iframe, #timeline_html iframe {height:85vh !important; width:100% !important; border:none;}
80
  """
 
81
  TAB_INDEX = {"timeline": 0, "loc": 1, "graph": 2}
82
 
83
  with gr.Blocks(css=CUSTOM_CSS) as demo:
84
  header = gr.Markdown("## πŸ” Modular-candidate explorer for πŸ€— Transformers")
85
- # capture the Tabs as a component so we can control selection
86
  with gr.Tabs() as tabs:
87
  with gr.Tab("Chronological Timeline", id="timeline"):
88
  with gr.Row():
@@ -124,33 +116,38 @@ with gr.Blocks(css=CUSTOM_CSS) as demo:
124
  [repo_in, thresh, multi_cb],
125
  [graph_html_out, graph_json_out],
126
  )
127
- embed_html = gr.HTML(visible=False)
 
 
128
 
129
  def _on_load(req: gr.Request):
130
  qp = req.query_params or {}
131
  tab_key = (qp.get("tab") or "").lower()
132
  embed = (qp.get("embed") == "1")
133
-
134
  tab_sel = TAB_INDEX.get(tab_key, 0)
135
-
136
  if embed:
 
137
  if tab_key == "graph":
138
- html, _ = run_graph(HF_MAIN_REPO, 0.7, False, "jaccard")
139
  elif tab_key == "timeline":
140
- html, _ = run_timeline(HF_MAIN_REPO, 0.7, False, "jaccard")
141
- else: # "loc" or anything else
142
- html = run_loc("jaccard", False)
143
-
144
  return (
145
- gr.update(visible=False), # header
146
- gr.update(visible=False), # tabs
147
- gr.update(value=html, visible=True), # embed_html
148
  )
149
-
150
  return (
151
- gr.update(visible=True), # header
152
- gr.update(visible=True, selected=tab_sel), # tabs
153
- gr.update(visible=False), # embed_html
154
  )
155
-
156
  demo.load(_on_load, outputs=[header, tabs, embed_html])
 
 
 
 
1
  from __future__ import annotations
2
 
3
  import json
 
 
4
  import tempfile
 
 
5
  from pathlib import Path
 
6
 
7
  import gradio as gr
8
+ from huggingface_hub import hf_hub_download
9
 
10
+ from modular_graph_and_candidates import (
11
+ build_graph_json,
12
+ generate_html,
13
+ build_timeline_json,
14
+ generate_timeline_html,
15
+ filter_graph_by_threshold,
16
+ )
17
 
18
  def _escape_srcdoc(text: str) -> str:
 
19
  return (
20
  text.replace("&", "&amp;")
21
  .replace("\"", "&quot;")
 
24
  .replace(">", "&gt;")
25
  )
26
 
 
27
  HF_MAIN_REPO = "https://github.com/huggingface/transformers"
 
28
  CACHE_REPO = "Molbap/hf_cached_embeds_log"
29
 
30
+ def _fetch_from_cache_repo(kind: str, sim_method: str, threshold: float, multimodal: bool, *, height_vh: int = 85):
31
  repo_id = CACHE_REPO
32
  latest_fp = hf_hub_download(repo_id=repo_id, filename="latest.json", repo_type="dataset")
33
  info = json.loads(Path(latest_fp).read_text(encoding="utf-8"))
 
39
  filtered_data = filter_graph_by_threshold(raw_data, threshold)
40
 
41
  if kind == "timeline":
 
42
  raw_html = generate_timeline_html(filtered_data)
43
  else:
44
  raw_html = generate_html(filtered_data)
45
 
46
+ iframe_html = f'<iframe style="width:100%;height:{height_vh}vh;border:none;" srcdoc="{_escape_srcdoc(raw_html)}"></iframe>'
47
  tmp = Path(tempfile.mkstemp(suffix=("_timeline.json" if kind == "timeline" else ".json"))[1])
48
  tmp.write_text(json.dumps(filtered_data), encoding="utf-8")
49
  return iframe_html, str(tmp)
50
 
51
def run_loc(sim_method: str, multimodal: bool, *, height_vh: int = 85):
    """Return an ``<iframe>`` snippet embedding the cached LOC page.

    Reads ``latest.json`` from the ``CACHE_REPO`` dataset to obtain the current
    ``sha``, downloads ``loc/<sha>/<sim_method>-m<0|1>.html`` from the same
    dataset, and inlines that page (escaped with ``_escape_srcdoc``) into the
    ``srcdoc`` attribute of an iframe that is ``height_vh`` vh tall.
    """

    def _download(filename: str) -> Path:
        # Both files live in the same dataset repo; only the filename differs.
        return Path(hf_hub_download(repo_id=CACHE_REPO, filename=filename, repo_type="dataset"))

    latest = json.loads(_download("latest.json").read_text(encoding="utf-8"))
    cache_key = f"{latest['sha']}/{sim_method}-m{int(multimodal)}"
    page = _download(f"loc/{cache_key}.html").read_text(encoding="utf-8")
    return (
        f'<iframe style="width:100%;height:{height_vh}vh;border:none;" '
        f'srcdoc="{_escape_srcdoc(page)}"></iframe>'
    )
60
 
61
def run_graph(repo_url: str, threshold: float, multimodal: bool, sim_method: str, *, height_vh: int = 85):
    """Produce the graph view by delegating to ``_fetch_from_cache_repo``.

    ``repo_url`` is accepted but not used in this function; the remaining
    arguments are forwarded with ``kind="graph"``. The helper's return value
    is passed through unchanged.
    """
    return _fetch_from_cache_repo(
        kind="graph",
        sim_method=sim_method,
        threshold=threshold,
        multimodal=multimodal,
        height_vh=height_vh,
    )
63
 
64
def run_timeline(repo_url: str, threshold: float, multimodal: bool, sim_method: str, *, height_vh: int = 85):
    """Produce the timeline view by delegating to ``_fetch_from_cache_repo``.

    ``repo_url`` is accepted but not used in this function; the remaining
    arguments are forwarded with ``kind="timeline"``. The helper's return
    value is passed through unchanged.
    """
    return _fetch_from_cache_repo(
        kind="timeline",
        sim_method=sim_method,
        threshold=threshold,
        multimodal=multimodal,
        height_vh=height_vh,
    )
 
 
 
 
 
 
66
 
67
  # ───────────────────────────── UI ────────────────────────────────────────────────
68
 
 
69
  CUSTOM_CSS = """
70
  #graph_html iframe, #timeline_html iframe {height:85vh !important; width:100% !important; border:none;}
71
  """
72
+
73
  TAB_INDEX = {"timeline": 0, "loc": 1, "graph": 2}
74
 
75
  with gr.Blocks(css=CUSTOM_CSS) as demo:
76
  header = gr.Markdown("## πŸ” Modular-candidate explorer for πŸ€— Transformers")
77
+
78
  with gr.Tabs() as tabs:
79
  with gr.Tab("Chronological Timeline", id="timeline"):
80
  with gr.Row():
 
116
  [repo_in, thresh, multi_cb],
117
  [graph_html_out, graph_json_out],
118
  )
119
+
120
+ # make embed_html a sibling of Tabs (not a child), so we can hide Tabs but show this
121
+ embed_html = gr.HTML(visible=False)
122
 
123
  def _on_load(req: gr.Request):
124
  qp = req.query_params or {}
125
  tab_key = (qp.get("tab") or "").lower()
126
  embed = (qp.get("embed") == "1")
 
127
  tab_sel = TAB_INDEX.get(tab_key, 0)
128
+
129
  if embed:
130
+ # shorter iframe inside article view
131
  if tab_key == "graph":
132
+ html, _ = run_graph(HF_MAIN_REPO, 0.7, False, "jaccard", height_vh=60)
133
  elif tab_key == "timeline":
134
+ html, _ = run_timeline(HF_MAIN_REPO, 0.7, False, "jaccard", height_vh=60)
135
+ else:
136
+ html = run_loc("jaccard", False, height_vh=60)
137
+
138
  return (
139
+ gr.Markdown.update(visible=False), # header
140
+ gr.Tabs.update(visible=False), # tabs
141
+ gr.HTML.update(value=html, visible=True), # embed_html
142
  )
143
+
144
  return (
145
+ gr.Markdown.update(visible=True),
146
+ gr.Tabs.update(visible=True, selected=tab_sel),
147
+ gr.HTML.update(visible=False),
148
  )
149
+
150
  demo.load(_on_load, outputs=[header, tabs, embed_html])
151
+
152
+ if __name__ == "__main__":
153
+ demo.launch(allowed_paths=["static"])