Deepak Sahu committed on
Commit
6795e5d
·
1 Parent(s): 918ec9f

interface touchup

Browse files
Files changed (2) hide show
  1. README.md +2 -4
  2. app.py +42 -13
README.md CHANGED
@@ -159,10 +159,8 @@ MRR = 0.311 implies that there's a good chance that the target book will be in r
159
  ## Inference View
160
 
161
  1. I rewrote the snippets from `z_evaluate.py` to `app.py` with minor changes to experiment with the view.
162
- 2. To make the HF space faster (previously: 234s; **NOW: 15s**) I added `gr.NO_RELOAD` context block in the module files when loading embedding and generation model.
163
- Reference: https://www.gradio.app/guides/developing-faster-with-reload-mode
164
- 3. DONT set `debug=True` for gradio in HF space, else it doesn't start.
165
-
166
 
167
 
168
 
 
159
  ## Inference View
160
 
161
  1. I rewrote the snippets from `z_evaluate.py` to `app.py` with minor changes to experiment with the view.
162
+ 2. Do NOT set `debug=True` for Gradio in the HF Space; otherwise the app doesn't start.
163
+ 3. HF Spaces works differently when it comes to retaining models across module scripts; running locally (tried in Colab) is faster. You will see a lot of my commits in the HF Space working around this problem.
 
 
164
 
165
 
166
 
app.py CHANGED
@@ -12,8 +12,20 @@ N_RECOMMENDS = 5
12
  set_seed(42)
13
  TRAINED_CASUAL_MODEL = "LunaticMaestro/gpt2-book-summary-generator"
14
  EMB_MODEL = "all-MiniLM-L6-v2"
 
 
 
15
 
16
- if gr.NO_RELOAD:
 
 
 
 
 
 
 
 
 
17
  # Load store books
18
  books_df = get_dataframe(CLEAN_DF_UNIQUE_TITLES)
19
 
@@ -23,9 +35,17 @@ if gr.NO_RELOAD:
23
  # Load embedding model
24
  emb_model = SentenceTransformer(EMB_MODEL)
25
 
 
 
 
 
 
26
 
27
- def get_recommendation(book_title: str) -> str:
 
 
28
  global generator_model, emb_model
 
29
  # output = generator_model("Love")
30
  fake_summaries = generate_summaries(book_title=book_title, n_samples=5, model=generator_model) # other parameters are set to default in the function
31
 
@@ -36,16 +56,15 @@ def get_recommendation(book_title: str) -> str:
36
  df_ranked = books_df.iloc[ranks]
37
  df_ranked = df_ranked.reset_index()
38
 
 
39
  books = df_ranked["book_name"].to_list()[:N_RECOMMENDS]
40
  summaries = df_ranked["summaries"].to_list()[:N_RECOMMENDS]
41
  scores = similarity[ranks][:N_RECOMMENDS]
42
-
43
- # label wise similarity
44
  label_similarity: dict = {book: score for book, score in zip(books, scores)}
45
  #
46
- # book_summaries: list[str] = [f"**{book}** \n {summary}" for book, summary in zip(books, summaries)]
47
-
48
- # Generate card-style HTML
49
  html = "<div style='display: flex; flex-wrap: wrap; gap: 1rem;'>"
50
  for book, summary in zip(books, summaries):
51
  html += f"""
@@ -56,15 +75,25 @@ def get_recommendation(book_title: str) -> str:
56
  """
57
  html += "</div>"
58
 
59
- # Club the output to be processed by gradio
60
  response = [label_similarity, html]
61
 
62
  return response
63
 
64
- # We instantiate the Textbox class
65
- textbox = gr.Textbox(label="Write random title", placeholder="The Man who knew", lines=2)
66
- output = [gr.Label(label="Similarity"), gr.HTML(label="Books Descriptions")]
67
- # output = gr.Textbox(label="something")
68
- demo = gr.Interface(fn=get_recommendation, inputs=textbox, outputs=output)
 
 
 
 
 
 
 
 
 
 
69
 
70
  demo.launch()
 
12
  set_seed(42)
13
  TRAINED_CASUAL_MODEL = "LunaticMaestro/gpt2-book-summary-generator"
14
  EMB_MODEL = "all-MiniLM-L6-v2"
15
+ GRADIO_TITLE = "Content Based Book Recommender"
16
+ GRADIO_DESCRIPTION = '''
17
+ This is a [HyDE](https://arxiv.org/abs/2212.10496) based searching mechanism that generates random summaries using your input book title and matches books which has summary similary to generated ones. The books, for search, are used from used [Kaggle Dataset: arpansri/books-summary](https://www.kaggle.com/datasets/arpansri/books-summary)
18
 
19
+ **Should take ~15s to ~30s** for inferencing
20
+ '''
21
+ GRADIO_EXAMPLES = [
22
+ "Rich Dad Poor Dad",
23
+ "Love at firs sight",
24
+ "Importance of idiots"
25
+ ]
26
+
27
+ # Caching mechanism for gradio
28
+ if gr.NO_RELOAD: # Reference: https://www.gradio.app/guides/developing-faster-with-reload-mode
29
  # Load store books
30
  books_df = get_dataframe(CLEAN_DF_UNIQUE_TITLES)
31
 
 
35
  # Load embedding model
36
  emb_model = SentenceTransformer(EMB_MODEL)
37
 
38
+ def get_recommendation(book_title: str) -> list:
39
+ '''Returns a data model suitable for rendering in the Gradio interface.
40
+
41
+ Args:
42
+ book_title: the book name you are looking for
43
 
44
+ Returns
45
+ list of two values; the first value is a dictionary of <book, similarity_score>; the second value is the card view as generated HTML
46
+ '''
47
  global generator_model, emb_model
48
+
49
  # output = generator_model("Love")
50
  fake_summaries = generate_summaries(book_title=book_title, n_samples=5, model=generator_model) # other parameters are set to default in the function
51
 
 
56
  df_ranked = books_df.iloc[ranks]
57
  df_ranked = df_ranked.reset_index()
58
 
59
+ # post-process for gradio interface
60
  books = df_ranked["book_name"].to_list()[:N_RECOMMENDS]
61
  summaries = df_ranked["summaries"].to_list()[:N_RECOMMENDS]
62
  scores = similarity[ranks][:N_RECOMMENDS]
63
+ #
64
+ # For gr.Label interface
65
  label_similarity: dict = {book: score for book, score in zip(books, scores)}
66
  #
67
+ # Generate card-style HTML; to render book names and their summaries
 
 
68
  html = "<div style='display: flex; flex-wrap: wrap; gap: 1rem;'>"
69
  for book, summary in zip(books, summaries):
70
  html += f"""
 
75
  """
76
  html += "</div>"
77
 
78
+ # Bundle the outputs to be processed by the Gradio Interface
79
  response = [label_similarity, html]
80
 
81
  return response
82
 
83
+ # Input Interface Render
84
+ input_textbox = gr.Textbox(label="Search for book with name similary to", placeholder="Rich Dad Poor Dad", max_lines=1)
85
+
86
+ # Output Interface Render
87
+ output = [gr.Label(label="Similar Books"), gr.HTML(label="Books Descriptions", label=True)]
88
+
89
+ # Stitch the interface together and run
90
+ demo = gr.Interface(
91
+ fn=get_recommendation,
92
+ inputs=input_textbox,
93
+ outputs=output,
94
+ title=GRADIO_TITLE,
95
+ description=GRADIO_DESCRIPTION,
96
+ examples=GRADIO_DESCRIPTION
97
+ )
98
 
99
  demo.launch()