Deepak Sahu committed · Commit 6795e5d · 1 Parent(s): 918ec9f
interface touchup

README.md
CHANGED
@@ -159,10 +159,8 @@ MRR = 0.311 implies that there's a good chance that the target book will be in r
 ## Inference View
 
 1. I rewrote the snippets from `z_evaluate.py` to `app.py` with minor changes to experiment with the view.
-2.
-
-3. DON'T set `debug=True` for gradio in an HF Space, else it doesn't start.
-
+2. DON'T set `debug=True` for gradio in an HF Space, else it doesn't start.
+3. HF Spaces work differently when it comes to retaining models across module scripts; running locally (tried in Colab) is faster. You will see a lot of my commits in the HF Space working around this problem.
 
 
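The two gradio notes above (items 2 and 3 in the new README text) boil down to the pattern below. This is a minimal sketch, assuming the models are loaded with stock `transformers` / `sentence-transformers` calls rather than the Space's own helpers: heavy objects are created under `gr.NO_RELOAD` so reload mode and Space restarts don't re-initialise them, and `launch()` is called without `debug=True`.

```python
import gradio as gr
from sentence_transformers import SentenceTransformer
from transformers import pipeline, set_seed

if gr.NO_RELOAD:
    # Runs once per process and is skipped by `gradio app.py` hot-reload,
    # so the models are not re-downloaded / re-initialised on every edit.
    set_seed(42)
    generator_model = pipeline("text-generation", model="LunaticMaestro/gpt2-book-summary-generator")
    emb_model = SentenceTransformer("all-MiniLM-L6-v2")

def recommend(title: str) -> str:
    # Placeholder function; the real app returns recommendations built from the models above.
    return f"...recommendations for {title!r}..."

demo = gr.Interface(fn=recommend, inputs="text", outputs="text")
demo.launch()  # no debug=True: with it, the HF Space never finishes starting
```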
app.py
CHANGED
@@ -12,8 +12,20 @@ N_RECOMMENDS = 5
 set_seed(42)
 TRAINED_CASUAL_MODEL = "LunaticMaestro/gpt2-book-summary-generator"
 EMB_MODEL = "all-MiniLM-L6-v2"
+GRADIO_TITLE = "Content Based Book Recommender"
+GRADIO_DESCRIPTION = '''
+This is a [HyDE](https://arxiv.org/abs/2212.10496) based search mechanism that generates random summaries from your input book title and matches books whose summaries are similar to the generated ones. The books searched over come from the [Kaggle Dataset: arpansri/books-summary](https://www.kaggle.com/datasets/arpansri/books-summary).
 
-
+**Should take ~15s to ~30s** for inference.
+'''
+GRADIO_EXAMPLES = [
+    "Rich Dad Poor Dad",
+    "Love at first sight",
+    "Importance of idiots"
+]
+
+# Caching mechanism for gradio
+if gr.NO_RELOAD:  # Reference: https://www.gradio.app/guides/developing-faster-with-reload-mode
     # Load store books
     books_df = get_dataframe(CLEAN_DF_UNIQUE_TITLES)
 
@@ -23,9 +35,17 @@ if gr.NO_RELOAD:
     # Load embedding model
     emb_model = SentenceTransformer(EMB_MODEL)
 
+def get_recommendation(book_title: str) -> list:
+    '''Returns a data model suitable to be rendered in the gradio interface.
+
+    Args:
+        book_title: the book name you are looking for
 
-def get_recommendation(book_title: str) -> str:
+    Returns:
+        list of two values; first value is a dictionary of <book, similarity_score>; second value is the card view as generated HTML
+    '''
     global generator_model, emb_model
+
     # output = generator_model("Love")
     fake_summaries = generate_summaries(book_title=book_title, n_samples=5, model=generator_model)  # other parameters are set to default in the function
 
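The GRADIO_DESCRIPTION above calls this a HyDE-style search: the input title is expanded into several generated ("fake") summaries, and the stored books are ranked by how similar their real summaries are to those. The `similarity` and `ranks` values used in the next hunk presumably come from a helper along these lines; the function name `rank_books` and the mean-over-generated-summaries aggregation are assumptions for illustration, not the repo's exact `z_evaluate.py` code.

```python
import numpy as np
from sentence_transformers import SentenceTransformer

def rank_books(fake_summaries: list[str], book_summaries: list[str],
               emb_model: SentenceTransformer) -> tuple[np.ndarray, np.ndarray]:
    """Rank stored books by similarity to the generated (fake) summaries. Hypothetical helper."""
    fake_emb = emb_model.encode(fake_summaries, normalize_embeddings=True)  # (n_fake, dim)
    book_emb = emb_model.encode(book_summaries, normalize_embeddings=True)  # (n_books, dim)
    similarity = (book_emb @ fake_emb.T).mean(axis=1)  # cosine similarity, averaged over the fakes
    ranks = np.argsort(-similarity)                    # best-matching book first
    return similarity, ranks

# e.g. similarity, ranks = rank_books(fake_summaries, books_df["summaries"].to_list(), emb_model)
```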
@@ -36,16 +56,15 @@ def get_recommendation(book_title: str) -> str:
     df_ranked = books_df.iloc[ranks]
     df_ranked = df_ranked.reset_index()
 
+    # post-process for gradio interface
     books = df_ranked["book_name"].to_list()[:N_RECOMMENDS]
     summaries = df_ranked["summaries"].to_list()[:N_RECOMMENDS]
     scores = similarity[ranks][:N_RECOMMENDS]
-
-    #
+    #
+    # For gr.Label interface
     label_similarity: dict = {book: score for book, score in zip(books, scores)}
     #
-    #
-
-    # Generate card-style HTML
+    # Generate card-style HTML; to render book names and their summaries
     html = "<div style='display: flex; flex-wrap: wrap; gap: 1rem;'>"
     for book, summary in zip(books, summaries):
         html += f"""
@@ -56,15 +75,25 @@ def get_recommendation(book_title: str) -> str:
         """
     html += "</div>"
 
-    # Club the output to be processed by gradio
+    # Club the output to be processed by the gradio Interface
    response = [label_similarity, html]
 
     return response
 
-#
-
-
-#
-
+# Input Interface Render
+input_textbox = gr.Textbox(label="Search for a book with a name similar to", placeholder="Rich Dad Poor Dad", max_lines=1)
+
+# Output Interface Render
+output = [gr.Label(label="Similar Books"), gr.HTML(label="Books Descriptions")]
+
+# Stitch interface and run
+demo = gr.Interface(
+    fn=get_recommendation,
+    inputs=input_textbox,
+    outputs=output,
+    title=GRADIO_TITLE,
+    description=GRADIO_DESCRIPTION,
+    examples=GRADIO_EXAMPLES
+)
 
 demo.launch()
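One detail worth noting from the docstring and the `outputs` list above: `gr.Label` accepts a dict mapping labels to confidence scores, and `gr.HTML` accepts a raw markup string, so the two return values of `get_recommendation` map one-to-one onto the two output components. A toy, self-contained illustration with made-up titles and scores:

```python
import gradio as gr

def toy_recommend(title: str) -> list:
    # Stand-in for get_recommendation: the first value feeds gr.Label
    # (dict of label -> confidence), the second feeds gr.HTML (raw markup).
    # The order must match the `outputs` list below.
    scores = {f"{title} (close match)": 0.81, f"{title} (second pick)": 0.64}
    cards = f"<div><h3>{title}</h3><p>Example summary card...</p></div>"
    return [scores, cards]

demo = gr.Interface(
    fn=toy_recommend,
    inputs=gr.Textbox(label="Book title", max_lines=1),
    outputs=[gr.Label(label="Similar Books"), gr.HTML(label="Books Descriptions")],
)

if __name__ == "__main__":
    demo.launch()  # again: no debug=True on a HF Space
```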