Deepak Sahu committed · Commit f2b9b39 · Parent(s): e446a52

code cleanup

Files changed:
- README.md +2 -2
- z_hypothetical_summary.py +8 -8
- z_similarity.py +2 -0
README.md
CHANGED
@@ -216,9 +216,9 @@ Before discussing evaluation metric let me walk you through two important pieces
 
 ### Recommendation Generation
 
-The generation is handled by the script `z_hypothetical_summary.py`. Under the hood, the following happens:
 
-[image]
 
 Code Preview. I did minimal post-processing to chop off the `prompt` from the generated summaries before returning the result.
 
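As a rough illustration of the post-processing the README mentions, here is a minimal sketch, assuming the HF text-generation pipeline echoes the prompt in front of each generation; the helper name `chop_prompt` is hypothetical, not from the repo:

# Hypothetical helper illustrating the post-processing described above.
def chop_prompt(generated_texts: list[str], prompt: str) -> list[str]:
    # HF text-generation pipelines return the prompt followed by the new
    # tokens, so dropping len(prompt) characters leaves only the summary.
    return [text[len(prompt):].strip() for text in generated_texts]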
z_hypothetical_summary.py
CHANGED
@@ -11,8 +11,12 @@ TRAINED_CASUAL_MODEL = "LunaticMaestro/gpt2-book-summary-generator"
 generator_model = None
 
 def load_model():
+    '''Work around to speed up HF cross-script loading'''
     global generator_model
-    generator_model = pipeline('text-generation', model=TRAINED_CASUAL_MODEL)
+    if generator_model is None:
+        generator_model = pipeline('text-generation', model=TRAINED_CASUAL_MODEL)
+    else:
+        return generator_model
 
 
 def generate_summaries(book_title: str, genre: Optional[str] = None, n_samples=2, top_k = 50, top_p = 0.85, model=None) -> list[str]:
@@ -23,17 +27,13 @@ def generate_summaries(book_title: str, genre: Optional[str] = None, n_samples=2
         n_samples: (default=2) count of hypothetical summaries
         top_k: (default = 50)
         top_p: (default=0.85)
-
         model: CASUAL LM; this is a hack to adjust for faster response in gradio
+
     Returns:
         summaries: list of hypothetical summaries.
     '''
-
-
-    if model:
-        generator_model = model
-    else:
-        generator_model = generator_model if generator_model is not None else load_model()
+    # select model
+    generator_model = model if model else generator_model
 
     # basic prompt very similar to one used in fine-tuning
     prompt = f'''Book Title: {book_title}
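For context, a self-contained sketch of the caching pattern this cleanup is aiming at, assuming transformers' pipeline API. Two quirks in the committed version are smoothed over here: load_model only returns the pipeline on the else branch (so the first call returns None), and generate_summaries reads generator_model before assigning it, which needs a global declaration to avoid an UnboundLocalError. The prompt body and generation kwargs below are assumptions, since the diff cuts off mid-prompt:

from typing import Optional
from transformers import pipeline

TRAINED_CASUAL_MODEL = "LunaticMaestro/gpt2-book-summary-generator"
generator_model = None  # module-level cache shared across calls

def load_model():
    '''Work around to speed up HF cross-script loading.'''
    global generator_model
    if generator_model is None:
        # build the text-generation pipeline once, then reuse it
        generator_model = pipeline('text-generation', model=TRAINED_CASUAL_MODEL)
    return generator_model

def generate_summaries(book_title: str, genre: Optional[str] = None, n_samples=2,
                       top_k=50, top_p=0.85, model=None) -> list[str]:
    # select model: a caller-supplied pipeline wins, else the cached one
    gen = model if model else load_model()
    # assumed prompt shape; the real fine-tuning prompt is truncated in the diff
    prompt = f"Book Title: {book_title}" + (f"\nGenre: {genre}" if genre else "")
    outputs = gen(prompt, do_sample=True, top_k=top_k, top_p=top_p,
                  num_return_sequences=n_samples, max_new_tokens=128)
    # chop the prompt off the front of each generation before returning
    return [o["generated_text"][len(prompt):].strip() for o in outputs]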
z_similarity.py
CHANGED
@@ -15,6 +15,8 @@ def computes_similarity_w_hypothetical(hypothetical_summaries: list[str], model
     Ranks of the book summaries based on the above cosine similarity distance; lower rank means more similar
     '''
     global books_summaries_embs
+
+    # Select model
     model = model if model else load_model()
 
     hypothetical_summaries_embs = get_embeddings(hypothetical_summaries)
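And a rough, self-contained sketch of what computes_similarity_w_hypothetical plausibly does around the lines above, assuming sentence-transformers embeddings; get_embeddings, books_summaries_embs, the embedding model, and the exact ranking rule are reconstructions, not the repo's verbatim code:

import numpy as np
from sentence_transformers import SentenceTransformer

books_summaries_embs = None  # precomputed embeddings of every book summary

def load_model():
    # assumed embedding model; the repo's actual choice is not visible here
    return SentenceTransformer("all-MiniLM-L6-v2")

def get_embeddings(texts: list[str], model) -> np.ndarray:
    # normalized embeddings make cosine similarity a plain dot product
    return model.encode(texts, normalize_embeddings=True)

def computes_similarity_w_hypothetical(hypothetical_summaries: list[str], model=None) -> np.ndarray:
    '''Ranks the book summaries by cosine similarity; lower rank means more similar.'''
    global books_summaries_embs
    # Select model
    model = model if model else load_model()
    hypothetical_summaries_embs = get_embeddings(hypothetical_summaries, model)
    sims = hypothetical_summaries_embs @ books_summaries_embs.T  # (n_hypothetical, n_books)
    mean_sims = sims.mean(axis=0)               # average similarity per book
    return np.argsort(np.argsort(-mean_sims))   # rank 0 = most similar book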