Spaces:
Sleeping
Sleeping
import numpy as np | |
import pandas as pd | |
import gradio as gr | |
import torch | |
from langchain_community.document_loaders import TextLoader | |
from langchain_text_splitters import CharacterTextSplitter | |
from langchain_huggingface import HuggingFaceEmbeddings | |
from langchain_chroma import Chroma | |
# Book catalogue; later code reads its isbn13, category, emotion-score,
# description, author and title columns.
books = pd.read_csv('data/books_with_emotions.csv')

# Book Thumbnail
# 'large_thumbnail' is the thumbnail URL with '&fife=w800' appended
# (presumably a request for a wider cover image - confirm against the image host).
# A missing thumbnail stays NaN after the concatenation, so those rows are
# pointed at the placeholder file instead.
books['large_thumbnail'] = books['thumbnail'] + '&fife=w800'
books['large_thumbnail'] = np.where(
    books['large_thumbnail'].notna(),
    books['large_thumbnail'],
    'cover-not-found.jpg',
)
# Create Vector Database | |
# Load the descriptions file and index it one line per document, so that a
# similarity hit maps back to exactly one line of the file.
raw_docs = TextLoader('./data/full_desc.txt', encoding='utf-8').load()

# chunk_size must be positive: recent langchain-text-splitters releases reject
# chunk_size=0 with a ValueError. With '\n' as the only separator the splitter
# can cut only at line breaks, so the smallest legal size (1) still produces
# one chunk per line.
text_splitter = CharacterTextSplitter(chunk_size=1, chunk_overlap=0, separator='\n')
docs = text_splitter.split_documents(raw_docs)

# Embed on the GPU when torch can see one, otherwise fall back to the CPU.
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'},
)
database = Chroma.from_documents(docs, embeddings)
# Retrieval | |
def retrieval(query: str, category: str=None, tone: str=None, init_top_k: int=80, final_top_k: int=16) -> pd.DataFrame:
    """Return the books that best match a free-text query.

    Args:
        query: Text to search the vector database with.
        category: Value of ``final_categories`` to keep. ``None`` or ``'All'``
            disables the category filter.
        tone: One of ``'Happy'``, ``'Surprising'``, ``'Angry'``,
            ``'Suspenseful'`` or ``'Sad'`` to order the final rows by the
            matching emotion column, highest first. Any other value
            (including ``None`` and ``'All'``) keeps the similarity order.
        init_top_k: Number of hits requested from the vector database.
        final_top_k: Maximum number of rows returned.

    Returns:
        A new DataFrame holding at most ``final_top_k`` rows of ``books``.
    """
    tone_columns = {
        'Happy': 'joy',
        'Surprising': 'surprise',
        'Angry': 'anger',
        'Suspenseful': 'fear',
        'Sad': 'sadness',
    }

    hits = database.similarity_search(query, k=init_top_k)
    # The first whitespace-separated token of each stored line (after removing
    # surrounding double quotes) is parsed as the book's isbn13.
    ids = [int(hit.page_content.strip('"').split()[0]) for hit in hits]

    # Remember where each isbn first appeared in the hit list: isin() returns
    # rows in catalogue order, which would make head() truncate by CSV position
    # instead of by relevance.
    similarity_rank = {}
    for position, isbn in enumerate(ids):
        similarity_rank.setdefault(isbn, position)

    matches = books[books['isbn13'].isin(ids)]
    matches = matches.sort_values(
        by='isbn13',
        key=lambda column: column.map(similarity_rank),
        kind='stable',
    )

    # None is the declared default and must mean "no filter", same as 'All'.
    if category is not None and category != 'All':
        matches = matches[matches['final_categories'] == category]
    matches = matches.head(final_top_k)

    # Sort into a new frame rather than in place: `matches` is derived from
    # `books` by slicing, and in-place mutation of such a frame triggers
    # pandas' SettingWithCopy warning.
    emotion_column = tone_columns.get(tone)
    if emotion_column is not None:
        matches = matches.sort_values(by=emotion_column, ascending=False)
    return matches
# Recommendation | |
def _format_authors(raw_authors) -> str:
    """Render a ';'-separated author field as 'A', 'A and B' or 'A, B and C'."""
    # A missing CSV cell is read by pandas as float NaN, which has no .split().
    if not isinstance(raw_authors, str) or not raw_authors:
        return 'Unknown author'
    authors = raw_authors.split(';')
    if len(authors) == 1:
        return authors[0]
    return ', '.join(authors[:-1]) + ' and ' + authors[-1]


def recommend(query: str, category: str, tone: str):
    """Build the gallery items for a query.

    Args:
        query: Free-text description of the wanted book.
        category: Category filter forwarded to ``retrieval``.
        tone: Emotional-tone ordering forwarded to ``retrieval``.

    Returns:
        A list of ``[image, caption]`` pairs, one per retrieved row, where the
        image is the row's ``large_thumbnail`` and the caption combines title,
        authors and the first 30 words of the description.
    """
    max_words = 30
    recs = retrieval(query, category, tone)
    results = []
    for _, row in recs.iterrows():
        raw_description = row['description']
        # Treat a missing description (NaN) as empty text instead of crashing.
        words = raw_description.split() if isinstance(raw_description, str) else []
        description = ' '.join(words[:max_words])
        if len(words) > max_words:
            # Only mark the text as cut when words were actually dropped.
            description += '...'

        authors_str = _format_authors(row['authors'])
        caption = f"{row['full_title']} by {authors_str}: {description}"
        results.append([row['large_thumbnail'], caption])
    return results
# Dashboard | |
# Dropdown options: 'All' first, then every distinct category in sorted order /
# the fixed list of tones that retrieval() knows how to sort by.
categories = ['All', *sorted(books['final_categories'].unique())]
tones = ['All', 'Happy', 'Surprising', 'Angry', 'Suspenseful', 'Sad']

with gr.Blocks(theme=gr.themes.Glass()) as dashboard:
    gr.Markdown('# Semantics Book Recommendation System')

    # Input controls, created in the order they should appear.
    with gr.Row():
        user_query = gr.Textbox(
            placeholder='e.g. A story about a boy who ...',
            label='Please enter the description of the book you want to read',
        )
        category = gr.Dropdown(label='Select a category', choices=categories, value='All')
        tone = gr.Dropdown(label='Select an emotional tone', choices=tones, value='All')
        btn = gr.Button('Find books')

    gr.Markdown('## Recommendations')
    output = gr.Gallery(label='Recommended Books', columns=8, rows=2)

    # Clicking the button passes the three inputs to recommend() and shows its
    # result in the gallery.
    btn.click(fn=recommend, inputs=[user_query, category, tone], outputs=output)

# Launch the app only when this file is executed as a script.
if __name__ == '__main__':
    dashboard.launch(share=True)