Spaces:

YuITC
/

Semantic-Book-Recommender

Sleeping

YuITC

Rename to app.py

42a1c5a 5 months ago

3.96 kB

	import numpy as np
	import pandas as pd
	import gradio as gr
	import torch

	from langchain_community.document_loaders import TextLoader
	from langchain_text_splitters import CharacterTextSplitter
	from langchain_huggingface import HuggingFaceEmbeddings
	from langchain_chroma import Chroma

	# Book metadata; the columns read elsewhere in this file include
	# 'isbn13', 'final_categories', 'description', 'authors', 'full_title'
	# and one score column per emotional tone.
	books = pd.read_csv('data/books_with_emotions.csv')


	# Book Thumbnail
	# Append the '&fife=w800' suffix to every thumbnail URL (presumably a
	# request for a larger rendition -- confirm against the image host).
	# A missing URL stays missing through the concatenation, so it can be
	# swapped for the local placeholder image in the same expression.
	books['large_thumbnail'] = (
	    (books['thumbnail'] + '&fife=w800').fillna('cover-not-found.jpg')
	)


	# Create Vector Database
	# The description file is split on newlines. With chunk_size=0 the
	# splitter presumably never merges neighbouring lines, giving one chunk
	# per line -- TODO confirm against the splitter docs. retrieval() relies
	# on every chunk starting with the book's ISBN-13.
	raw_docs = TextLoader('./data/full_desc.txt', encoding='utf-8').load()
	text_splitter = CharacterTextSplitter(separator='\n', chunk_size=0, chunk_overlap=0)
	docs = text_splitter.split_documents(raw_docs)

	# Embed on the GPU when one is visible to torch, otherwise on the CPU.
	device = 'cuda' if torch.cuda.is_available() else 'cpu'
	embeddings = HuggingFaceEmbeddings(
	    model_name='sentence-transformers/all-MiniLM-L6-v2',
	    model_kwargs={'device': device},
	)
	database = Chroma.from_documents(docs, embeddings)


	# Retrieval
	def retrieval(query: str, category: str = None, tone: str = None,
	              init_top_k: int = 80, final_top_k: int = 16) -> pd.DataFrame:
	    """Return the books whose descriptions best match *query*.

	    The vector store is asked for the ``init_top_k`` nearest description
	    chunks. The first whitespace-separated token of each chunk (with
	    surrounding double quotes stripped) is parsed as an ISBN-13 and used
	    to select rows from the module-level ``books`` frame.

	    Args:
	        query: Free-text description of the wanted book.
	        category: Value of ``final_categories`` to keep. ``None``, an
	            empty string or ``'All'`` disables the filter.
	        tone: One of ``'Happy'``, ``'Surprising'``, ``'Angry'``,
	            ``'Suspenseful'`` or ``'Sad'``. Any other value (including
	            ``None`` and ``'All'``) leaves the similarity order intact.
	        init_top_k: Number of chunks requested from the vector store.
	        final_top_k: Maximum number of rows returned.

	    Returns:
	        A new DataFrame with the columns of ``books`` and at most
	        ``final_top_k`` rows, ordered by the tone score (descending) when
	        a tone is selected and by similarity rank otherwise.

	    Raises:
	        ValueError: If a chunk does not start with an integer token.
	        IndexError: If a chunk is empty.
	    """
	    tone_columns = {
	        'Happy': 'joy',
	        'Surprising': 'surprise',
	        'Angry': 'anger',
	        'Suspenseful': 'fear',
	        'Sad': 'sadness',
	    }

	    recs = database.similarity_search(query, k=init_top_k)
	    ids = [int(rec.page_content.strip('"').split()[0]) for rec in recs]

	    # Similarity rank per ISBN (0 = first hit; hits are assumed to arrive
	    # best-first). setdefault keeps the best rank when an ISBN repeats.
	    rank = {}
	    for position, isbn in enumerate(ids):
	        rank.setdefault(isbn, position)

	    # isin() yields rows in the order they appear in `books`, which would
	    # discard the ranking before truncation -- restore it explicitly.
	    matches = books[books['isbn13'].isin(ids)]
	    matches = matches.sort_values(
	        by='isbn13',
	        key=lambda isbns: isbns.map(rank),
	        kind='mergesort',  # stable: rows sharing an ISBN keep their order
	    )

	    # A missing category means "no filter", the same as 'All'.
	    if category and category != 'All':
	        matches = matches[matches['final_categories'] == category]
	    matches = matches.head(final_top_k)

	    # Re-assign instead of sorting in place: `matches` is derived from
	    # `books`, and in-place mutation of such a slice triggers pandas'
	    # SettingWithCopy warning.
	    tone_column = tone_columns.get(tone)
	    if tone_column is not None:
	        matches = matches.sort_values(by=tone_column, ascending=False)

	    return matches


	# Recommendation
	def _format_authors(raw_authors) -> str:
	    """Turn a ';'-separated author field into 'A', 'A and B' or 'A, B and C'.

	    Returns an empty string when the field is not a string (e.g. a
	    missing value read from the CSV).
	    """
	    if not isinstance(raw_authors, str):
	        return ''
	    authors = raw_authors.split(';')
	    if len(authors) == 1:
	        return authors[0]
	    return ', '.join(authors[:-1]) + ' and ' + authors[-1]


	def recommend(query: str, category: str, tone: str):
	    """Build the gallery entries for a user query.

	    Args:
	        query: Free-text description typed by the user.
	        category: Selected category, forwarded to ``retrieval``.
	        tone: Selected emotional tone, forwarded to ``retrieval``.

	    Returns:
	        A list of ``[thumbnail, caption]`` pairs, one per recommended
	        book. The caption is the title, the formatted authors and the
	        first 30 words of the description followed by ``'...'``. The
	        list is empty when *query* is blank.
	    """
	    # Nothing to search for: skip the embedding call entirely.
	    if not query or not query.strip():
	        return []

	    max_words = 30
	    results = []

	    for _, row in retrieval(query, category, tone).iterrows():
	        # Missing cells come back as NaN (a float), which has no .split().
	        raw_description = row['description']
	        words = raw_description.split() if isinstance(raw_description, str) else []
	        description = ' '.join(words[:max_words]) + '...'

	        authors_str = _format_authors(row['authors'])
	        if authors_str:
	            caption = f"{row['full_title']} by {authors_str}: {description}"
	        else:
	            # No author information: leave out the dangling " by ".
	            caption = f"{row['full_title']}: {description}"

	        results.append([row['large_thumbnail'], caption])

	    return results


	# Dashboard
	# Dropdown choices. dropna() keeps sorted() from comparing a missing
	# category (NaN, a float) with strings, which would raise TypeError.
	categories = ['All'] + sorted(books['final_categories'].dropna().unique())
	tones = ['All', 'Happy', 'Surprising', 'Angry', 'Suspenseful', 'Sad']

	with gr.Blocks(theme=gr.themes.Glass()) as dashboard:
	    gr.Markdown('# Semantics Book Recommendation System')

	    # Query controls: free-text query, two filters and the trigger button.
	    with gr.Row():
	        user_query = gr.Textbox(
	            label='Please enter the description of the book you want to read',
	            placeholder='e.g. A story about a boy who ...',
	        )
	        category = gr.Dropdown(
	            choices=categories,
	            label='Select a category',
	            value='All',
	        )
	        tone = gr.Dropdown(
	            choices=tones,
	            label='Select an emotional tone',
	            value='All',
	        )
	        btn = gr.Button('Find books')

	    gr.Markdown('## Recommendations')
	    # 8 x 2 grid, matching the 16 rows retrieval() returns by default.
	    output = gr.Gallery(
	        label='Recommended Books',
	        columns=8,
	        rows=2,
	    )

	    # recommend() returns [thumbnail, caption] pairs for the gallery.
	    btn.click(
	        fn=recommend,
	        inputs=[user_query, category, tone],
	        outputs=output,
	    )

	if __name__ == '__main__':
	    # share=True asks Gradio for a public share link in addition to the
	    # local server.
	    dashboard.launch(share=True)