File size: 3,960 Bytes
2d13434
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import numpy as np
import pandas as pd
import gradio as gr 
import torch

from langchain_community.document_loaders import TextLoader 
from langchain_text_splitters             import CharacterTextSplitter
from langchain_huggingface                import HuggingFaceEmbeddings
from langchain_chroma                     import Chroma

# Book catalogue: one row per book; later code reads its isbn13, authors,
# description, full_title, final_categories and emotion-score columns.
books = pd.read_csv('data/books_with_emotions.csv')


# Book Thumbnail
# Append the '&fife=w800' suffix to every thumbnail URL (presumably asks the
# image host for a wider rendition -- confirm). Rows with no thumbnail stay
# missing after the concatenation and are then pointed at a local placeholder.
books['large_thumbnail'] = (
    (books['thumbnail'] + '&fife=w800').fillna('cover-not-found.jpg')
)


# Create Vector Database
# The description file is loaded as a single document and split on newlines.
# `retrieval` parses the first whitespace-separated token of each chunk as an
# ISBN, so each line is expected to start with one.
raw_docs      = TextLoader('./data/full_desc.txt', encoding='utf-8').load()
# chunk_size=1 (rather than 0) still forces one chunk per line, because every
# non-empty line is already longer than the limit and is therefore never merged
# with its neighbour. Recent langchain-text-splitters releases validate
# chunk_size > 0 and raise ValueError for 0, so 1 is the smallest portable value.
text_splitter = CharacterTextSplitter(chunk_size=1, chunk_overlap=0, separator='\n')
docs          = text_splitter.split_documents(raw_docs)

# Sentence-embedding model; runs on the GPU when one is visible to torch.
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
)
# No persist directory is passed: the index is rebuilt from `docs` on every start.
database = Chroma.from_documents(docs, embeddings)


# Retrieval
# Maps a tone label offered in the UI to the emotion-score column used to
# re-rank the matches. Labels not listed here (e.g. 'All', None) leave the
# similarity order untouched.
_TONE_COLUMNS = {
    'Happy': 'joy',
    'Surprising': 'surprise',
    'Angry': 'anger',
    'Suspenseful': 'fear',
    'Sad': 'sadness',
}


def retrieval(query: str, category: str=None, tone: str=None, init_top_k: int=80, final_top_k: int=16) -> pd.DataFrame:
    """Return the books whose descriptions best match ``query``.

    Args:
        query: Free-text description of the book the user is looking for.
        category: Value of ``final_categories`` to keep. ``None`` or ``'All'``
            disables the category filter.
        tone: One of the keys of ``_TONE_COLUMNS``; when given, the final
            selection is ordered by that emotion score, highest first.
            Any other value keeps the similarity order.
        init_top_k: Number of candidates requested from the vector database.
        final_top_k: Maximum number of rows returned.

    Returns:
        A new DataFrame holding at most ``final_top_k`` rows of ``books``.
        ``books`` itself is never modified.
    """
    recs = database.similarity_search(query, k=init_top_k)

    # isbn -> position in the similarity ranking (first occurrence wins).
    # Each chunk is expected to start with the ISBN, optionally preceded by a
    # double quote; chunks that do not parse are skipped instead of aborting
    # the whole query.
    rank = {}
    for rec in recs:
        tokens = rec.page_content.strip('"').split()
        try:
            isbn = int(tokens[0])
        except (IndexError, ValueError):
            continue
        rank.setdefault(isbn, len(rank))

    matches = books[books['isbn13'].isin(list(rank))]
    # Boolean selection returns rows in catalogue order; restore the similarity
    # order so that head() below keeps the *best* matches, not the first ones
    # that happen to appear in the CSV.
    order   = matches['isbn13'].map(rank).to_numpy().argsort(kind='stable')
    matches = matches.iloc[order]

    # category=None is the signature default and must mean "no filter";
    # comparing the column against None would discard every row.
    if category is not None and category != 'All':
        matches = matches[matches['final_categories'] == category]
    matches = matches.head(final_top_k)

    tone_column = _TONE_COLUMNS.get(tone)
    if tone_column is not None:
        # Non-inplace sort: `matches` is derived from `books`, and sorting it
        # in place triggers pandas' chained-assignment warnings.
        matches = matches.sort_values(by=tone_column, ascending=False)

    return matches


# Recommendation
def _format_authors(raw) -> str:
    """Turn a ';'-separated author field into a readable list.

    Missing values (anything that is not a string) and fields with no
    non-blank name yield ``'Unknown author'`` instead of raising.
    """
    if not isinstance(raw, str):
        return 'Unknown author'

    names = [name.strip() for name in raw.split(';') if name.strip()]
    if not names:
        return 'Unknown author'
    if len(names) == 1:
        return names[0]
    if len(names) == 2:
        return names[0] + ' and ' + names[1]
    return ', '.join(names[:-1]) + ' and ' + names[-1]


def _preview(text, max_words: int = 30) -> str:
    """Return the first ``max_words`` words of ``text``.

    An ellipsis is appended only when words were actually cut off. Missing
    values (anything that is not a string) yield an empty string.
    """
    if not isinstance(text, str):
        return ''

    words   = text.split()
    snippet = ' '.join(words[:max_words])
    return snippet + '...' if len(words) > max_words else snippet


def recommend(query: str, category: str, tone: str):
    """Build the gallery entries for a user query.

    Args:
        query: Free-text description entered by the user.
        category: Selected category, forwarded to ``retrieval``.
        tone: Selected emotional tone, forwarded to ``retrieval``.

    Returns:
        A list of ``[image, caption]`` pairs, one per recommended book, in the
        order returned by ``retrieval``. The image is the book's
        ``large_thumbnail`` value; the caption is
        ``"<full_title> by <authors>: <description preview>"``.
    """
    recs    = retrieval(query, category, tone)
    results = []

    for _, row in recs.iterrows():
        description = _preview(row['description'])
        authors_str = _format_authors(row['authors'])
        caption     = f"{row['full_title']} by {authors_str}: {description}"

        results.append([
            row['large_thumbnail'],
            caption,
        ])
    return results


# Dashboard
# Dropdown choices. 'All' is the "no filter" entry: `retrieval` skips the
# category filter for it and has no tone branch for it, so the order is kept.
categories = ['All'] + sorted(books['final_categories'].unique())
# The five named tones are exactly the ones `retrieval` knows how to sort by.
tones      = ['All'] + ['Happy', 'Surprising', 'Angry', 'Suspenseful', 'Sad']

# Components are created inside the context managers below; statement order
# is kept as-is since it presumably drives the on-page layout -- confirm.
with gr.Blocks(theme=gr.themes.Glass()) as dashboard:
    gr.Markdown('# Semantics Book Recommendation System')
    
    # Input row: query box, the two dropdowns and the submit button.
    with gr.Row():
        user_query = gr.Textbox(
            label='Please enter the description of the book you want to read',
            placeholder='e.g. A story about a boy who ...',
        )
        category = gr.Dropdown(
            choices=categories,
            label='Select a category',
            value='All'
        )
        tone = gr.Dropdown(
            choices=tones,
            label='Select an emotional tone',
            value='All'
        )
        btn = gr.Button('Find books')
        
    gr.Markdown('## Recommendations')
    # 8 x 2 grid = 16 covers, the same number as `retrieval`'s default
    # final_top_k.
    output = gr.Gallery(
        label='Recommended Books',
        columns=8, rows=2
    )
    
    # Clicking the button calls recommend(user_query, category, tone) and
    # shows the returned [image, caption] pairs in the gallery.
    btn.click(
        fn=recommend,
        inputs=[user_query, category, tone],
        outputs=output
    )
    
# Start the web UI only when the file is run as a script, not when imported.
# NOTE(review): share=True presumably asks Gradio for a publicly reachable
# share link -- confirm that exposing the app this way is intended.
if __name__ == '__main__':
    dashboard.launch(share=True)