import numpy as np
import pandas as pd
import gradio as gr 
import torch

from langchain_community.document_loaders import TextLoader 
from langchain_text_splitters             import CharacterTextSplitter
from langchain_huggingface                import HuggingFaceEmbeddings
from langchain_chroma                     import Chroma

# Load the book catalogue that the rest of the module filters and displays.
books = pd.read_csv('data/books_with_emotions.csv')


# Book Thumbnail
# Append the size suffix to every cover URL (presumably this asks the image
# host for a larger render -- TODO confirm). Rows with no thumbnail stay NaN
# after the concatenation and are pointed at the placeholder image instead.
_sized_covers = books['thumbnail'] + '&fife=w800'
books['large_thumbnail'] = np.where(_sized_covers.isna(), 'cover-not-found.jpg', _sized_covers)


# Create Vector Database
# The descriptions file is loaded as a single document and then cut into one
# chunk per line, so every line is embedded on its own.
raw_docs      = TextLoader('./data/full_desc.txt', encoding='utf-8').load()
# chunk_size=1 is never large enough to hold two lines plus their separator,
# so the splitter cannot merge neighbouring lines. chunk_size=0 gave the same
# chunks, but newer langchain-text-splitters releases reject non-positive
# sizes with a ValueError.
text_splitter = CharacterTextSplitter(chunk_size=1, chunk_overlap=0, separator='\n')
docs          = text_splitter.split_documents(raw_docs)

# Run the embedding model on the GPU when one is available, otherwise on CPU.
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
)
# Built at import time: every chunk is embedded before the module finishes loading.
database = Chroma.from_documents(docs, embeddings)


# Retrieval
def retrieval(query: str, category: str=None, tone: str=None, init_top_k: int=80, final_top_k: int=16) -> pd.DataFrame:
    """Return the books that best match ``query``, optionally filtered and re-ordered.

    Args:
        query: Free-text description to search the vector database with.
        category: Value of ``final_categories`` to keep. ``None`` or ``'All'``
            disables the filter.
        tone: One of ``'Happy'``, ``'Surprising'``, ``'Angry'``,
            ``'Suspenseful'`` or ``'Sad'``. Any other value (including
            ``None`` and ``'All'``) keeps the similarity order.
        init_top_k: Number of candidates requested from the vector database.
        final_top_k: Maximum number of rows returned.

    Returns:
        A new DataFrame with at most ``final_top_k`` rows of ``books``. Rows
        are the most similar candidates that pass the category filter, ordered
        by similarity, or by the tone's emotion column (descending) when a
        tone is selected.

    Raises:
        ValueError: If a retrieved chunk does not start with an integer id.
        IndexError: If a retrieved chunk is empty.
    """
    recs = database.similarity_search(query, k=init_top_k)

    # Each chunk starts with the book's ISBN-13 (possibly wrapped in quotes).
    # Remember the best (first) position of every id so the similarity order
    # can be restored after the catalogue lookup.
    rank_by_isbn = {}
    for position, rec in enumerate(recs):
        isbn = int(rec.page_content.strip('"').split()[0])
        rank_by_isbn.setdefault(isbn, position)

    # isin() yields rows in catalogue order, so sort them back into search
    # order; otherwise head() below would keep arbitrary candidates.
    matches = books[books['isbn13'].isin(list(rank_by_isbn))]
    matches = matches.sort_values(
        by='isbn13',
        key=lambda column: column.map(rank_by_isbn),
        kind='stable',
    )

    if category is not None and category != 'All':
        matches = matches[matches['final_categories'] == category]
    matches = matches.head(final_top_k)

    emotion_by_tone = {
        'Happy': 'joy',
        'Surprising': 'surprise',
        'Angry': 'anger',
        'Suspenseful': 'fear',
        'Sad': 'sadness',
    }
    emotion = emotion_by_tone.get(tone)
    if emotion is not None:
        # Non-inplace sort: `matches` is a slice of `books`, and sorting it in
        # place triggers pandas' SettingWithCopyWarning. The stable sort keeps
        # similarity order between books with equal scores.
        matches = matches.sort_values(by=emotion, ascending=False, kind='stable')

    return matches


# Recommendation
def _format_authors(raw_authors) -> str:
    """Turn a ';'-separated author field into a byline; '' when it is missing."""
    # A missing CSV cell arrives as a float NaN, which has no .split().
    if not isinstance(raw_authors, str):
        return ''
    names = [name.strip() for name in raw_authors.split(';') if name.strip()]
    if len(names) > 1:
        return ', '.join(names[:-1]) + ' and ' + names[-1]
    return names[0] if names else ''


def _shorten_description(raw_description, max_words: int = 30) -> str:
    """Return the first ``max_words`` words followed by '...'; '' when missing."""
    if not isinstance(raw_description, str):
        return ''
    return ' '.join(raw_description.split()[:max_words]) + '...'


def recommend(query: str, category: str, tone: str):
    """Build the gallery entries for a user query.

    Args:
        query: Free-text description typed by the user.
        category: Selected category, or ``'All'``.
        tone: Selected emotional tone, or ``'All'``.

    Returns:
        A list of ``[image, caption]`` pairs, one per recommended book. The
        list is empty when ``query`` is blank, so no search is run for it.
    """
    if not query or not query.strip():
        return []

    recs    = retrieval(query, category, tone)
    results = []

    for _, row in recs.iterrows():
        description = _shorten_description(row['description'])
        authors_str = _format_authors(row['authors'])

        # Leave out the byline / description part when the field is missing
        # instead of failing the whole request.
        headline = f"{row['full_title']} by {authors_str}" if authors_str else f"{row['full_title']}"
        caption  = f"{headline}: {description}" if description else headline

        results.append([
            row['large_thumbnail'],
            caption,
        ])
    return results


# Dashboard
# Dropdown choices. Missing categories are dropped before sorting: a NaN mixed
# with strings makes sorted() raise TypeError.
categories = ['All'] + sorted(books['final_categories'].dropna().unique())
# Tone labels must match the ones handled in retrieval(); 'All' means no tone sorting.
tones      = ['All'] + ['Happy', 'Surprising', 'Angry', 'Suspenseful', 'Sad']

# UI layout: a title, one row of inputs (query, category, tone, button) and a
# gallery underneath that shows the recommendations.
with gr.Blocks(theme=gr.themes.Glass()) as dashboard:
    gr.Markdown('# Semantics Book Recommendation System')

    # All input controls share a single row.
    with gr.Row():
        user_query = gr.Textbox(
            placeholder='e.g. A story about a boy who ...',
            label='Please enter the description of the book you want to read',
        )
        category = gr.Dropdown(label='Select a category', choices=categories, value='All')
        tone = gr.Dropdown(label='Select an emotional tone', choices=tones, value='All')
        btn = gr.Button('Find books')

    gr.Markdown('## Recommendations')
    output = gr.Gallery(label='Recommended Books', rows=2, columns=8)

    # Clicking the button passes the three input values to recommend() and
    # sends its return value to the gallery.
    btn.click(fn=recommend, outputs=output, inputs=[user_query, category, tone])

# Start the app only when this file is run as a script.
# NOTE(review): share=True requests a publicly reachable Gradio link -- confirm that is intended.
if __name__ == '__main__':
    dashboard.launch(share=True)