In [1]:
import pandas as pd

books = pd.read_csv('data/books_with_categories.csv')
books.head()

Unnamed: 0,isbn13,full_title,authors,categories,description,full_desc,published_year,num_pages,average_rating,ratings_count,thumbnail,final_categories
0,9780002005883,Gilead,Marilynne Robinson,Fiction,A NOVEL THAT READERS and critics have been eag...,9780002005883 A NOVEL THAT READERS and critics...,2004.0,247.0,3.85,361.0,http://books.google.com/books/content?id=KQZCP...,Fiction
1,9780002261982,Spider's Web: A Novel,Charles Osborne;Agatha Christie,Detective and mystery stories,A new 'Christie for Christmas' -- a full-lengt...,9780002261982 A new 'Christie for Christmas' -...,2000.0,241.0,3.83,5164.0,http://books.google.com/books/content?id=gA5GP...,Fiction
2,9780006178736,Rage of angels,Sidney Sheldon,Fiction,"A memorable, mesmerizing heroine Jennifer -- b...","9780006178736 A memorable, mesmerizing heroine...",1993.0,512.0,3.93,29532.0,http://books.google.com/books/content?id=FKo2T...,Fiction
3,9780006280897,The Four Loves,Clive Staples Lewis,Christian life,Lewis' work on the nature of love divides love...,9780006280897 Lewis' work on the nature of lov...,2002.0,170.0,4.15,33684.0,http://books.google.com/books/content?id=XhQ5X...,Nonfiction
4,9780006280934,The Problem of Pain,Clive Staples Lewis,Christian life,"""In The Problem of Pain, C.S. Lewis, one of th...","9780006280934 ""In The Problem of Pain, C.S. Le...",2002.0,176.0,4.09,37569.0,http://books.google.com/books/content?id=Kk-uV...,Nonfiction


In [2]:
import torch
from transformers import pipeline

classifier = pipeline('text-classification', model='j-hartmann/emotion-english-distilroberta-base',
                      top_k=None,
                      device=0 if torch.cuda.is_available() else -1)

Device set to use cuda:0


In [3]:
sentences  = [s.strip() for s in books['description'][0].split('.') if s.strip()]
emotions   = classifier(sentences)

for sentence, emotion in zip(sentences, emotions):
    print(sentence)
    print(emotion[0])
    print('--------------------------------\n')

A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives
{'label': 'surprise', 'score': 0.7296027541160583}
--------------------------------

John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers
{'label': 'neutral', 'score': 0.4662497639656067}
--------------------------------

It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up
{'label': 'neutral', 'score': 0.6978469491004944}
--------------------------------

Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist
{'label': 'fear', 'score': 0.9839729070663452}
--------------------------------

He is troubled, too, by his prodiga

In [4]:
import numpy as np

emotion_labels = sorted(['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise'])

def calculate_book_emotion_scores(emotions):
    sentence_emotion_scores = {label: [] for label in emotion_labels}
    
    for emotion in emotions:
        emotion = sorted(emotion, key=lambda x: x['label'])
        
        for idx, label in enumerate(emotion_labels):
            sentence_emotion_scores[label].append(emotion[idx]['score'])
            
    return {label: np.max(scores) for label, scores in sentence_emotion_scores.items()} # Note: Try to use both max and mean later

In [6]:
from tqdm import tqdm

isbns          = []
emotion_scores = {label: [] for label in emotion_labels}

for i in tqdm(range(len(books))):
    isbns.append(books['isbn13'][i])
    sentences = books['description'][i].split('.')
    emotions  = classifier(sentences)
    max_emotion_scores = calculate_book_emotion_scores(emotions)
    for label in emotion_labels:
        emotion_scores[label].append(max_emotion_scores[label])

100%|██████████| 5197/5197 [02:51<00:00, 30.27it/s]


In [7]:
emotions_df = pd.DataFrame(emotion_scores)
emotions_df['isbn13'] = isbns
emotions_df.head()

Unnamed: 0,anger,disgust,fear,joy,neutral,sadness,surprise,isbn13
0,0.064134,0.273591,0.928168,0.932797,0.646217,0.967158,0.729603,9780002005883
1,0.612619,0.348284,0.942528,0.704421,0.88794,0.11169,0.252545,9780002261982
2,0.064134,0.104007,0.972321,0.767237,0.549477,0.11169,0.078766,9780006178736
3,0.351483,0.150722,0.360707,0.251881,0.732686,0.11169,0.078766,9780006280897
4,0.081412,0.184495,0.095043,0.040564,0.88439,0.475881,0.078766,9780006280934


In [8]:
books_with_emotions = pd.merge(books, emotions_df, on='isbn13')

books_with_emotions.to_csv('data/books_with_emotions.csv', index=False)
books_with_emotions.head()

Unnamed: 0,isbn13,full_title,authors,categories,description,full_desc,published_year,num_pages,average_rating,ratings_count,thumbnail,final_categories,anger,disgust,fear,joy,neutral,sadness,surprise
0,9780002005883,Gilead,Marilynne Robinson,Fiction,A NOVEL THAT READERS and critics have been eag...,9780002005883 A NOVEL THAT READERS and critics...,2004.0,247.0,3.85,361.0,http://books.google.com/books/content?id=KQZCP...,Fiction,0.064134,0.273591,0.928168,0.932797,0.646217,0.967158,0.729603
1,9780002261982,Spider's Web: A Novel,Charles Osborne;Agatha Christie,Detective and mystery stories,A new 'Christie for Christmas' -- a full-lengt...,9780002261982 A new 'Christie for Christmas' -...,2000.0,241.0,3.83,5164.0,http://books.google.com/books/content?id=gA5GP...,Fiction,0.612619,0.348284,0.942528,0.704421,0.88794,0.11169,0.252545
2,9780006178736,Rage of angels,Sidney Sheldon,Fiction,"A memorable, mesmerizing heroine Jennifer -- b...","9780006178736 A memorable, mesmerizing heroine...",1993.0,512.0,3.93,29532.0,http://books.google.com/books/content?id=FKo2T...,Fiction,0.064134,0.104007,0.972321,0.767237,0.549477,0.11169,0.078766
3,9780006280897,The Four Loves,Clive Staples Lewis,Christian life,Lewis' work on the nature of love divides love...,9780006280897 Lewis' work on the nature of lov...,2002.0,170.0,4.15,33684.0,http://books.google.com/books/content?id=XhQ5X...,Nonfiction,0.351483,0.150722,0.360707,0.251881,0.732686,0.11169,0.078766
4,9780006280934,The Problem of Pain,Clive Staples Lewis,Christian life,"""In The Problem of Pain, C.S. Lewis, one of th...","9780006280934 ""In The Problem of Pain, C.S. Le...",2002.0,176.0,4.09,37569.0,http://books.google.com/books/content?id=Kk-uV...,Nonfiction,0.081412,0.184495,0.095043,0.040564,0.88439,0.475881,0.078766
