{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
isbn13full_titleauthorscategoriesdescriptionfull_descpublished_yearnum_pagesaverage_ratingratings_countthumbnailfinal_categories
09780002005883GileadMarilynne RobinsonFictionA NOVEL THAT READERS and critics have been eag...9780002005883 A NOVEL THAT READERS and critics...2004.0247.03.85361.0http://books.google.com/books/content?id=KQZCP...Fiction
19780002261982Spider's Web: A NovelCharles Osborne;Agatha ChristieDetective and mystery storiesA new 'Christie for Christmas' -- a full-lengt...9780002261982 A new 'Christie for Christmas' -...2000.0241.03.835164.0http://books.google.com/books/content?id=gA5GP...Fiction
29780006178736Rage of angelsSidney SheldonFictionA memorable, mesmerizing heroine Jennifer -- b...9780006178736 A memorable, mesmerizing heroine...1993.0512.03.9329532.0http://books.google.com/books/content?id=FKo2T...Fiction
39780006280897The Four LovesClive Staples LewisChristian lifeLewis' work on the nature of love divides love...9780006280897 Lewis' work on the nature of lov...2002.0170.04.1533684.0http://books.google.com/books/content?id=XhQ5X...Nonfiction
49780006280934The Problem of PainClive Staples LewisChristian life\"In The Problem of Pain, C.S. Lewis, one of th...9780006280934 \"In The Problem of Pain, C.S. Le...2002.0176.04.0937569.0http://books.google.com/books/content?id=Kk-uV...Nonfiction
\n", "
" ], "text/plain": [ " isbn13 full_title authors \\\n", "0 9780002005883 Gilead Marilynne Robinson \n", "1 9780002261982 Spider's Web: A Novel Charles Osborne;Agatha Christie \n", "2 9780006178736 Rage of angels Sidney Sheldon \n", "3 9780006280897 The Four Loves Clive Staples Lewis \n", "4 9780006280934 The Problem of Pain Clive Staples Lewis \n", "\n", " categories \\\n", "0 Fiction \n", "1 Detective and mystery stories \n", "2 Fiction \n", "3 Christian life \n", "4 Christian life \n", "\n", " description \\\n", "0 A NOVEL THAT READERS and critics have been eag... \n", "1 A new 'Christie for Christmas' -- a full-lengt... \n", "2 A memorable, mesmerizing heroine Jennifer -- b... \n", "3 Lewis' work on the nature of love divides love... \n", "4 \"In The Problem of Pain, C.S. Lewis, one of th... \n", "\n", " full_desc published_year \\\n", "0 9780002005883 A NOVEL THAT READERS and critics... 2004.0 \n", "1 9780002261982 A new 'Christie for Christmas' -... 2000.0 \n", "2 9780006178736 A memorable, mesmerizing heroine... 1993.0 \n", "3 9780006280897 Lewis' work on the nature of lov... 2002.0 \n", "4 9780006280934 \"In The Problem of Pain, C.S. Le... 2002.0 \n", "\n", " num_pages average_rating ratings_count \\\n", "0 247.0 3.85 361.0 \n", "1 241.0 3.83 5164.0 \n", "2 512.0 3.93 29532.0 \n", "3 170.0 4.15 33684.0 \n", "4 176.0 4.09 37569.0 \n", "\n", " thumbnail final_categories \n", "0 http://books.google.com/books/content?id=KQZCP... Fiction \n", "1 http://books.google.com/books/content?id=gA5GP... Fiction \n", "2 http://books.google.com/books/content?id=FKo2T... Fiction \n", "3 http://books.google.com/books/content?id=XhQ5X... Nonfiction \n", "4 http://books.google.com/books/content?id=Kk-uV... Nonfiction " ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "books = pd.read_csv('data/books_with_categories.csv')\n", "books.head()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Device set to use cuda:0\n" ] } ], "source": [ "import torch\n", "from transformers import pipeline\n", "\n", "classifier = pipeline('text-classification', model='j-hartmann/emotion-english-distilroberta-base',\n", " top_k=None,\n", " device=0 if torch.cuda.is_available() else -1)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives\n", "{'label': 'surprise', 'score': 0.7296027541160583}\n", "--------------------------------\n", "\n", "John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers\n", "{'label': 'neutral', 'score': 0.4662497639656067}\n", "--------------------------------\n", "\n", "It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up\n", "{'label': 'neutral', 'score': 0.6978469491004944}\n", "--------------------------------\n", "\n", "Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist\n", "{'label': 'fear', 'score': 0.9839729070663452}\n", "--------------------------------\n", "\n", "He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption\n", "{'label': 'sadness', 'score': 0.9560651183128357}\n", "--------------------------------\n", "\n", "Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, Gilead is a song of celebration and acceptance of the best and the worst the world has to offer\n", "{'label': 'joy', 'score': 0.9490270018577576}\n", "--------------------------------\n", "\n", "At its heart is a tale of the sacred bonds between fathers and sons, pitch-perfect in style and story, set to dazzle critics and readers alike\n", "{'label': 'joy', 'score': 0.6701961159706116}\n", "--------------------------------\n", "\n" ] } ], "source": [ "sentences = [s.strip() for s in books['description'][0].split('.') if s.strip()]\n", "emotions = classifier(sentences)\n", "\n", "for sentence, emotion in zip(sentences, emotions):\n", " print(sentence)\n", " print(emotion[0])\n", " print('--------------------------------\\n')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "\n", "emotion_labels = sorted(['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise'])\n", "\n", "def calculate_book_emotion_scores(emotions):\n", " sentence_emotion_scores = {label: [] for label in emotion_labels}\n", " \n", " for emotion in emotions:\n", " emotion = sorted(emotion, key=lambda x: x['label'])\n", " \n", " for idx, label in enumerate(emotion_labels):\n", " sentence_emotion_scores[label].append(emotion[idx]['score'])\n", " \n", " return {label: np.max(scores) for label, scores in sentence_emotion_scores.items()} # Note: Try to use both max and mean later" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 5197/5197 [02:51<00:00, 30.27it/s]\n" ] } ], "source": [ "from tqdm import tqdm\n", "\n", "isbns = []\n", "emotion_scores = {label: [] for label in emotion_labels}\n", "\n", "for i in tqdm(range(len(books))):\n", " isbns.append(books['isbn13'][i])\n", " sentences = books['description'][i].split('.')\n", " emotions = classifier(sentences)\n", " max_emotion_scores = calculate_book_emotion_scores(emotions)\n", " for label in emotion_labels:\n", " emotion_scores[label].append(max_emotion_scores[label])" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
angerdisgustfearjoyneutralsadnesssurpriseisbn13
00.0641340.2735910.9281680.9327970.6462170.9671580.7296039780002005883
10.6126190.3482840.9425280.7044210.8879400.1116900.2525459780002261982
20.0641340.1040070.9723210.7672370.5494770.1116900.0787669780006178736
30.3514830.1507220.3607070.2518810.7326860.1116900.0787669780006280897
40.0814120.1844950.0950430.0405640.8843900.4758810.0787669780006280934
\n", "
" ], "text/plain": [ " anger disgust fear joy neutral sadness surprise \\\n", "0 0.064134 0.273591 0.928168 0.932797 0.646217 0.967158 0.729603 \n", "1 0.612619 0.348284 0.942528 0.704421 0.887940 0.111690 0.252545 \n", "2 0.064134 0.104007 0.972321 0.767237 0.549477 0.111690 0.078766 \n", "3 0.351483 0.150722 0.360707 0.251881 0.732686 0.111690 0.078766 \n", "4 0.081412 0.184495 0.095043 0.040564 0.884390 0.475881 0.078766 \n", "\n", " isbn13 \n", "0 9780002005883 \n", "1 9780002261982 \n", "2 9780006178736 \n", "3 9780006280897 \n", "4 9780006280934 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "emotions_df = pd.DataFrame(emotion_scores)\n", "emotions_df['isbn13'] = isbns\n", "emotions_df.head()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
isbn13full_titleauthorscategoriesdescriptionfull_descpublished_yearnum_pagesaverage_ratingratings_countthumbnailfinal_categoriesangerdisgustfearjoyneutralsadnesssurprise
09780002005883GileadMarilynne RobinsonFictionA NOVEL THAT READERS and critics have been eag...9780002005883 A NOVEL THAT READERS and critics...2004.0247.03.85361.0http://books.google.com/books/content?id=KQZCP...Fiction0.0641340.2735910.9281680.9327970.6462170.9671580.729603
19780002261982Spider's Web: A NovelCharles Osborne;Agatha ChristieDetective and mystery storiesA new 'Christie for Christmas' -- a full-lengt...9780002261982 A new 'Christie for Christmas' -...2000.0241.03.835164.0http://books.google.com/books/content?id=gA5GP...Fiction0.6126190.3482840.9425280.7044210.8879400.1116900.252545
29780006178736Rage of angelsSidney SheldonFictionA memorable, mesmerizing heroine Jennifer -- b...9780006178736 A memorable, mesmerizing heroine...1993.0512.03.9329532.0http://books.google.com/books/content?id=FKo2T...Fiction0.0641340.1040070.9723210.7672370.5494770.1116900.078766
39780006280897The Four LovesClive Staples LewisChristian lifeLewis' work on the nature of love divides love...9780006280897 Lewis' work on the nature of lov...2002.0170.04.1533684.0http://books.google.com/books/content?id=XhQ5X...Nonfiction0.3514830.1507220.3607070.2518810.7326860.1116900.078766
49780006280934The Problem of PainClive Staples LewisChristian life\"In The Problem of Pain, C.S. Lewis, one of th...9780006280934 \"In The Problem of Pain, C.S. Le...2002.0176.04.0937569.0http://books.google.com/books/content?id=Kk-uV...Nonfiction0.0814120.1844950.0950430.0405640.8843900.4758810.078766
\n", "
" ], "text/plain": [ " isbn13 full_title authors \\\n", "0 9780002005883 Gilead Marilynne Robinson \n", "1 9780002261982 Spider's Web: A Novel Charles Osborne;Agatha Christie \n", "2 9780006178736 Rage of angels Sidney Sheldon \n", "3 9780006280897 The Four Loves Clive Staples Lewis \n", "4 9780006280934 The Problem of Pain Clive Staples Lewis \n", "\n", " categories \\\n", "0 Fiction \n", "1 Detective and mystery stories \n", "2 Fiction \n", "3 Christian life \n", "4 Christian life \n", "\n", " description \\\n", "0 A NOVEL THAT READERS and critics have been eag... \n", "1 A new 'Christie for Christmas' -- a full-lengt... \n", "2 A memorable, mesmerizing heroine Jennifer -- b... \n", "3 Lewis' work on the nature of love divides love... \n", "4 \"In The Problem of Pain, C.S. Lewis, one of th... \n", "\n", " full_desc published_year \\\n", "0 9780002005883 A NOVEL THAT READERS and critics... 2004.0 \n", "1 9780002261982 A new 'Christie for Christmas' -... 2000.0 \n", "2 9780006178736 A memorable, mesmerizing heroine... 1993.0 \n", "3 9780006280897 Lewis' work on the nature of lov... 2002.0 \n", "4 9780006280934 \"In The Problem of Pain, C.S. Le... 2002.0 \n", "\n", " num_pages average_rating ratings_count \\\n", "0 247.0 3.85 361.0 \n", "1 241.0 3.83 5164.0 \n", "2 512.0 3.93 29532.0 \n", "3 170.0 4.15 33684.0 \n", "4 176.0 4.09 37569.0 \n", "\n", " thumbnail final_categories \\\n", "0 http://books.google.com/books/content?id=KQZCP... Fiction \n", "1 http://books.google.com/books/content?id=gA5GP... Fiction \n", "2 http://books.google.com/books/content?id=FKo2T... Fiction \n", "3 http://books.google.com/books/content?id=XhQ5X... Nonfiction \n", "4 http://books.google.com/books/content?id=Kk-uV... Nonfiction \n", "\n", " anger disgust fear joy neutral sadness surprise \n", "0 0.064134 0.273591 0.928168 0.932797 0.646217 0.967158 0.729603 \n", "1 0.612619 0.348284 0.942528 0.704421 0.887940 0.111690 0.252545 \n", "2 0.064134 0.104007 0.972321 0.767237 0.549477 0.111690 0.078766 \n", "3 0.351483 0.150722 0.360707 0.251881 0.732686 0.111690 0.078766 \n", "4 0.081412 0.184495 0.095043 0.040564 0.884390 0.475881 0.078766 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "books_with_emotions = pd.merge(books, emotions_df, on='isbn13')\n", "\n", "books_with_emotions.to_csv('data/books_with_emotions.csv', index=False)\n", "books_with_emotions.head()" ] } ], "metadata": { "kernelspec": { "display_name": "book_rcm", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.16" } }, "nbformat": 4, "nbformat_minor": 2 }