luthrabhuvan committed on
Commit
93c1744
·
verified ·
1 Parent(s): 65814e3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -0
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sklearn.feature_extraction.text import TfidfVectorizer
3
+ from sklearn.metrics.pairwise import cosine_similarity
4
+ import gradio as gr
5
+
6
class BookRecommender:
    """Content-based book recommender built on TF-IDF of book summaries.

    The uploaded table is cleaned, its summary column is vectorised with
    TF-IDF, and a pairwise cosine-similarity matrix is computed. Titles are
    then recommended by looking up the row of the requested title in that
    matrix.
    """

    def __init__(self):
        # Both attributes are filled in by the "Process File" callback defined
        # in create_interface(); until then no recommendation can be made.
        self.df = None
        self.similarity_matrix = None

    def load_data(self, filepath):
        """Read a CSV or Excel file into a DataFrame.

        Args:
            filepath: Path to a ``.csv``, ``.xls`` or ``.xlsx`` file. The
                extension is matched case-insensitively.

        Returns:
            The DataFrame produced by ``pd.read_csv`` / ``pd.read_excel``.

        Raises:
            FileNotFoundError: The file does not exist.
            ValueError: The extension is unsupported, or pandas raised a
                ``ValueError`` while parsing.
            Exception: Any other failure while loading.
        """
        try:
            # Lower-case once so "BOOKS.CSV" is accepted like "books.csv".
            lowered = filepath.lower()
            if lowered.endswith('.csv'):
                return pd.read_csv(filepath)
            if lowered.endswith(('.xls', '.xlsx')):
                return pd.read_excel(filepath)
            raise ValueError("Unsupported file format. Please provide a CSV or Excel file.")
        except FileNotFoundError as e:
            raise FileNotFoundError(f"File not found at {filepath}") from e
        except ValueError as e:
            raise ValueError(f"Error loading data: {e}") from e
        except Exception as e:
            raise Exception(f"Error loading data: {e}") from e

    def preprocess_data(self, df, summary_column='summary', title_column='title'):
        """Clean the raw table so it can be vectorised and searched.

        Missing titles/summaries are replaced with ``''``, duplicate
        (title, summary) pairs are dropped, rows whose title or summary is
        blank are removed, and the index is reset to ``0..n-1``.

        Args:
            df: Input DataFrame; it is not modified.
            summary_column: Name of the column holding the summary text.
            title_column: Name of the column holding the book title.

        Returns:
            A new, cleaned DataFrame with a fresh positional index.

        Raises:
            KeyError: One of the named columns is missing from ``df``.
        """
        # Work on a copy so the caller's frame is left untouched by fillna.
        df = df.copy()

        for column, label in ((summary_column, 'summary'), (title_column, 'title')):
            if df[column].isnull().any():
                df[column] = df[column].fillna('')
                print(f"Handled missing values in {label} column.")

        df = df.drop_duplicates(subset=[title_column, summary_column], keep='first')
        print("Removed duplicate rows.")

        # Build the "blank" mask first and negate it as a whole. Negating only
        # the title comparison would keep every row with an empty summary.
        is_blank = (df[title_column] == '') | (df[summary_column] == '')
        df = df[~is_blank]
        print("Removed rows with blank title and summary.")

        # Row i of the similarity matrix corresponds to the i-th remaining row,
        # so the index labels must match positions again after the drops.
        return df.reset_index(drop=True)

    def create_tfidf_matrix(self, df, summary_column='summary'):
        """Fit a TF-IDF vectoriser (English stop words) on the summaries.

        Args:
            df: DataFrame containing the summary column.
            summary_column: Name of the column holding the summary text.

        Returns:
            A ``(tfidf_matrix, vectorizer)`` tuple.
        """
        tfidf = TfidfVectorizer(stop_words='english')
        tfidf_matrix = tfidf.fit_transform(df[summary_column])
        return tfidf_matrix, tfidf

    def calculate_similarity(self, tfidf_matrix):
        """Return the pairwise cosine-similarity matrix of the TF-IDF rows."""
        return cosine_similarity(tfidf_matrix)

    def recommend_books(self, book_title, top_n=5):
        """Recommend the titles most similar to ``book_title``.

        Args:
            book_title: Exact title to look up in ``self.df['title']``.
            top_n: Maximum number of recommendations to return.

        Returns:
            A list of up to ``top_n`` titles ordered from most to least
            similar, or an error-message string when the title is not found
            or the lookup fails.
        """
        try:
            # Positional match, so the result indexes the similarity matrix
            # correctly whatever labels the DataFrame index carries.
            matches = (self.df['title'] == book_title).to_numpy().nonzero()[0]
        except Exception as e:
            return f"An error occurred: {e}"

        if matches.size == 0:
            return "Book title not found."
        book_pos = matches[0]

        scores = self.similarity_matrix[book_pos]
        # Stable descending sort. The queried book is removed explicitly
        # because with tied scores it is not guaranteed to sort first.
        ranked = (-scores).argsort(kind='stable')
        ranked = ranked[ranked != book_pos][:top_n]
        return self.df['title'].iloc[ranked].tolist()

    def create_interface(self):
        """Build the Gradio Blocks UI for uploading data and querying titles.

        Returns:
            The ``gr.Blocks`` object; the caller is expected to launch it.
        """

        def upload_and_process(file_obj):
            """Load, clean and index the uploaded file; report status."""
            if file_obj is None:
                return "Please upload a file first.", None
            # The upload may arrive as a plain path string or as an object
            # exposing the path via ``.name``; accept both.
            filepath = file_obj if isinstance(file_obj, str) else file_obj.name
            try:
                frame = self.preprocess_data(self.load_data(filepath))
                tfidf_matrix, _ = self.create_tfidf_matrix(frame)
                similarity = self.calculate_similarity(tfidf_matrix)
            except Exception as e:
                return f"Error: {e}", None
            # Publish both pieces together so a failure part-way through never
            # leaves a new table paired with an old similarity matrix.
            self.df = frame
            self.similarity_matrix = similarity
            return "File uploaded and processed successfully!", gr.update(interactive=True)

        def recommend_book_interface(book_title):
            """Return recommendations as one-column rows for the list widget."""
            if self.df is None or self.similarity_matrix is None:
                return [["Please upload and process a file first."]]

            recommendations = self.recommend_books(book_title)
            # An error message comes back as a plain string; show it as a
            # single row instead of iterating over its characters.
            if isinstance(recommendations, str):
                return [[recommendations]]
            return [[rec] for rec in recommendations]

        with gr.Blocks() as iface:
            file_output = gr.File(label="Upload CSV or Excel file", file_types=[".csv", ".xls", ".xlsx"])
            process_button = gr.Button("Process File")
            status_text = gr.Textbox(label="Status")
            # Disabled until a file has been processed successfully.
            text_input = gr.Textbox(lines=1, placeholder="Enter book title", interactive=False)
            output_list = gr.List(label="Recommended Books")

            process_button.click(upload_and_process, inputs=file_output, outputs=[status_text, text_input])
            text_input.change(recommend_book_interface, inputs=text_input, outputs=output_list)

        return iface
98
+
99
if __name__ == '__main__':
    # Script entry point: build the UI around a fresh recommender and launch it.
    BookRecommender().create_interface().launch()