trttung1610 committed on
Commit
d45cbcb
1 Parent(s): e11c8d1

Upload 2 files

Browse files
Files changed (2) hide show
  1. main_v2_en.py +206 -0
  2. requirements.txt +9 -0
main_v2_en.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import RobertaForSequenceClassification, AutoTokenizer, pipeline
2
+ import torch
3
+ import nltk
4
+ import docx2txt
5
+ import pandas as pd
6
+ import os
7
+ import matplotlib.pyplot as plt
8
+ import openpyxl
9
+ from openpyxl.styles import Font, Color, PatternFill
10
+ from openpyxl.styles.colors import WHITE
11
+ import gradio as gr
12
+
13
# Download the NLTK 'punkt' resource when the module is imported.
# NOTE(review): nothing in the visible code calls an nltk tokenizer - confirm
# whether this download is still needed.
nltk.download('punkt')

# Checkpoint shared by the classifier and its tokenizer.
SENTIMENT_MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment-latest"

# Load the tokenizer and the classification model once, at import time.
senti_tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_MODEL_NAME, use_fast=False)
senti_model = RobertaForSequenceClassification.from_pretrained(SENTIMENT_MODEL_NAME)
18
+
19
+ # File read
20
def read_file(docx):
    """Extract the non-empty, stripped text lines of a .docx document.

    Args:
        docx: Document path handed to ``docx2txt.process``.

    Returns:
        A list of the document's lines with surrounding whitespace removed
        and blank lines dropped. An empty list is returned when the file
        cannot be read, so callers can always iterate over the result.
    """
    try:
        text = docx2txt.process(docx)
    except Exception as e:
        # Best-effort read: report the problem and return an empty list
        # (rather than an implicit None) so downstream iteration never fails.
        print(f"Error reading file: {e}")
        return []

    stripped = (line.strip() for line in text.split('\n'))
    return [line for line in stripped if line]
29
+
30
+
31
+ # Define a function to analyze the sentiment of a text
32
def analyze(sentence):
    """Score one piece of text with the sentiment model.

    Args:
        sentence: Text to classify.

    Returns:
        A list of softmax probabilities, one per model class. The callers in
        this file label them Negative, Neutral, Positive, in that order.
    """
    # Truncate to the 512-token window of the RoBERTa-base checkpoint; without
    # this, a long paragraph makes the forward pass fail on position indices.
    token_ids = senti_tokenizer.encode(sentence, truncation=True, max_length=512)
    input_ids = torch.tensor([token_ids])

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        out = senti_model(input_ids)

    return out.logits.softmax(dim=-1).tolist()[0]
38
+
39
+
40
def file_analysis(docx):
    """Run sentiment analysis on every non-empty line of a .docx document.

    Args:
        docx: Document path passed to ``read_file``.

    Returns:
        A list with one ``analyze`` result per extracted line; empty when the
        document yields no text or cannot be read.
    """
    # read_file may return None on failure; treat that as "no lines".
    sentences = read_file(docx) or []
    return [analyze(sentence) for sentence in sentences]
50
+
51
+
52
def generate_pie_chart(df):
    """Save a pie chart of the mean Negative/Neutral/Positive scores.

    Args:
        df: DataFrame with numeric 'Negative', 'Neutral' and 'Positive'
            columns.

    Returns:
        The file name of the saved chart ('pie_chart.png', written to the
        current working directory).
    """
    labels = ['Negative', 'Neutral', 'Positive']
    averages = [df[label].mean() for label in labels]

    # One colour per label, in the same order as `labels`.
    colors = ['#BDBDBD', '#87CEFA', '#9ACD32']

    pie_chart_name = 'pie_chart.png'
    try:
        plt.pie(averages, labels=labels, colors=colors, autopct='%1.1f%%')
        plt.title('Average Scores by Sentiment')
        plt.savefig(pie_chart_name)
    finally:
        # Always release the pyplot figure, even when drawing or saving
        # fails; pyplot otherwise keeps it alive for later calls to draw on.
        plt.close()

    return pie_chart_name
75
+
76
+
77
def generate_excel_file(df):
    """Write the per-line scores and text to 'result.xlsx' with highlighting.

    In each data row the highest score cell and the text cell are filled with
    the colour of the dominant sentiment.

    Args:
        df: DataFrame with 'Negative', 'Neutral', 'Positive' and 'Text'
            columns, one row per analysed line.

    Returns:
        The path of the saved workbook ('result.xlsx').
    """
    sentiment_cols = ['Negative', 'Neutral', 'Positive']
    headers = sentiment_cols + ['Text']

    wb = openpyxl.Workbook()
    ws = wb.active

    # Bold header row.
    for col_num, header in enumerate(headers, 1):
        cell = ws.cell(row=1, column=col_num)
        cell.value = header
        cell.font = Font(bold=True)

    # Fill colour for each sentiment.
    fill_dict = {
        'Negative': PatternFill(start_color='BDBDBD', end_color='BDBDBD', fill_type='solid'),
        'Positive': PatternFill(start_color='9ACD32', end_color='9ACD32', fill_type='solid'),
        'Neutral': PatternFill(start_color='87CEFA', end_color='87CEFA', fill_type='solid')
    }

    # Data starts on sheet row 2 (row 1 holds the headers). Counting rows with
    # enumerate keeps the layout independent of the DataFrame's index values.
    for sheet_row, (_, row_data) in enumerate(df.iterrows(), 2):
        scores = [row_data[col] for col in sentiment_cols]
        max_score = max(scores)
        sentiment = sentiment_cols[scores.index(max_score)]
        fill = fill_dict[sentiment]

        for col_num, header in enumerate(headers, 1):
            col_data = row_data[header]
            cell = ws.cell(row=sheet_row, column=col_num)
            cell.value = col_data

            if header == 'Text':
                # White text on the grey (Negative) fill, black otherwise.
                # Decide from the sentiment itself: openpyxl stores 'BDBDBD'
                # as the aRGB string '00BDBDBD', so comparing the fill's rgb
                # against the 6-digit value never matches.
                font_color = WHITE if sentiment == 'Negative' else Color('000000')
                cell.fill = fill
                cell.font = Font(color=font_color)
            elif col_data == max_score:
                # Highlight the winning score cell(s).
                cell.fill = fill

    excel_file_path = 'result.xlsx'
    wb.save(excel_file_path)

    return excel_file_path
125
+
126
+
127
def process_file(docx):
    """Analyse a .docx document and produce the chart and spreadsheet.

    Args:
        docx: Document path passed to ``read_file``.

    Returns:
        A ``(pie_chart_name, excel_file_path)`` tuple of generated file paths.

    Raises:
        ValueError: If no text could be extracted from the document.
    """
    # Read the document once so the scores and the Text column always come
    # from the same list of lines.
    sentences = read_file(docx)
    if not sentences:
        # Covers both an unreadable file and an empty document; continuing
        # would only produce NaN averages and an unusable chart.
        raise ValueError(f"No text could be extracted from {docx!r}")

    results = [analyze(sentence) for sentence in sentences]

    df = pd.DataFrame(results, columns=['Negative', 'Neutral', 'Positive'])
    df['Text'] = sentences

    pie_chart_name = generate_pie_chart(df)
    excel_file_path = generate_excel_file(df)

    return pie_chart_name, excel_file_path
140
+
141
def _format_scores(scores):
    """Render sentiment scores as 'Label: 0.00' lines, one line per label."""
    label_names = ['Negative', 'Neutral', 'Positive']
    return "".join(
        f"{label}: {score:.2f}\n" for label, score in zip(label_names, scores)
    )


def analyze_file(file, sentence):
    """Gradio callback: analyse an uploaded document and/or a typed sentence.

    Args:
        file: Uploaded file (an object exposing ``.name`` or a plain path), or
            a falsy value when nothing was uploaded.
        sentence: Text typed by the user, or a falsy value when left empty.

    Returns:
        A ``(excel_file_path, pie_chart_name, output_text)`` tuple. Entries
        belonging to an input that was not supplied are ``None``, so the tuple
        always has three items, one for each interface output.
    """
    excel_file_path = None
    pie_chart_name = None
    output_text = None

    if file:
        # Accept both an upload wrapper with a .name attribute and a bare
        # path string.
        path = getattr(file, "name", file)
        pie_chart_name, excel_file_path = process_file(path)

    if sentence:
        output_text = _format_scores(analyze(sentence))

    return excel_file_path, pie_chart_name, output_text
183
+
184
# Use the top-level component classes: the legacy gr.inputs / gr.outputs
# namespaces were deprecated in Gradio 3 and removed in Gradio 4.
inputs = [
    gr.File(label="Select File for Analysis"),
    gr.Textbox(label="Enter Text"),
]
outputs = [
    gr.File(label="Analysis Result Excel"),
    gr.Image(type="filepath", label="Analysis Metrics"),
    gr.Textbox(label="Analysis Result"),
]

# Wire the callback to the UI; the three outputs match the 3-tuple returned
# by analyze_file.
interface = gr.Interface(
    fn=analyze_file,
    inputs=inputs,
    outputs=outputs,
    title="Sentiment Analysis",
    allow_flagging="never",  # Disable flag button
)


if __name__ == "__main__":
    interface.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ nltk
4
+ docx2txt
5
+ pandas
6
+ matplotlib
7
+ openpyxl
8
+ gradio
9
+ scipy