Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -142,42 +142,93 @@ def fDistance(text2Party):
|
|
| 142 |
mem[x[0]]=x[1]
|
| 143 |
return normalize(mem)
|
| 144 |
|
| 145 |
-
def fDistancePlot(text2Party,plotN=
|
| 146 |
'''
|
| 147 |
-
most frequent words
|
| 148 |
'''
|
| 149 |
word_tokens_party = word_tokenize(text2Party) #Tokenizing
|
| 150 |
fdistance = FreqDist(word_tokens_party)
|
| 151 |
-
|
|
|
|
|
|
|
|
|
|
| 152 |
|
| 153 |
|
| 154 |
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
def analysis(Manifesto,Search):
|
| 158 |
raw_party = Parsing(Manifesto)
|
| 159 |
text_Party=clean_text(raw_party)
|
| 160 |
text_Party= Preprocess(text_Party)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
fdist_Party=fDistance(text_Party)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
searchRes=concordance(text_Party,Search)
|
| 163 |
searChRes=clean(searchRes)
|
| 164 |
-
# searChRes=searchRes.replace(Search,f"\u0332{Search}\u0332 ")
|
| 165 |
searChRes=searchRes.replace(Search,"\u0332".join(Search))
|
| 166 |
-
return fdist_Party,
|
| 167 |
|
| 168 |
|
| 169 |
Search_txt=gr.inputs.Textbox()
|
| 170 |
filePdf = gr.inputs.File()
|
| 171 |
text = gr.outputs.Textbox(label='SEARCHED OUTPUT')
|
| 172 |
-
mfw=gr.outputs.Label(label="Most Relevant
|
| 173 |
-
|
| 174 |
-
gr.
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
|
| 182 |
|
| 183 |
|
|
|
|
| 142 |
mem[x[0]]=x[1]
|
| 143 |
return normalize(mem)
|
| 144 |
|
| 145 |
+
def fDistancePlot(text2Party,plotN=30):
    '''
    Plot the most frequent words of a text and save the chart to disk.

    The text is tokenized with word_tokenize, the tokens are counted with
    FreqDist, and the plotN most frequent tokens are drawn on a figure
    created with figsize=(4,6). The figure is written to 'distplot.png'
    (relative to the current working directory).

    text2Party: text whose tokens are counted.
    plotN: how many of the most frequent tokens to plot (default 30).

    Returns None; the saved image file is the only result.
    '''
    word_tokens_party = word_tokenize(text2Party)  # Tokenizing
    fdistance = FreqDist(word_tokens_party)
    fig = plt.figure(figsize=(4, 6))
    try:
        fdistance.plot(plotN)
        plt.savefig('distplot.png')
    finally:
        # Close the figure instead of only clearing it: a cleared figure stays
        # registered with pyplot, so every call would otherwise leave one more
        # figure alive for the lifetime of the process.
        plt.close(fig)
|
| 155 |
|
| 156 |
|
| 157 |
|
| 158 |
+
def getSubjectivity(text):
    '''
    Return the subjectivity component of TextBlob's sentiment for text.
    '''
    sentiment = TextBlob(text).sentiment
    return sentiment.subjectivity
|
| 160 |
+
|
| 161 |
+
def getPolarity(text):
    '''
    Return the polarity component of TextBlob's sentiment for text.
    '''
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def getAnalysis(score):
    '''
    Turn a numeric score into a label based on its sign.

    Returns 'Negative' for a score below zero, 'Neutral' for a score equal
    to zero, and 'Positive' for anything else.
    '''
    if score < 0:
        return 'Negative'
    return 'Neutral' if score == 0 else 'Positive'
|
| 173 |
+
|
| 174 |
+
|
| 175 |
|
| 176 |
def _save_counts_bar_chart(labels, path, title=None):
    '''Draw a bar chart of the value counts of labels and save it to path.'''
    fig = plt.figure(figsize=(4, 6))
    try:
        labels.value_counts().plot(kind='bar')
        if title is not None:
            # Title and axis labels must be set after the figure exists,
            # otherwise they end up on a different, never-saved figure.
            plt.title(title)
            plt.xlabel('Sentiment')
            plt.ylabel('Counts')
        plt.savefig(path)
    finally:
        # Close (not just clear) so figures do not pile up across calls.
        plt.close(fig)


def _load_rgb_image(path):
    '''Read an image file with OpenCV and return it as an RGB array.'''
    image = cv2.imread(path)
    if image is None:
        # cv2.imread signals a missing or unreadable file by returning None.
        raise FileNotFoundError('could not read image: ' + path)
    # OpenCV loads pixels in BGR order; convert so colours display correctly.
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


def analysis(Manifesto,Search):
    '''
    Run the full manifesto analysis used as the Gradio callback.

    Manifesto: the uploaded document, handed to Parsing to obtain raw text.
    Search: the phrase looked up in the processed text with concordance.

    Side effects: writes 'sentimentAnalysis.png', 'sentimentAnalysis2.png',
    'wordcloud.png' and (through fDistancePlot) 'distplot.png' to the current
    working directory.

    Returns a tuple in the order expected by the interface outputs:
    (search result text, fDistance result, frequency-distribution image,
    polarity bar chart image, word cloud image, subjectivity bar chart image).

    Raises FileNotFoundError if one of the generated images cannot be read
    back.
    '''
    raw_party = Parsing(Manifesto)
    text_Party = clean_text(raw_party)
    text_Party = Preprocess(text_Party)

    # Sentiment is scored per line of the raw (unprocessed) text.
    df = pd.DataFrame(raw_party.split('\n'), columns=['Content'])
    df['Subjectivity'] = df['Content'].apply(getSubjectivity)
    df['Polarity'] = df['Content'].apply(getPolarity)
    df['Analysis on Polarity'] = df['Polarity'].apply(getAnalysis)
    # NOTE(review): getAnalysis labels by sign; TextBlob documents
    # subjectivity as lying in [0.0, 1.0], so this column would never be
    # 'Negative' -- confirm these are the intended labels.
    df['Analysis on Subjectivity'] = df['Subjectivity'].apply(getAnalysis)

    _save_counts_bar_chart(
        df['Analysis on Polarity'],
        'sentimentAnalysis.png',
        title='Sentiment Analysis',
    )
    _save_counts_bar_chart(
        df['Analysis on Subjectivity'],
        'sentimentAnalysis2.png',
    )

    wordcloud = WordCloud(max_words=2000, background_color="white",mode="RGB").generate(text_Party)
    cloud_fig = plt.figure(figsize=(4, 3))
    try:
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis("off")
        plt.savefig('wordcloud.png')
    finally:
        plt.close(cloud_fig)

    fdist_Party = fDistance(text_Party)
    fDistancePlot(text_Party)

    # Read the charts from the same relative paths they were saved to
    # (the previous '../' prefix pointed at the parent directory).
    img1 = _load_rgb_image('sentimentAnalysis.png')
    img2 = _load_rgb_image('wordcloud.png')
    img3 = _load_rgb_image('sentimentAnalysis2.png')
    img4 = _load_rgb_image('distplot.png')

    searchRes = concordance(text_Party, Search)
    # Keep the cleaned text: it used to be stored under a differently-cased
    # name and then overwritten from the uncleaned value.
    searchRes = clean(searchRes)
    # Join the characters of the search phrase with U+0332 (combining low
    # line) so the phrase stands out in the plain-text output.
    searchRes = searchRes.replace(Search, "\u0332".join(Search))
    return searchRes,fdist_Party,img4,img1,img2,img3
|
| 218 |
|
| 219 |
|
| 220 |
# Input components: the search phrase and the uploaded manifesto file.
Search_txt = gr.inputs.Textbox()
filePdf = gr.inputs.File()

# Output components, one per value returned by analysis.
text = gr.outputs.Textbox(label='SEARCHED OUTPUT')
mfw = gr.outputs.Label(label="Most Relevant Topics")
plot1 = gr.outputs.Image(label='Sentiment Analysis')
plot2 = gr.outputs.Image(label='Word Cloud')
plot3 = gr.outputs.Image(label='Subjectivity')
plot4 = gr.outputs.Image(label='Frequency Distribution')

# The outputs list follows the order of the tuple returned by analysis.
io = gr.Interface(
    fn=analysis,
    inputs=[filePdf, Search_txt],
    outputs=[text, mfw, plot4, plot1, plot2, plot3],
    title='Manifesto Analysis',
)
io.launch(debug=False, share=True)
|
| 232 |
|
| 233 |
|
| 234 |
|