# Importing libraries
import streamlit as st
from PIL import Image
import tensorflow as tf
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import RegexpTokenizer
import re
import string
import numpy as np
import pandas as pd
import nltk
try:  # Check if the WordNet corpus is installed
    nltk.data.find("corpora/wordnet.zip")
except LookupError:
    nltk.download('wordnet')
# ----------------------------------------------------------------------------------
# Read helper files (cached so Streamlit does not re-read them on every rerun;
# st.cache_data requires Streamlit >= 1.18)
@st.cache_data
def load_helpers():
    acronyms = pd.read_json("helper/acronym.json", typ="series")
    contractions = pd.read_json("helper/contractions.json", typ="series")
    stopword_list = list(pd.read_csv("helper/stopwords.csv").values.flatten())
    return acronyms, contractions, stopword_list

acronyms_dict, contractions_dict, stops = load_helpers()
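# Assumed helper formats (illustrative only; check the files in helper/):
#   helper/acronym.json       -> {"asap": "as soon as possible", ...}
#   helper/contractions.json  -> {"don't": "do not", ...}
#   helper/stopwords.csv      -> one stopword per row
# pd.read_json(..., typ="series") turns a flat JSON object like the above into a
# pandas Series indexed by the key being replaced.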
# ----------------------------------------------------------------------------------
# Defining the tokenizer
regexp = RegexpTokenizer(r"[\w']+")
# Preprocess function
def preprocess(text):
    text = text.lower()  # lowercase
    text = text.strip()  # strip surrounding whitespace
    # Removing HTML tags
    html = re.compile(r'<.*?>')
    text = html.sub(r'', text)  # html tags
    # Removing emoji patterns
    emoji_pattern = re.compile("["
                               u"\U0001F600-\U0001F64F"  # emoticons
                               u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                               u"\U0001F680-\U0001F6FF"  # transport & map symbols
                               u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                               u"\U00002702-\U000027B0"  # dingbats
                               u"\U000024C2-\U0001F251"  # enclosed characters
                               "]+", flags=re.UNICODE)
    text = emoji_pattern.sub(r'', text)  # emoji and other pictographic unicode
    # Removing URLs
    http = r"https?://\S+|www\.\S+"  # matches http(s)://... and www.... links
    pattern = r"({})".format(http)
    text = re.sub(pattern, "", text)  # remove urls
    # Removing twitter usernames
    pattern = r'@[\w_]+'
    text = re.sub(pattern, "", text)  # remove @username mentions
    # Removing punctuation and numbers
    punct_str = string.punctuation + string.digits
    punct_str = punct_str.replace("'", "")  # keep apostrophes for contractions
    punct_str = punct_str.replace("-", "")  # keep hyphens, handled below
    text = text.translate(str.maketrans('', '', punct_str))  # punctuation and numbers
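    # e.g. "it's 9pm - ok!!" -> "it's pm - ok": digits and most punctuation are
    # dropped, while apostrophes and hyphens survive this pass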
    # Replacing "-" in text with a space
    text = text.replace("-", " ")
    # Substituting acronyms
    words = []
    for word in regexp.tokenize(text):
        if word in acronyms_dict.index:
            words = words + acronyms_dict[word].split()
        else:
            words = words + [word]
    text = ' '.join(words)  # acronyms
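    # e.g. if acronyms_dict maps "asap" -> "as soon as possible" (hypothetical
    # entry), then "reply asap" becomes "reply as soon as possible"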
    # Substituting contractions
    words = []
    for word in regexp.tokenize(text):
        if word in contractions_dict.index:
            words = words + contractions_dict[word].split()
        else:
            words = words + [word]
    text = " ".join(words)  # contractions
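    # e.g. if contractions_dict maps "don't" -> "do not" (hypothetical entry),
    # then "don't panic" becomes "do not panic"; the tokenizer keeps the
    # apostrophe, so the lookup key matches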
    punct_str = string.punctuation
    text = text.translate(str.maketrans('', '', punct_str))  # second pass, now removing "'" as well
    # Lemmatization
    lemmatizer = WordNetLemmatizer()
    text = " ".join([lemmatizer.lemmatize(word) for word in regexp.tokenize(text)])  # lemmatize
    # Stopword removal
    text = ' '.join([word for word in regexp.tokenize(text) if word not in stops])  # stopwords
    # Removing all characters except letters and " " (space)
    allowed_chars = string.ascii_letters + " "
    text = "".join([ch for ch in text if ch in allowed_chars])  # keep letters and spaces only
    # Removing words containing a character repeated 3 or more times in a row
    pattern = r'\b\w*?(.)\1{2,}\w*\b'
    text = re.sub(pattern, "", text).strip()  # e.g. "sooooo" is dropped entirely
    # Removing words with fewer than 3 characters
    short_words = r'\b\w{1,2}\b'
    text = re.sub(short_words, "", text)
    # Return final output
    return text
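# Illustrative example (exact output depends on the acronym/contraction/stopword
# helper files shipped with the app):
#   preprocess("🚨 Just felt a strong earthquake! Stay safe everyone! https://t.co/x @user")
# drops the emoji, URL, mention, punctuation, and stopwords, leaving something like
#   "felt strong earthquake stay safe everyone"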
# ================================================================================================================================================================
# STREAMLIT
# ================================================================================================================================================================
# App development starts
st.set_page_config(layout="wide")
st.write("# A Predictive Analysis of Disaster Tweets")
img = Image.open("images/t2.png")
st.image(img)
tweet = st.text_input(label="Type or paste your tweet here", value="")
# Caching the model so Streamlit loads it only once per session
# (st.cache_resource requires Streamlit >= 1.18; older versions used @st.cache)
@st.cache_resource
def cache_model(model_name):
    model = tf.keras.models.load_model(model_name)
    return model

model = cache_model("model/tweet_model")  # --------------------------- model
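# Assumption: "model/tweet_model" is a TensorFlow SavedModel directory
# (saved_model.pb + variables/), and the model embeds its own text
# vectorization, since predict() below is fed a raw string.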
# If the user gives any input
if len(tweet) > 0:
    clean_tweet = preprocess(tweet)  # clean the tweet
    y_pred = model.predict([clean_tweet])  # probability of class = 1 (disaster)
    y_pred_num = int(np.round(y_pred)[0][0])  # threshold at 0.5 for the final class
    if y_pred_num == 0:
        # st.write(f"#### Non-Disaster tweet with disaster probability {round(y_pred[0][0]*100, 4)}%")
        st.write(f"#### 🌞🌞 This tweet is not flagged as a disaster; its estimated disaster probability is {round(y_pred[0][0]*100, 4)}%.")
    else:
        st.write(f"#### 🚩🚩 A high disaster probability ({round(y_pred[0][0]*100, 4)}%) indicates that this tweet is related to a disaster 🚨🚨.")
# ================================================================================================================================================================
# ----------------------------------------------- Example Tweets (predicted disaster probability, %, in brackets) -----------------------------------------------
# ================================================================================================================================================================
# ---------------------------- Disaster tweets -------------------------------
# "🚨 Just felt a strong earthquake! Stay safe everyone! #earthquake #safetyfirst" [93.62]
# "⚠️ Urgent: Massive wildfire approaching our community. Evacuation orders in effect. Please heed warnings and evacuate immediately. #wildfire #safety" [99.30]
# "🌪️ Tornado warning in effect for our area. Take shelter now! #tornadowarning #safetyfirst" [92.84]
# "🌊 Coastal areas under tsunami alert. Seek higher ground immediately! #tsunami #emergencyalert" [99.54]
# ---------------------------- Non-disaster tweets -------------------------------
# "Enjoying a peaceful evening with a good book and a cup of tea. #Relaxation" [4.52]
# "Excited for the weekend! Planning a movie night with friends. 🍿🎬 #FridayFeeling" [3.27]
# "Just finished a great workout session at the gym. Feeling energized! 💪 #FitnessGoals" [6.17]
# "Spent the day exploring a new hiking trail. Nature is so beautiful! 🌳 #OutdoorAdventure" [19.44]
# "Cooked a delicious homemade dinner tonight. #Foodie #HomeChef" [7.1]