# Importing libraries
import streamlit as st
from PIL import Image
import tensorflow as tf
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import RegexpTokenizer
import re
import string
import numpy as np
import pandas as pd
import nltk
try:  # Check if the WordNet corpus is installed
    nltk.data.find("corpora/wordnet.zip")
except LookupError:
    nltk.download('wordnet')
# ----------------------------------------------------------------------------------
# Read helper files (cached so Streamlit does not re-read them on every rerun;
# st.cache_data requires Streamlit >= 1.18)
@st.cache_data
def load_helpers():
    acronyms = pd.read_json("helper/acronym.json", typ="series")
    contractions = pd.read_json("helper/contractions.json", typ="series")
    stopword_list = list(pd.read_csv("helper/stopwords.csv").values.flatten())
    return acronyms, contractions, stopword_list

acronyms_dict, contractions_dict, stops = load_helpers()
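# Assumed helper formats (illustrative only; check the files in helper/):
#   helper/acronym.json       -> {"asap": "as soon as possible", ...}
#   helper/contractions.json  -> {"don't": "do not", ...}
#   helper/stopwords.csv      -> one stopword per row
# pd.read_json(..., typ="series") turns a flat JSON object like the above into a
# pandas Series indexed by the key being replaced.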
# ----------------------------------------------------------------------------------
# Defining the tokenizer
regexp = RegexpTokenizer(r"[\w']+")
# Preprocess function
def preprocess(text):
    text = text.lower()  # lowercase
    text = text.strip()  # strip surrounding whitespace
    # Removing HTML tags
    html = re.compile(r'<.*?>')
    text = html.sub(r'', text)  # html tags
    # Removing emoji patterns
    emoji_pattern = re.compile("["
                               u"\U0001F600-\U0001F64F"  # emoticons
                               u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                               u"\U0001F680-\U0001F6FF"  # transport & map symbols
                               u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                               u"\U00002702-\U000027B0"  # dingbats
                               u"\U000024C2-\U0001F251"  # enclosed characters
                               "]+", flags=re.UNICODE)
    text = emoji_pattern.sub(r'', text)  # emoji and other pictographic unicode
    # Removing URLs
    http = r"https?://\S+|www\.\S+"  # matches http(s)://... and www.... links
    pattern = r"({})".format(http)
    text = re.sub(pattern, "", text)  # remove urls
    # Removing twitter usernames
    pattern = r'@[\w_]+'
    text = re.sub(pattern, "", text)  # remove @username mentions
    # Removing punctuation and numbers
    punct_str = string.punctuation + string.digits
    punct_str = punct_str.replace("'", "")  # keep apostrophes for contractions
    punct_str = punct_str.replace("-", "")  # keep hyphens, handled below
    text = text.translate(str.maketrans('', '', punct_str))  # punctuation and numbers
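    # e.g. "it's 9pm - ok!!" -> "it's pm - ok": digits and most punctuation are
    # dropped, while apostrophes and hyphens survive this pass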
    # Replacing "-" in text with a space
    text = text.replace("-", " ")
    # Substituting acronyms
    words = []
    for word in regexp.tokenize(text):
        if word in acronyms_dict.index:
            words = words + acronyms_dict[word].split()
        else:
            words = words + [word]
    text = ' '.join(words)  # acronyms
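    # e.g. if acronyms_dict maps "asap" -> "as soon as possible" (hypothetical
    # entry), then "reply asap" becomes "reply as soon as possible"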
    # Substituting contractions
    words = []
    for word in regexp.tokenize(text):
        if word in contractions_dict.index:
            words = words + contractions_dict[word].split()
        else:
            words = words + [word]
    text = " ".join(words)  # contractions
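    # e.g. if contractions_dict maps "don't" -> "do not" (hypothetical entry),
    # then "don't panic" becomes "do not panic"; the tokenizer keeps the
    # apostrophe, so the lookup key matches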
    punct_str = string.punctuation
    text = text.translate(str.maketrans('', '', punct_str))  # second pass, now removing "'" as well
    # Lemmatization
    lemmatizer = WordNetLemmatizer()
    text = " ".join([lemmatizer.lemmatize(word) for word in regexp.tokenize(text)])  # lemmatize
    # Stopword removal
    text = ' '.join([word for word in regexp.tokenize(text) if word not in stops])  # stopwords
    # Removing all characters except letters and " " (space)
    allowed_chars = string.ascii_letters + " "
    text = "".join([ch for ch in text if ch in allowed_chars])  # keep letters and spaces only
    # Removing words containing a character repeated 3 or more times in a row
    pattern = r'\b\w*?(.)\1{2,}\w*\b'
    text = re.sub(pattern, "", text).strip()  # e.g. "sooooo" is dropped entirely
    # Removing words with fewer than 3 characters
    short_words = r'\b\w{1,2}\b'
    text = re.sub(short_words, "", text)
    # Return final output
    return text
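# Illustrative example (exact output depends on the acronym/contraction/stopword
# helper files shipped with the app):
#   preprocess("🚨 Just felt a strong earthquake! Stay safe everyone! https://t.co/x @user")
# drops the emoji, URL, mention, punctuation, and stopwords, leaving something like
#   "felt strong earthquake stay safe everyone"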
# ================================================================================================================================================================
# STREAMLIT
# ================================================================================================================================================================
# App development starts
st.set_page_config(layout="wide")
st.write("# A Predictive Analysis of Disaster Tweets")
img = Image.open("images/t2.png")
st.image(img)
tweet = st.text_input(label="Type or paste your tweet here", value="")
# Caching the model so Streamlit loads it only once per session
# (st.cache_resource requires Streamlit >= 1.18; older versions used @st.cache)
@st.cache_resource
def cache_model(model_name):
    model = tf.keras.models.load_model(model_name)
    return model

model = cache_model("model/tweet_model")  # --------------------------- model
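# Assumption: "model/tweet_model" is a TensorFlow SavedModel directory
# (saved_model.pb + variables/), and the model embeds its own text
# vectorization, since predict() below is fed a raw string.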
# If the user gives any input
if len(tweet) > 0:
    clean_tweet = preprocess(tweet)  # clean the tweet
    y_pred = model.predict([clean_tweet])  # probability of class = 1 (disaster)
    y_pred_num = int(np.round(y_pred)[0][0])  # threshold at 0.5 for the final class
    if y_pred_num == 0:
        # st.write(f"#### Non-Disaster tweet with disaster probability {round(y_pred[0][0]*100, 4)}%")
        st.write(f"#### 🌞🌞 This tweet is not flagged as a disaster; its estimated disaster probability is {round(y_pred[0][0]*100, 4)}%.")
    else:
        st.write(f"#### 🚩🚩 A high disaster probability ({round(y_pred[0][0]*100, 4)}%) indicates that this tweet is related to a disaster 🚨🚨.")
# ================================================================================================================================================================
# ----------------------------------------------- Example Tweets (predicted disaster probability, %, in brackets) -----------------------------------------------
# ================================================================================================================================================================
# ---------------------------- Disaster tweets -------------------------------
# "🚨 Just felt a strong earthquake! Stay safe everyone! #earthquake #safetyfirst" [93.62]
# "⚠️ Urgent: Massive wildfire approaching our community. Evacuation orders in effect. Please heed warnings and evacuate immediately. #wildfire #safety" [99.30]
# "🌪️ Tornado warning in effect for our area. Take shelter now! #tornadowarning #safetyfirst" [92.84]
# "🌊 Coastal areas under tsunami alert. Seek higher ground immediately! #tsunami #emergencyalert" [99.54]
# ---------------------------- Non-disaster tweets -------------------------------
# "Enjoying a peaceful evening with a good book and a cup of tea. #Relaxation" [4.52]
# "Excited for the weekend! Planning a movie night with friends. 🍿🎬 #FridayFeeling" [3.27]
# "Just finished a great workout session at the gym. Feeling energized! 💪 #FitnessGoals" [6.17]
# "Spent the day exploring a new hiking trail. Nature is so beautiful! 🌳 #OutdoorAdventure" [19.44]
# "Cooked a delicious homemade dinner tonight. #Foodie #HomeChef" [7.1]