Spaces:

zionia
/

phishing-email-detector-project

Sleeping

App Files Files Community

phishing-email-detector-project / app.py

zionia

Update app.py

80585ad verified 3 months ago

raw

history blame contribute delete

4.34 kB

	import gradio as gr
	import re
	from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
	import numpy as np

	# The model and its tokenizer are loaded from the same identifier, so it is
	# named once here; `pipe` is the callable used by detect_phishing().
	_MODEL_REPO = "zionia/email-phishing-detector"
	model = AutoModelForSequenceClassification.from_pretrained(_MODEL_REPO)
	tokenizer = AutoTokenizer.from_pretrained(_MODEL_REPO)
	pipe = pipeline(task="text-classification", model=model, tokenizer=tokenizer)

	# Keywords highlighted in the email and listed under "Suspicious Keywords".
	PHISHY_KEYWORDS = [
	    "verify", "urgent", "login", "click", "bank", "account", "update",
	    "password", "security", "alert", "confirm", "immediately",
	]
	# Substrings whose presence sets the "Contains Attachment" feature.
	ATTACHMENT_KEYWORDS = [
	    ".xls", ".xlsx", ".pdf", ".doc", ".docx", "attachment", "attached", "file",
	]
	# Substrings whose presence sets the "Operational Terms Present" feature.
	OPERATIONAL_KEYWORDS = [
	    "nom", "actual", "vols", "schedule", "attached", "report", "data", "summary",
	]
	# Lower-case month names (short and long forms) plus the years 2001-2025 as
	# strings; tokens found in this set are counted as "Date References".
	DATE_RELATED = {
	    "jan", "feb", "mar", "apr", "may", "jun",
	    "jul", "aug", "sep", "oct", "nov", "dec",
	    "january", "february", "march", "april", "june", "july", "august",
	    "september", "october", "november", "december",
	} | {str(year) for year in range(2001, 2026)}

	def detect_phishing(email_text):
	    """Classify an email with the module-level ``pipe`` and describe the verdict.

	    Args:
	        email_text: Raw email body to classify.

	    Returns:
	        A one-line message naming the verdict and the classifier's score
	        formatted as a percentage. Empty, whitespace-only or missing input
	        yields an empty string, because there is nothing to classify.
	    """
	    # Without this guard blank input is still sent to the model and reported
	    # as a verdict with a confidence figure.
	    if not email_text or not email_text.strip():
	        return ""

	    # truncation=True is forwarded to the tokenizer so over-long emails are
	    # clipped to the tokenizer's maximum length instead of passed through whole.
	    prediction = pipe(email_text, truncation=True)[0]
	    score = prediction["score"]
	    if prediction["label"] == "LABEL_1":
	        return f"Phishing detected! (Confidence: {score:.2%})"
	    return f"Legitimate email (Confidence: {score:.2%})"

	def highlight_suspicious_text(email_text, keywords=None):
	    """Return an HTML rendering of the email with suspicious keywords marked.

	    The email text is HTML-escaped before it is returned, so markup contained
	    in the email is displayed literally instead of being interpreted by
	    whatever renders the result.

	    Args:
	        email_text: Raw email body.
	        keywords: Optional iterable of keywords to mark. Defaults to
	            PHISHY_KEYWORDS. Matching is case-insensitive and limited to
	            whole words.

	    Returns:
	        An HTML string in which each keyword occurrence is wrapped in a
	        ``<mark>`` element. Empty or missing input yields an empty string.
	    """
	    import html  # local import: the only place in the file that needs it

	    if not email_text:
	        return ""
	    if keywords is None:
	        keywords = PHISHY_KEYWORDS

	    escaped_words = [re.escape(word) for word in keywords if word]
	    if not escaped_words:
	        return html.escape(email_text)

	    # A single combined pattern applied to the raw text in one pass: a keyword
	    # can then never match inside the markup inserted for another keyword.
	    alternation = "|".join(escaped_words)
	    pattern = re.compile(rf"\b({alternation})\b", re.IGNORECASE)

	    pieces = []
	    cursor = 0
	    for match in pattern.finditer(email_text):
	        # Escape the untouched text between matches, then the match itself.
	        pieces.append(html.escape(email_text[cursor:match.start()]))
	        pieces.append(
	            '<mark style="background-color: #ffcccc">'
	            f"{html.escape(match.group(1))}</mark>"
	        )
	        cursor = match.end()
	    pieces.append(html.escape(email_text[cursor:]))
	    return "".join(pieces)

	def extract_features(email_text):
	    """Summarise simple surface features of an email as a text report.

	    Args:
	        email_text: Raw email body. ``None`` is treated as empty text.

	    Returns:
	        A newline-separated string of ``"name: value"`` lines covering text
	        length, whitespace-token count, average token length, number of date
	        references, and whether attachment terms, operational terms and
	        suspicious keywords occur in the text.
	    """
	    email_text = email_text or ""
	    # Lower-case once; every keyword check below works on this copy.
	    lowered = email_text.lower()

	    tokens = lowered.split()
	    token_count = len(tokens)
	    avg_token_len = (
	        sum(len(token) for token in tokens) / token_count if token_count else 0
	    )

	    # Count dates on alphanumeric runs rather than whitespace tokens, so
	    # "2024," or "jan." are still recognised despite attached punctuation.
	    date_tokens = sum(
	        1 for word in re.findall(r"[a-z0-9]+", lowered) if word in DATE_RELATED
	    )

	    # These three checks are plain substring tests against the lowered text.
	    attachment_present = any(term in lowered for term in ATTACHMENT_KEYWORDS)
	    operational_terms = any(term in lowered for term in OPERATIONAL_KEYWORDS)
	    phishy_terms = [term for term in PHISHY_KEYWORDS if term in lowered]

	    features = {
	        "Text Length": len(email_text),
	        "Token Count": token_count,
	        "Avg Token Length": round(avg_token_len, 2),
	        "Date References": date_tokens,
	        "Contains Attachment": "Yes" if attachment_present else "No",
	        "Operational Terms Present": "Yes" if operational_terms else "No",
	        "Suspicious Keywords": ", ".join(phishy_terms) if phishy_terms else "None",
	    }
	    return "\n".join(f"{name}: {value}" for name, value in features.items())

	# Build the Gradio interface: one input box, three result tabs, and a set of
	# clickable example emails. Every change to the input re-runs all analyses.
	with gr.Blocks(title="Email Phishing Detector") as app:
	    gr.Markdown("# Zion's Email Phishing Detector")
	    gr.Markdown("Use this tool to analyse suspicious emails. It will tell you if the email is legitimate or a phishing attempt!")

	    with gr.Row():
	        email_input = gr.Textbox(label="Email Text", placeholder="Paste the email content here...", lines=10)

	    # NOTE(review): the tabs are placed below the input row; confirm this
	    # matches the intended layout.
	    with gr.Tabs():
	        with gr.TabItem("Detection"):
	            detection_output = gr.Textbox(label="Result")
	        with gr.TabItem("Suspicious Highlights"):
	            suspicious_output = gr.HTML(label="Suspicious Keywords Highlighted")
	        with gr.TabItem("Feature Breakdown"):
	            feature_output = gr.Textbox(label="Analysed Features", lines=8)

	    examples = [
	        ["Dear customer, your account has been compromised. Click here to verify your identity: http://bit.ly/2XyZABC"],
	        ["Hi team, please review the attached document for our quarterly meeting tomorrow."],
	        ["URGENT: Your PayPal account will be suspended unless you confirm your details now!"],
	        ["Hello John, just following up on our conversation yesterday about the project timeline."],
	        ["You've won a $1000 Amazon gift card! Click to claim your prize within 24 hours!"],
	    ]

	    gr.Examples(
	        examples=examples,
	        inputs=email_input,
	    )

	    def full_analysis(email_text):
	        """Run all three analyses and return them in output-component order.

	        Returns a tuple of (detection message, highlighted HTML, feature
	        report). Blank input returns three empty strings so that clearing
	        the textbox clears the results instead of classifying nothing.
	        """
	        if not email_text or not email_text.strip():
	            return "", "", ""
	        return (
	            detect_phishing(email_text),
	            highlight_suspicious_text(email_text),
	            extract_features(email_text),
	        )

	    # The order of `outputs` must match the tuple returned by full_analysis.
	    email_input.change(
	        fn=full_analysis,
	        inputs=email_input,
	        outputs=[detection_output, suspicious_output, feature_output],
	    )

	app.launch()