|
import html
import re
import string

import gradio as gr
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
|
|
|
# Identifier handed to ``from_pretrained`` for both the classifier weights and
# the tokenizer, kept in one place so the two cannot drift apart.
_MODEL_ID = "zionia/email-phishing-detector"

tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(_MODEL_ID)

# Text-classification pipeline used by ``detect_phishing``.
pipe = pipeline(task="text-classification", model=model, tokenizer=tokenizer)
|
|
|
# Terms that get highlighted in the email and reported as "Suspicious Keywords".
PHISHY_KEYWORDS = [
    "verify",
    "urgent",
    "login",
    "click",
    "bank",
    "account",
    "update",
    "password",
    "security",
    "alert",
    "confirm",
    "immediately",
]

# Substrings whose presence makes the "Contains Attachment" feature "Yes".
ATTACHMENT_KEYWORDS = [
    ".xls",
    ".xlsx",
    ".pdf",
    ".doc",
    ".docx",
    "attachment",
    "attached",
    "file",
]

# Substrings whose presence makes the "Operational Terms Present" feature "Yes".
OPERATIONAL_KEYWORDS = [
    "nom",
    "actual",
    "vols",
    "schedule",
    "attached",
    "report",
    "data",
    "summary",
]

# Lowercase tokens counted as date references: month abbreviations, full month
# names, and the years 2001 through 2025 as strings.
DATE_RELATED = {
    # Month abbreviations.
    "jan", "feb", "mar", "apr", "may", "jun",
    "jul", "aug", "sep", "oct", "nov", "dec",
    # Full month names ("may" is already covered by the abbreviations).
    "january", "february", "march", "april", "june", "july",
    "august", "september", "october", "november", "december",
    # Years, stored as strings so they compare equal to text tokens.
    *(str(year) for year in range(2001, 2026)),
}
|
|
|
def detect_phishing(email_text):
    """Classify an email with the module-level ``pipe`` and describe the verdict.

    Args:
        email_text: Email content to classify. ``None``, empty, or
            whitespace-only input is not sent to the model.

    Returns:
        A human-readable string. ``"No email text provided."`` for blank
        input; otherwise a phishing verdict when the top label is
        ``"LABEL_1"`` and a legitimate verdict for any other label, each
        followed by the model score formatted as a percentage.
    """
    # The UI re-runs the analysis on every textbox change, including when the
    # box is cleared; a verdict on blank text would be meaningless.
    if not email_text or not email_text.strip():
        return "No email text provided."

    # Ask the tokenizer to truncate so that emails longer than the model's
    # maximum sequence length are still classified instead of failing.
    top_prediction = pipe(email_text, truncation=True)[0]
    label = top_prediction["label"]
    score = top_prediction["score"]

    if label == "LABEL_1":
        return f"Phishing detected! (Confidence: {score:.2%})"
    return f"Legitimate email (Confidence: {score:.2%})"
|
|
|
def highlight_suspicious_text(email_text):
    """Render *email_text* as HTML with suspicious keywords wrapped in ``<mark>``.

    Every whole-word, case-insensitive occurrence of an entry in
    ``PHISHY_KEYWORDS`` is wrapped in a ``<mark>`` tag with a light red
    background. All text taken from the email is HTML-escaped, because the
    result is displayed in an HTML component and the email is untrusted input.

    Args:
        email_text: Raw email content. ``None`` or empty input yields ``""``.

    Returns:
        An HTML string in which only the inserted ``<mark>`` tags are markup.
    """
    if not email_text:
        return ""
    if not PHISHY_KEYWORDS:
        # An empty alternation would match at every word boundary.
        return html.escape(email_text)

    # One combined pattern means the text is scanned once and markup inserted
    # for one keyword can never be re-matched by another. Longest keywords
    # come first so a longer phrase wins over a keyword that is its prefix.
    alternatives = "|".join(
        re.escape(word) for word in sorted(PHISHY_KEYWORDS, key=len, reverse=True)
    )
    keyword_pattern = re.compile(rf"\b({alternatives})\b", re.IGNORECASE)

    # With exactly one capture group, ``split`` alternates between plain text
    # (even indexes) and the matched keyword (odd indexes).
    rendered = []
    for index, piece in enumerate(keyword_pattern.split(email_text)):
        safe_piece = html.escape(piece)
        if index % 2:
            rendered.append(
                f'<mark style="background-color: #ffcccc">{safe_piece}</mark>'
            )
        else:
            rendered.append(safe_piece)
    return "".join(rendered)
|
|
|
def extract_features(email_text):
    """Summarise simple surface features of an email as ``"name: value"`` lines.

    Args:
        email_text: Raw email content. ``None`` is treated as empty text.

    Returns:
        A newline-separated string with one line each for: text length,
        whitespace token count, average token length (two decimals), number
        of date-related tokens, whether an attachment term appears, whether
        an operational term appears, and the suspicious keywords found.
    """
    email_text = email_text or ""
    lowered = email_text.lower()  # computed once; reused by every check below

    tokens = lowered.split()
    token_count = len(tokens)
    avg_token_len = sum(map(len, tokens)) / token_count if token_count else 0

    # Strip surrounding punctuation so that tokens such as "January," or
    # "(2005)" are still recognised as date references.
    date_tokens = sum(
        1 for token in tokens if token.strip(string.punctuation) in DATE_RELATED
    )

    # Substring checks: file extensions appear inside names like "report.xlsx".
    attachment_present = any(term in lowered for term in ATTACHMENT_KEYWORDS)
    operational_terms = any(term in lowered for term in OPERATIONAL_KEYWORDS)

    # Whole-word, case-insensitive matching, so that this list agrees with
    # the keywords highlighted by ``highlight_suspicious_text``.
    phishy_terms = [
        word
        for word in PHISHY_KEYWORDS
        if re.search(rf"\b{re.escape(word)}\b", email_text, re.IGNORECASE)
    ]

    features = {
        "Text Length": len(email_text),
        "Token Count": token_count,
        "Avg Token Length": round(avg_token_len, 2),
        "Date References": date_tokens,
        "Contains Attachment": "Yes" if attachment_present else "No",
        "Operational Terms Present": "Yes" if operational_terms else "No",
        "Suspicious Keywords": ", ".join(phishy_terms) if phishy_terms else "None",
    }
    return "\n".join(f"{name}: {value}" for name, value in features.items())
|
|
|
# Build the Gradio interface: one input textbox, three result tabs, a set of
# clickable example emails, and a change handler that refreshes every tab.
with gr.Blocks(title="Email Phishing Detector") as app:
    gr.Markdown("# Zion's Email Phishing Detector")
    gr.Markdown(
        "Use this tool to analyse suspicious emails. It will tell you if the email is legitimate or a phishing attempt!"
    )

    with gr.Row():
        email_input = gr.Textbox(
            label="Email Text",
            placeholder="Paste the email content here...",
            lines=10,
        )

    # One tab per analysis result.
    with gr.Tabs():
        with gr.TabItem("Detection"):
            detection_output = gr.Textbox(label="Result")
        with gr.TabItem("Suspicious Highlights"):
            suspicious_output = gr.HTML(label="Suspicious Keywords Highlighted")
        with gr.TabItem("Feature Breakdown"):
            feature_output = gr.Textbox(label="Analysed Features", lines=8)

    # Sample emails offered to the user; each entry fills the input textbox.
    examples = [
        ["Dear customer, your account has been compromised. Click here to verify your identity: http://bit.ly/2XyZABC"],
        ["Hi team, please review the attached document for our quarterly meeting tomorrow."],
        ["URGENT: Your PayPal account will be suspended unless you confirm your details now!"],
        ["Hello John, just following up on our conversation yesterday about the project timeline."],
        ["You've won a $1000 Amazon gift card! Click to claim your prize within 24 hours!"],
    ]

    gr.Examples(examples=examples, inputs=email_input)

    def full_analysis(email_text):
        """Run all three analyses and return their results in tab order."""
        verdict = detect_phishing(email_text)
        highlighted_html = highlight_suspicious_text(email_text)
        feature_summary = extract_features(email_text)
        return verdict, highlighted_html, feature_summary

    # Re-run the full analysis whenever the textbox content changes.
    email_input.change(
        fn=full_analysis,
        inputs=email_input,
        outputs=[detection_output, suspicious_output, feature_output],
    )

app.launch()
|
|