File size: 4,340 Bytes
9213634 bd40926 d332f5d bd40926 682e36a d332f5d 682e36a d332f5d 682e36a d332f5d bd40926 d332f5d bd40926 d332f5d bd40926 d332f5d bd40926 d332f5d bd40926 d332f5d 80585ad d332f5d bd40926 d332f5d bd40926 d332f5d bd40926 d332f5d 2042018 d332f5d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import gradio as gr
import re
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
import numpy as np
# Hugging Face Hub identifier shared by the classifier weights and its tokenizer.
_MODEL_ID = "zionia/email-phishing-detector"

# Load the tokenizer and the sequence-classification model once at start-up,
# then wrap both in a text-classification pipeline used by detect_phishing().
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(_MODEL_ID)
pipe = pipeline(task="text-classification", model=model, tokenizer=tokenizer)
# Words that are highlighted and reported as "suspicious" when found in an email.
PHISHY_KEYWORDS = [
    "verify",
    "urgent",
    "login",
    "click",
    "bank",
    "account",
    "update",
    "password",
    "security",
    "alert",
    "confirm",
    "immediately",
]

# File extensions and words used to decide whether an email mentions an attachment.
ATTACHMENT_KEYWORDS = [
    ".xls",
    ".xlsx",
    ".pdf",
    ".doc",
    ".docx",
    "attachment",
    "attached",
    "file",
]

# Terms used to decide whether an email contains "operational" vocabulary.
OPERATIONAL_KEYWORDS = [
    "nom",
    "actual",
    "vols",
    "schedule",
    "attached",
    "report",
    "data",
    "summary",
]

# Lower-case month names (abbreviated and full) plus the years 2001-2025 as
# strings; tokens found in this set are counted as date references.
DATE_RELATED = {
    "jan", "feb", "mar", "apr", "may", "jun",
    "jul", "aug", "sep", "oct", "nov", "dec",
    "january", "february", "march", "april", "may", "june",
    "july", "august", "september", "october", "november", "december",
}.union(str(year) for year in range(2001, 2026))
def detect_phishing(email_text):
    """Classify an email as phishing or legitimate.

    Args:
        email_text: Raw email body to classify.

    Returns:
        A human-readable verdict including the classifier's confidence, or an
        empty string when ``email_text`` is empty or only whitespace.
    """
    # A cleared textbox still fires the UI callback; classifying nothing would
    # produce a confident-looking but meaningless verdict.
    if not email_text or not email_text.strip():
        return ""

    # truncation=True clips inputs that exceed the model's maximum sequence
    # length instead of letting the pipeline fail on long emails.
    prediction = pipe(email_text, truncation=True)[0]
    score = prediction['score']

    if prediction['label'] == "LABEL_1":
        return f"Phishing detected! (Confidence: {score:.2%})"
    return f"Legitimate email (Confidence: {score:.2%})"
def highlight_suspicious_text(email_text, keywords=None):
    """Return ``email_text`` as safe HTML with suspicious keywords highlighted.

    The text is HTML-escaped first, because it is untrusted email content that
    is rendered by an HTML component; only the ``<mark>`` tags added here are
    emitted as markup.

    Args:
        email_text: Raw email body.
        keywords: Optional iterable of words to highlight. Defaults to
            ``PHISHY_KEYWORDS``.

    Returns:
        An HTML string in which every whole-word, case-insensitive occurrence
        of a keyword is wrapped in a ``<mark>`` element. Empty input yields an
        empty string.
    """
    import html  # local import keeps this fix independent of the file header

    if not email_text:
        return ""

    if keywords is None:
        keywords = PHISHY_KEYWORDS

    escaped = html.escape(email_text)

    alternatives = "|".join(re.escape(word) for word in keywords)
    if not alternatives:
        # Nothing to highlight; an empty alternation would match everywhere.
        return escaped

    # One pass over the text with a single alternation, so markup inserted for
    # one keyword is never re-scanned while looking for another.
    keyword_pattern = re.compile(rf'\b({alternatives})\b', re.IGNORECASE)
    return keyword_pattern.sub(r'<mark style="background-color: #ffcccc">\1</mark>', escaped)
def extract_features(email_text):
    """Summarise simple surface features of an email as display text.

    Args:
        email_text: Raw email body. ``None`` is treated as an empty string.

    Returns:
        A newline-separated string of ``"name: value"`` lines covering text
        length, whitespace-token statistics, date references, attachment and
        operational-term indicators, and the suspicious keywords found.
    """
    email_text = email_text or ""
    lowered = email_text.lower()  # computed once and reused by every check

    # Whitespace-delimited tokens drive the count/length statistics.
    tokens = lowered.split()
    token_count = len(tokens)
    avg_token_len = sum(len(token) for token in tokens) / token_count if token_count else 0

    # Alphanumeric runs with punctuation removed, so "Jan," and "2024." still
    # count as date references.
    words = re.findall(r"\w+", lowered)
    date_tokens = sum(1 for word in words if word in DATE_RELATED)

    # Attachment and operational markers are matched as substrings (extensions
    # such as ".xls" are not standalone words).
    attachment_present = any(marker in lowered for marker in ATTACHMENT_KEYWORDS)
    operational_terms = any(term in lowered for term in OPERATIONAL_KEYWORDS)

    # Whole-word matching, consistent with highlight_suspicious_text(), so the
    # listed keywords are exactly the ones that get highlighted.
    unique_words = set(words)
    phishy_terms = [word for word in PHISHY_KEYWORDS if word in unique_words]

    features = {
        "Text Length": len(email_text),
        "Token Count": token_count,
        "Avg Token Length": round(avg_token_len, 2),
        "Date References": date_tokens,
        "Contains Attachment": "Yes" if attachment_present else "No",
        "Operational Terms Present": "Yes" if operational_terms else "No",
        "Suspicious Keywords": ", ".join(phishy_terms) if phishy_terms else "None",
    }
    return "\n".join(f"{name}: {value}" for name, value in features.items())
# Build the Gradio interface: one input textbox feeding three result tabs.
# NOTE(review): the nesting below is reconstructed from statement order (the
# original indentation was not available) -- confirm the Tabs belong beside,
# not inside, the input Row.
with gr.Blocks(title="Email Phishing Detector") as app:
    gr.Markdown("# Zion's Email Phishing Detector")
    gr.Markdown("Use this tool to analyse suspicious emails. It will tell you if the email is legitimate or a phishing attempt!")

    with gr.Row():
        email_input = gr.Textbox(label="Email Text", placeholder="Paste the email content here...", lines=10)

    with gr.Tabs():
        with gr.TabItem("Detection"):
            detection_output = gr.Textbox(label="Result")
        with gr.TabItem("Suspicious Highlights"):
            suspicious_output = gr.HTML(label="Suspicious Keywords Highlighted")
        with gr.TabItem("Feature Breakdown"):
            feature_output = gr.Textbox(label="Analysed Features", lines=8)

    # Sample emails the user can click to populate the input box.
    examples = [
        ["Dear customer, your account has been compromised. Click here to verify your identity: http://bit.ly/2XyZABC"],
        ["Hi team, please review the attached document for our quarterly meeting tomorrow."],
        ["URGENT: Your PayPal account will be suspended unless you confirm your details now!"],
        ["Hello John, just following up on our conversation yesterday about the project timeline."],
        ["You've won a $1000 Amazon gift card! Click to claim your prize within 24 hours!"],
    ]
    gr.Examples(
        examples=examples,
        inputs=email_input,
    )

    def full_analysis(email_text):
        """Run all three analyses and return them in output-component order."""
        return detect_phishing(email_text), highlight_suspicious_text(email_text), extract_features(email_text)

    # Re-run the analysis whenever the input text changes.
    email_input.change(
        fn=full_analysis,
        inputs=email_input,
        outputs=[detection_output, suspicious_output, feature_output],
    )

# Only start the web server when executed as a script, so importing this
# module (e.g. for testing the helper functions) has no server side effect.
if __name__ == "__main__":
    app.launch()
|