"""Streamlit app that classifies a URL as phishing or legitimate.

Eight binary features are extracted from the URL, its fetched page and its
WHOIS record, then passed to a pre-trained model loaded from
``phishing_model.pkl``.
"""

import pickle
import socket
from datetime import datetime
from urllib.parse import urlparse

import pandas as pd
import requests
import streamlit as st
import whois
from bs4 import BeautifulSoup

# Column names/order handed to ``model.predict``. The spellings (including
# "popUpWidnow") are kept exactly as-is; presumably they match the columns the
# model was trained on -- verify before renaming anything.
FEATURE_COLUMNS = [
    'SFH', 'popUpWidnow', 'SSLfinal_State', 'Request_URL',
    'URL_of_Anchor', 'URL_Length', 'age_of_domain', 'having_IP_Address',
]

# Seconds to wait for the page download before giving up.
REQUEST_TIMEOUT = 5


def _split_url(url):
    """Return ``(scheme, hostname)`` for *url*; ``("", None)`` if unparsable.

    ``hostname`` (unlike ``netloc``) has any port, credentials and IPv6
    brackets removed, which is what the IP and WHOIS checks need.
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse raises ValueError for e.g. malformed IPv6 netlocs.
        return "", None
    return parsed.scheme, parsed.hostname


def _has_ip_address(host):
    """Return 1 if *host* is an IPv4 or IPv6 literal, otherwise 0."""
    if not host:
        return 0
    try:
        socket.inet_aton(host)
        return 1
    except (OSError, ValueError):
        pass
    try:
        socket.inet_pton(socket.AF_INET6, host)
        return 1
    except (OSError, ValueError):
        return 0


def _fetch_page(url):
    """Download *url* once and parse it.

    Returns ``(response, soup)``. ``response`` is ``None`` when the request
    fails; ``soup`` is ``None`` when the request or the HTML parsing fails.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except (requests.RequestException, ValueError):
        # Best effort: an unreachable or invalid URL is not an error here,
        # the page-based features just fall back to their defaults.
        return None, None
    try:
        soup = BeautifulSoup(response.content, "html.parser")
    except Exception:
        soup = None
    return response, soup


def _anchor_feature(soup, url):
    """Return 1 if the page is missing, has no links, or mostly "foreign" links.

    A link is counted as foreign when its ``href`` does not start with *url*,
    so relative links count as foreign as well.
    """
    if soup is None:
        return 1
    hrefs = [anchor['href'] for anchor in soup.find_all("a", href=True)]
    if not hrefs:
        return 1
    foreign = sum(1 for href in hrefs if not href.startswith(url))
    return 1 if foreign / len(hrefs) > 0.5 else 0


def _domain_age_feature(host):
    """Return 1 if WHOIS says *host* was created more than 180 days ago, else 0.

    Any lookup or parsing problem yields 0.
    """
    if not host:
        return 0
    try:
        created = whois.whois(host).creation_date
        if isinstance(created, list):
            created = created[0]
        # Use the record's own tzinfo so that aware and naive creation dates
        # can both be subtracted without a TypeError.
        now = datetime.now(created.tzinfo)
        return 1 if (now - created).days > 180 else 0
    except Exception:
        # Deliberately broad: the lookup can fail in several ways (network
        # errors, parser errors, missing or oddly typed creation_date).
        return 0


def _sfh_feature(soup):
    """Return 1 if the page is missing, has no forms, or has a suspicious form.

    A form is suspicious when its ``action`` does not start with "http";
    this also covers "about:blank".
    """
    if soup is None:
        return 1
    forms = soup.find_all("form", action=True)
    if not forms:
        return 1
    suspicious = any(not form['action'].startswith("http") for form in forms)
    return 1 if suspicious else 0


def extract_features(url):
    """Build the feature vector for *url*.

    Performs one HTTP GET and one WHOIS lookup. Failures never raise; the
    affected features fall back to fixed defaults.

    Args:
        url: The URL to analyse, including its scheme.

    Returns:
        A list of eight 0/1 integers in the order ``[SFH, popUpWidnow,
        SSLfinal_State, Request_URL, URL_of_Anchor, URL_Length,
        age_of_domain, having_IP_Address]``.
    """
    scheme, host = _split_url(url)

    # Fetch once and reuse the result for every page-based feature.
    response, soup = _fetch_page(url)

    having_ip_address = _has_ip_address(host)
    url_length = 1 if len(url) >= 54 else 0
    url_of_anchor = _anchor_feature(soup, url)
    age_of_domain = _domain_age_feature(host)
    # urlparse lower-cases the scheme, so "HTTPS://" is recognised too.
    ssl_final_state = 1 if scheme == "https" else 0
    # 0 only when the request succeeded and ended at exactly the given URL.
    request_url = 0 if response is not None and response.url == url else 1
    sfh = _sfh_feature(soup)
    pop_up_window = (
        1 if response is not None and "window.open" in response.text else 0
    )

    return [
        sfh, pop_up_window, ssl_final_state, request_url,
        url_of_anchor, url_length, age_of_domain, having_ip_address,
    ]


def predict_url(url, model):
    """Classify *url* with *model*.

    Args:
        url: The URL to analyse.
        model: Object exposing ``predict(DataFrame)`` that returns a sequence
            whose first element is the predicted label.

    Returns:
        ``"Phishing"`` for label 1, ``"Legitimate"`` for label 0, and
        ``"Unknown"`` for anything else.
    """
    features_df = pd.DataFrame([extract_features(url)], columns=FEATURE_COLUMNS)
    label = model.predict(features_df)[0]
    if label == 1:
        return "Phishing"
    if label == 0:
        return "Legitimate"
    return "Unknown"


# Streamlit app configuration
st.set_page_config(page_title='Phishing URL Detection', layout='centered')

# App header
st.markdown(""" """, unsafe_allow_html=True)
st.title('🔍 Phishing URL Detection App')
st.write('Enter a URL to check if it is Phishing or Legitimate.')

# Load the trained model.
# NOTE(security): pickle.load can execute arbitrary code contained in the
# file, so phishing_model.pkl must only ever come from a trusted source.
with open('phishing_model.pkl', 'rb') as f:
    model = pickle.load(f)

# Input URL
url_input = st.text_input('Enter URL:', '')

if st.button('Check URL'):
    candidate_url = url_input.strip()
    if not candidate_url:
        st.warning('Please enter a valid URL.')
    else:
        url_scheme, url_host = _split_url(candidate_url)
        if url_scheme not in ("http", "https") or not url_host:
            # Without a scheme and host the page cannot be fetched and every
            # page-based feature would default to "suspicious".
            st.warning(
                'Please enter a valid URL, including http:// or https://.'
            )
        else:
            try:
                # Make prediction
                result = predict_url(candidate_url, model)
                if result == 'Phishing':
                    st.error(
                        '🚨 This URL is likely a **Phishing Site**. Be careful!'
                    )
                elif result == 'Legitimate':
                    st.success('✅ This URL is likely **Legitimate**.')
                else:
                    st.warning('⚠️ Unable to determine. Try again later.')
            except Exception as e:
                st.error(f'Error: {e}')