"""Gradio app that runs a lightweight on-page SEO audit for a single URL.

The audit fetches the page once, inspects the parsed HTML (title, meta
description, headings, links, images, Open Graph tags, ...), probes for
``robots.txt`` / ``sitemap.xml`` and renders the findings as text, HTML and a
bar chart.
"""

import base64
import concurrent.futures
import io
import random
import re
import time
from collections import Counter
from datetime import datetime
from html import escape
from urllib.parse import urljoin, urlparse

import gradio as gr
import matplotlib
import numpy as np
import pandas as pd  # noqa: F401  (kept: existing file-level import)
import requests
from bs4 import BeautifulSoup

# Charts are rendered inside server worker threads, so force the
# non-interactive backend before pyplot is imported.
matplotlib.use("Agg")
import matplotlib.pyplot as plt  # noqa: E402

# Common words that carry no keyword value.
_STOPWORDS = frozenset({
    'this', 'that', 'with', 'from', 'have', 'were',
    'they', 'will', 'what', 'when', 'where', 'which',
})
# Candidate keywords: purely alphabetic words of 4-15 letters.
_WORD_RE = re.compile(r'\b[a-zA-Z]{4,15}\b')

# Points per check type. Warnings are deliberately score-neutral and are not
# part of the normalisation base either (see SEOChecker._compute_score).
_SCORE_WEIGHTS = {"good": 10, "info": 5, "warning": 0, "error": -10}

# Presentation tables shared by the text and HTML reports.
_CHECK_ICONS = {"good": "✓", "info": "ℹ", "warning": "⚠", "error": "✕"}
_CHECK_BG_COLORS = {
    "good": "#e8f5e9",
    "info": "#e3f2fd",
    "warning": "#fff8e1",
    "error": "#ffebee",
}
_CHECK_TEXT_COLORS = {
    "good": "#2e7d32",
    "info": "#1565c0",
    "warning": "#f57c00",
    "error": "#c62828",
}


class SEOChecker:
    """Fetch a web page and evaluate a set of basic on-page SEO signals."""

    def __init__(self):
        self.user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        self.headers = {'User-Agent': self.user_agent}

    def get_page_content(self, url):
        """Fetch a page.

        A missing scheme defaults to ``https://``.

        Returns:
            A ``(html, final_url, error)`` tuple. On success ``error`` is
            ``None`` and ``final_url`` is the URL after redirects; on failure
            ``html`` is ``None`` and ``error`` is a non-empty message.
        """
        url = (url or "").strip()
        if not url.startswith(('http://', 'https://')):
            url = "https://" + url
        try:
            response = requests.get(url, headers=self.headers, timeout=15)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            # Some exceptions stringify to ""; fall back to the class name so
            # callers can rely on a truthy error message.
            return None, url, str(e) or type(e).__name__
        # Report the post-redirect URL so HTTPS and robots/sitemap checks look
        # at the site that actually served the page.
        return response.text, response.url, None

    def check_page_speed(self, url):
        """Return the seconds one GET of ``url`` takes, or ``None`` on failure."""
        started = time.perf_counter()
        try:
            requests.get(url, headers=self.headers, timeout=10)
        except requests.exceptions.RequestException:
            return None
        return time.perf_counter() - started

    def get_keyword_suggestions(self, content):
        """Return up to ten ``(word, count)`` pairs, most frequent first.

        Args:
            content: Raw HTML of the page (may be empty or ``None``).
        """
        if not content:
            return []
        # A separator keeps words from adjacent elements apart; without it
        # "<li>Home</li><li>About</li>" would yield the bogus word "homeabout".
        text = BeautifulSoup(content, "html.parser").get_text(separator=" ")
        words = _WORD_RE.findall(text.lower())
        frequencies = Counter(word for word in words if word not in _STOPWORDS)
        return frequencies.most_common(10)

    def analyze_seo(self, url):
        """Run every check against ``url`` and return the result dictionary.

        The result always has ``status`` (``"success"`` or ``"error"``),
        ``url``, ``details``, ``checks``, ``suggestions``, ``keywords`` and
        ``score``; error results additionally carry ``message``.
        """
        content, final_url, error = self.get_page_content(url)
        if error is not None or content is None:
            return {
                "status": "error",
                "message": f"Error accessing URL: {error}",
                "url": final_url,
                "details": {},
                "checks": [],
                "score": 0,
                "suggestions": [],
                "keywords": [],
            }

        soup = BeautifulSoup(content, "html.parser")
        result = {
            "status": "success",
            "url": final_url,
            "details": {},
            "checks": [],
            "suggestions": [],
            "keywords": self.get_keyword_suggestions(content),
        }

        # The order below is the order checks and suggestions are displayed in.
        self._check_title(soup, result)
        self._check_meta_description(soup, result)
        self._check_canonical(soup, result)
        self._check_headings(soup, result)
        self._check_viewport(soup, result)
        self._check_https(final_url, result)
        self._check_images(soup, result)
        self._check_robots_and_sitemap(final_url, result)
        self._check_open_graph(soup, result)
        self._check_links(soup, final_url, result)
        self._check_text_ratio(soup, content, result)
        self._check_speed(final_url, result)

        result["score"] = self._compute_score(result["checks"])
        return result

    def check_file_exists(self, url):
        """Return True when ``url`` answers with HTTP 200.

        Redirects are followed (``requests.head`` does not do so by default),
        and servers that refuse HEAD are retried with a streamed GET.
        """
        try:
            response = requests.head(
                url, headers=self.headers, timeout=5, allow_redirects=True
            )
            if response.status_code in (405, 501):
                # stream=True avoids downloading the body just to read the status.
                with requests.get(
                    url, headers=self.headers, timeout=5, stream=True
                ) as fallback:
                    return fallback.status_code == 200
            return response.status_code == 200
        except requests.exceptions.RequestException:
            return False

    def generate_chart(self, result):
        """Map each chart category to a pass/fail bool (``None`` for error results)."""
        if result["status"] == "error":
            return None

        details = result["details"]
        images = details["images"]
        return {
            "title": details["title"]["status"] == "good",
            "meta_description": details["meta_description"]["status"] == "good",
            "headings": details["headings"]["h1"] == 1,
            "https": bool(details["https"]),
            "images": images["total"] == 0 or images["missing_alt"] == 0,
            "robots_sitemap": bool(details["robots_txt"] and details["sitemap_xml"]),
            "open_graph": all(details["open_graph"].values()),
        }

    # ------------------------------------------------------------------
    # Individual checks. Each one fills result["details"] and appends to
    # result["checks"] / result["suggestions"].
    # ------------------------------------------------------------------

    @staticmethod
    def _record(result, kind, message, suggestion=None):
        """Append one check entry and, optionally, its improvement suggestion."""
        result["checks"].append({"type": kind, "message": message})
        if suggestion:
            result["suggestions"].append(suggestion)

    @staticmethod
    def _length_status(text, low, high):
        """Classify ``text``: "error" if empty, "good" within [low, high], else "warning"."""
        if not text:
            return "error"
        return "good" if low <= len(text) <= high else "warning"

    def _check_title(self, soup, result):
        """Check presence and length (10-60 chars) of the <title> tag."""
        # get_text() copes with an empty <title> or one with child nodes,
        # where .string is None and .string.strip() would raise.
        title = soup.title.get_text(strip=True) if soup.title else ""
        title_len = len(title)
        result["details"]["title"] = {
            "content": title,
            "length": title_len,
            "status": self._length_status(title, 10, 60),
        }

        if not title:
            self._record(result, "error", "Missing <title> tag",
                         "Add a descriptive title tag between 50-60 characters")
        elif title_len > 60:
            self._record(result, "warning", f"Title is too long ({title_len} chars)",
                         "Keep title under 60 characters for better display in search results")
        elif title_len < 10:
            self._record(result, "warning", f"Title is too short ({title_len} chars)",
                         "Make title more descriptive (30-60 characters recommended)")
        else:
            self._record(result, "good", f"Title length is good ({title_len} chars)")

    def _check_meta_description(self, soup, result):
        """Check presence and length (50-160 chars) of the meta description."""
        desc_tag = soup.find("meta", attrs={"name": "description"})
        desc = desc_tag["content"].strip() if desc_tag and desc_tag.get("content") else ""
        desc_len = len(desc)
        result["details"]["meta_description"] = {
            "content": desc,
            "length": desc_len,
            "status": self._length_status(desc, 50, 160),
        }

        if not desc:
            self._record(result, "error", "Missing meta description",
                         "Add a meta description summarizing your page content")
        elif desc_len > 160:
            self._record(result, "warning", f"Meta description is too long ({desc_len} chars)",
                         "Keep meta description under 160 characters")
        elif desc_len < 50:
            self._record(result, "warning", f"Meta description is too short ({desc_len} chars)",
                         "Make meta description more informative (100-160 chars recommended)")
        else:
            self._record(result, "good", f"Meta description length is good ({desc_len} chars)")

    def _check_canonical(self, soup, result):
        """Check for a <link rel="canonical"> element."""
        canonical = soup.find("link", rel="canonical")
        result["details"]["canonical"] = {
            "exists": canonical is not None,
            "url": canonical.get("href") if canonical is not None else None,
        }
        if canonical is None:
            self._record(result, "warning", "Missing canonical link",
                         "Add a canonical link to prevent duplicate content issues")
        else:
            self._record(result, "good", "Canonical link is present")

    def _check_headings(self, soup, result):
        """Count h1-h6 and check for exactly one H1 and at least three headings."""
        headings = {f"h{level}": len(soup.find_all(f"h{level}")) for level in range(1, 7)}
        result["details"]["headings"] = headings
        h1_count = headings["h1"]

        if h1_count == 0:
            self._record(result, "error", "No H1 heading found",
                         "Add a single H1 heading that describes your main content")
        elif h1_count > 1:
            self._record(result, "warning", f"Multiple H1 headings found ({h1_count})",
                         "Use only one H1 heading per page for SEO clarity")
        else:
            self._record(result, "good", "Single H1 heading structure is good")

        if sum(headings.values()) < 3:
            self._record(result, "warning", "Few headings used in content",
                         "Structure content with more headings for readability and SEO")

    def _check_viewport(self, soup, result):
        """Check for a viewport meta tag."""
        has_viewport = soup.find("meta", attrs={"name": "viewport"}) is not None
        result["details"]["viewport"] = has_viewport
        if has_viewport:
            self._record(result, "good", "Viewport meta tag is present")
        else:
            self._record(result, "warning", "No viewport meta tag",
                         "Add viewport meta tag for mobile responsiveness")

    def _check_https(self, final_url, result):
        """Check that the page was served over HTTPS."""
        is_https = final_url.startswith("https://")
        result["details"]["https"] = is_https
        if is_https:
            self._record(result, "good", "Site is using HTTPS")
        else:
            self._record(result, "error", "Site is not using HTTPS",
                         "Install SSL and redirect HTTP to HTTPS for security and SEO")

    def _check_images(self, soup, result):
        """Count images and those without a non-empty alt attribute."""
        images = soup.find_all("img")
        images_no_alt = [img.get('src', '(no src)') for img in images if not img.get("alt")]
        result["details"]["images"] = {
            "total": len(images),
            "missing_alt": len(images_no_alt),
            "examples_missing_alt": images_no_alt[:3],
        }
        if not images:
            return
        if images_no_alt:
            self._record(result, "warning",
                         f"{len(images_no_alt)} of {len(images)} images missing alt text",
                         "Add descriptive alt attributes to all images for accessibility and SEO")
        else:
            self._record(result, "good", "All images have alt text")

    def _check_robots_and_sitemap(self, final_url, result):
        """Probe /robots.txt and /sitemap.xml on the page's origin in parallel."""
        parsed_url = urlparse(final_url)
        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"

        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
            robots_future = executor.submit(
                self.check_file_exists, urljoin(base_url, "/robots.txt"))
            sitemap_future = executor.submit(
                self.check_file_exists, urljoin(base_url, "/sitemap.xml"))
            robots_exists = robots_future.result()
            sitemap_exists = sitemap_future.result()

        result["details"]["robots_txt"] = robots_exists
        result["details"]["sitemap_xml"] = sitemap_exists

        if robots_exists:
            self._record(result, "good", "robots.txt file exists")
        else:
            self._record(result, "warning", "robots.txt not found",
                         "Create a robots.txt file to guide search engines")

        if sitemap_exists:
            self._record(result, "good", "sitemap.xml file exists")
        else:
            self._record(result, "warning", "sitemap.xml not found",
                         "Add a sitemap.xml file for better crawling")

    def _check_open_graph(self, soup, result):
        """Check for og:title, og:description and og:image meta tags."""
        og_tags = {
            name: soup.find("meta", property=f"og:{name}") is not None
            for name in ("title", "description", "image")
        }
        result["details"]["open_graph"] = og_tags
        og_missing = [name for name, exists in og_tags.items() if not exists]
        if og_missing:
            self._record(result, "warning",
                         f"Missing Open Graph tags: {', '.join(og_missing)}",
                         "Add Open Graph meta tags to improve sharing on social media")
        else:
            self._record(result, "good", "Open Graph meta tags are present")

    @staticmethod
    def _normalize_host(netloc):
        """Lower-case a netloc and drop a leading "www." for host comparison."""
        host = netloc.lower()
        return host[4:] if host.startswith("www.") else host

    def _check_links(self, soup, final_url, result):
        """Count internal vs. external http(s) links on the page."""
        page_host = self._normalize_host(urlparse(final_url).netloc)
        internal = 0
        external = 0

        for anchor in soup.find_all("a", href=True):
            href = anchor["href"].strip()
            if not href or href.startswith('#'):
                continue
            # Resolve against the page URL so relative ("page.html") and
            # protocol-relative ("//cdn.example.com/x") links are classified
            # by their real host instead of by substring matching.
            try:
                target = urlparse(urljoin(final_url, href))
            except ValueError:
                continue  # malformed href
            if target.scheme not in ("http", "https"):
                continue  # mailto:, tel:, javascript:, ...
            if self._normalize_host(target.netloc) == page_host:
                internal += 1
            else:
                external += 1

        result["details"]["links"] = {
            "internal": internal,
            "external": external,
            "total": internal + external,
        }
        self._record(result, "info",
                     f"Found {internal} internal and {external} external links")
        # Only nag about internal linking when the page has links at all.
        if internal < 2 and (internal or external):
            result["suggestions"].append("Add more internal links to improve site structure")

    def _check_text_ratio(self, soup, content, result):
        """Compare extracted text length with the raw HTML length."""
        html_size = len(content)
        text_size = len(soup.get_text())
        text_ratio = (text_size / html_size) * 100 if html_size > 0 else 0
        result["details"]["content"] = {
            "html_size": html_size,
            "text_size": text_size,
            "text_ratio": text_ratio,
        }
        if text_ratio < 10:
            self._record(result, "warning", f"Low text-to-HTML ratio: {text_ratio:.1f}%",
                         "Increase text content relative to HTML for better SEO")
        else:
            self._record(result, "good", f"Text-to-HTML ratio: {text_ratio:.1f}%")

    def _check_speed(self, final_url, result):
        """Time one extra fetch of the page; no check is recorded if it fails."""
        load_time = self.check_page_speed(final_url)
        result["details"]["page_speed"] = load_time
        if load_time is None:
            return
        if load_time > 2:
            self._record(result, "warning", f"Slow page load time: {load_time:.2f} seconds",
                         "Optimize page speed by reducing file sizes and requests")
        else:
            self._record(result, "good", f"Page load time: {load_time:.2f} seconds")

    @staticmethod
    def _compute_score(checks):
        """Fold the check list into a 0-100 score.

        Only "good" and "error" checks form the normalisation base, so a page
        with all-good checks scores 100 and one with all errors scores 0;
        warnings do not move the score. With no good/error checks the score
        is a neutral 50.
        """
        total_points = sum(_SCORE_WEIGHTS.get(check["type"], 0) for check in checks)
        max_score = 10 * sum(1 for check in checks if check["type"] in ("good", "error"))
        if max_score <= 0:
            return 50
        percentage = (total_points + max_score) / (2 * max_score) * 100
        return round(min(100, max(0, percentage)))


def _summary_html(result):
    """Render the score dial, report heading and per-type check counters."""
    counts = Counter(check["type"] for check in result["checks"])
    score = result["score"]
    badge_labels = (
        ("good", "Passed"),
        ("warning", "Warnings"),
        ("error", "Errors"),
        ("info", "Info"),
    )
    badges = "\n".join(
        f'<span style="background-color: {_CHECK_BG_COLORS[kind]}; '
        f'color: {_CHECK_TEXT_COLORS[kind]}; padding: 5px 10px; '
        f'border-radius: 4px; font-size: 12px;"> '
        f'{_CHECK_ICONS[kind]} {counts[kind]} {label} </span>'
        for kind, label in badge_labels
    )
    generated = datetime.now().strftime('%Y-%m-%d %H:%M')
    report_url = escape(str(result["url"]))
    return f"""
    <div style="display: flex; align-items: center; margin-bottom: 20px;">
      <div style="width: 120px; height: 120px; position: relative; margin-right: 20px;">
        <div style="position: absolute; width: 100%; height: 100%; border-radius: 50%; background: conic-gradient( from 0deg, #4caf50 0% {score}%, #e0e0e0 {score}% 100% );"></div>
        <div style="position: absolute; top: 10px; left: 10px; right: 10px; bottom: 10px; background: white; border-radius: 50%; display: flex; align-items: center; justify-content: center; flex-direction: column;">
          <span style="font-size: 28px; font-weight: bold;">{score}</span>
          <span style="font-size: 12px;">SEO Score</span>
        </div>
      </div>
      <div>
        <h2 style="margin: 0 0 10px 0;">SEO Report for {report_url}</h2>
        <div style="display: flex; flex-wrap: wrap; gap: 10px;">
          {badges}
        </div>
        <div style="margin-top: 10px; color: #555; font-size: 13px;"> Generated on {generated} </div>
      </div>
    </div>
    """


def _details_table_html(details):
    """Render the "Page Details" panel."""
    def mark(flag):
        return "✓" if flag else "✕"

    def length_mark(entry):
        return " ✓" if entry["status"] == "good" else " ⚠"

    title = details["title"]
    desc = details["meta_description"]
    headings = details["headings"]
    links = details["links"]
    images = details["images"]

    # Title and description come straight from the audited page: escape them.
    title_text = escape(title["content"]) or "Missing"
    desc_text = escape(desc["content"]) or "Missing"

    label = "padding: 8px 0; border-bottom: 1px solid #ddd; color: #777;"
    value = "padding: 8px 0; border-bottom: 1px solid #ddd;"
    note = "font-size: 12px; color: #777; margin-top: 4px;"

    return f"""
    <div style="background-color: #f5f5f5; border-radius: 8px; padding: 15px;">
      <h3 style="margin-top: 0; color: #333;">Page Details</h3>
      <table style="width: 100%; border-collapse: collapse;">
        <tr>
          <td style="padding: 8px 0; border-bottom: 1px solid #ddd; width: 40%; color: #777;">Title</td>
          <td style="{value}">
            {title_text}
            <div style="{note}"> Length: {title["length"]} chars{length_mark(title)} </div>
          </td>
        </tr>
        <tr>
          <td style="{label}">Meta Description</td>
          <td style="{value}">
            {desc_text}
            <div style="{note}"> Length: {desc["length"]} chars{length_mark(desc)} </div>
          </td>
        </tr>
        <tr>
          <td style="{label}">Headings</td>
          <td style="{value}"> H1: {headings["h1"]}, H2: {headings["h2"]}, H3: {headings["h3"]} </td>
        </tr>
        <tr>
          <td style="{label}">Security &amp; Files</td>
          <td style="{value}"> HTTPS: {mark(details["https"])}, robots.txt: {mark(details["robots_txt"])}, sitemap.xml: {mark(details["sitemap_xml"])} </td>
        </tr>
        <tr>
          <td style="{label}">Links</td>
          <td style="{value}"> Internal: {links["internal"]}, External: {links["external"]} </td>
        </tr>
        <tr>
          <td style="padding: 8px 0; color: #777;">Images</td>
          <td style="padding: 8px 0;"> Total: {images["total"]}, Missing alt: {images["missing_alt"]} </td>
        </tr>
      </table>
    </div>
    """


def _keywords_table_html(keywords):
    """Render the "Top Potential Keywords" panel."""
    rows = "\n".join(
        f"""
        <tr>
          <td style="padding: 8px; border-bottom: 1px solid #ddd;">{escape(str(keyword))}</td>
          <td style="padding: 8px; border-bottom: 1px solid #ddd; text-align: right;">{count}</td>
        </tr>"""
        for keyword, count in keywords
    )
    return f"""
    <div style="background-color: #f5f5f5; border-radius: 8px; padding: 15px;">
      <h3 style="margin-top: 0; color: #333;">Top Potential Keywords</h3>
      <div style="max-height: 200px; overflow-y: auto;">
        <table style="width: 100%; border-collapse: collapse;">
          <tr style="background-color: #eee;">
            <th style="padding: 8px; text-align: left; border-bottom: 1px solid #ddd;">Keyword</th>
            <th style="padding: 8px; text-align: right; border-bottom: 1px solid #ddd;">Frequency</th>
          </tr>
          {rows}
        </table>
      </div>
    </div>
    """


def _checks_html(checks):
    """Render one coloured card per check."""
    cards = "\n".join(
        f"""
        <div style="background-color: {_CHECK_BG_COLORS[check["type"]]}; border-radius: 8px; padding: 12px; position: relative;">
          <div style="position: absolute; top: 12px; right: 12px; font-size: 18px;"> {_CHECK_ICONS[check["type"]]} </div>
          <div style="color: {_CHECK_TEXT_COLORS[check["type"]]}; margin-bottom: 5px; font-weight: bold;"> {check["type"].capitalize()} </div>
          <div style="color: #333;"> {escape(str(check["message"]))} </div>
        </div>"""
        for check in checks
    )
    return f"""
    <div style="margin-bottom: 20px;">
      <h3 style="color: #333;">SEO Checks</h3>
      <div style="display: grid; grid-template-columns: repeat(auto-fill, minmax(300px, 1fr)); gap: 15px;">
        {cards}
      </div>
    </div>
    """


def _suggestions_html(suggestions):
    """Render the "Improvement Suggestions" list."""
    items = "\n".join(
        f'<li style="margin-bottom: 8px;">{escape(str(suggestion))}</li>'
        for suggestion in suggestions
    )
    return f"""
    <div style="background-color: #f5f5f5; border-radius: 8px; padding: 15px; margin-bottom: 20px;">
      <h3 style="margin-top: 0; color: #333;">Improvement Suggestions</h3>
      <ul style="margin: 0; padding-left: 20px;">
        {items}
      </ul>
    </div>
    """


def format_result_html(result):
    """Format an ``analyze_seo`` result as an HTML report.

    All text that originates from the audited page or the request error
    (title, description, URL, messages) is HTML-escaped before interpolation.
    Error results render as a single error box.
    """
    if result["status"] == "error":
        message = escape(str(result["message"]))
        return f"""
        <div style="padding: 20px; background-color: #ffebee; border-radius: 8px; margin-bottom: 20px;">
          <h3 style="color: #c62828;">Error</h3>
          <p>{message}</p>
        </div>
        """

    panels = (
        '<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px; margin-bottom: 20px;">'
        + _details_table_html(result["details"])
        + _keywords_table_html(result["keywords"])
        + '</div>'
    )
    return "".join((
        '<div style="font-family: Arial, sans-serif;">',
        _summary_html(result),
        panels,
        _checks_html(result["checks"]),
        _suggestions_html(result["suggestions"]),
        '</div>',
    ))


def _check_lines(checks):
    """Return one "<icon> <message>" text line per check."""
    # Unknown types fall back to the error icon.
    return [
        f"{_CHECK_ICONS.get(check['type'], '✕')} {check['message']}\n"
        for check in checks
    ]


def _render_chart_html(chart_data):
    """Render the category pass/fail bar chart as an inline ``<img>`` tag.

    The PNG is embedded as a base64 data URI, so nothing is written to disk
    and concurrent requests cannot overwrite each other's chart. Returns an
    empty string when there is no chart data.
    """
    if not chart_data:
        return ""

    labels = [name.replace('_', ' ').title() for name in chart_data]
    values = [int(bool(passed)) * 100 for passed in chart_data.values()]
    colors = ['#4caf50' if value == 100 else '#f57c00' for value in values]

    buffer = io.BytesIO()
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        y_pos = np.arange(len(labels))
        ax.barh(y_pos, values, color=colors)
        ax.set_yticks(y_pos)
        ax.set_yticklabels(labels)
        # Leave room to the right of 100 for the "100%" value labels.
        ax.set_xlim(0, 115)
        ax.set_title('SEO Category Performance')
        ax.set_xlabel('Score (%)')
        for row, value in enumerate(values):
            ax.text(value + 5, row, f"{value}%", va='center')
        fig.tight_layout()
        fig.savefig(buffer, format="png")
    finally:
        # Always release the figure; pyplot keeps figures alive until closed.
        plt.close(fig)

    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
    return (
        f'<img src="data:image/png;base64,{encoded}" '
        'alt="SEO Performance Chart" style="width:100%;max-width:800px;">'
    )


def seo_analysis(url):
    """Analyze ``url`` and return ``(text_report, html_report, chart_html)``."""
    if not url or not url.strip():
        return "Please enter a URL to analyze.", "", ""

    checker = SEOChecker()
    result = checker.analyze_seo(url)

    if result["status"] == "error":
        return result["message"], format_result_html(result), ""

    parts = [
        f"SEO Score: {result['score']}/100 for {result['url']}\n\n",
        "--- SEO CHECKS ---\n",
        *_check_lines(result["checks"]),
        "\n--- SUGGESTIONS ---\n",
    ]
    parts.extend(
        f"{number}. {suggestion}\n"
        for number, suggestion in enumerate(result["suggestions"], 1)
    )
    text_report = "".join(parts)

    html_report = format_result_html(result)
    chart_html = _render_chart_html(checker.generate_chart(result))
    return text_report, html_report, chart_html


def generate_example_report():
    """Analyze a randomly chosen sample URL for the demo button.

    Returns ``(text_report, html_report, chart_html)``; the chart is always
    empty for example reports.
    """
    checker = SEOChecker()
    sample_urls = [
        "https://example.com",
        "https://websitelayout.net",
        "https://yahoo.com",
    ]
    sample_url = random.choice(sample_urls)
    result = checker.analyze_seo(sample_url)

    if result["status"] == "error":
        message = f"Error analyzing {sample_url}: {result.get('message', 'Unknown error')}"
        return message, "", ""

    text_report = "".join((
        f"SAMPLE REPORT - URL: {sample_url}\n\n",
        f"SEO Score: {result['score']}/100\n\n",
        "--- KEY FINDINGS ---\n",
        *_check_lines(result["checks"][:5]),  # only the first five findings
        "\n(This is an example report - enter your own URL for a full analysis)",
    ))

    html_report = format_result_html(result)
    html_report += '<div style="background-color: #e3f2fd; color: #0d47a1; padding: 10px; border-radius: 4px; margin-top: 20px; text-align: center;">This is an example report - enter your own URL for a full analysis</div>'

    return text_report, html_report, ""


# Set up the Gradio interface
def create_interface():
    """Build the Gradio Blocks UI and wire both buttons to their handlers."""
    with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal"), css="""
        .container { max-width: 1200px; margin: 0 auto; }
        .header { margin-bottom: 20px; text-align: center; }
        .header h1 { margin-bottom: 5px; color: #1e88e5; }
        .header p { color: #555; }
        .footer { margin-top: 30px; text-align: center; color: #777; font-size: 12px; }
        .score-box { display: flex; align-items: center; gap: 20px; padding: 20px; margin-bottom: 20px; }
        .url-input { margin-bottom: 20px; }
        .report-container { border-radius: 10px; overflow: hidden; }
    """) as demo:
        gr.HTML("""
            <div class="header">
                <h1>Advanced SEO Website Analyzer</h1>
                <p>Perform a comprehensive SEO audit of any website with detailed insights and recommendations</p>
            </div>
        """)

        with gr.Row(equal_height=True):
            with gr.Column():
                url_input = gr.Textbox(
                    label="Website URL to Analyze",
                    placeholder="Enter URL (e.g., example.com or https://example.com)",
                    scale=3
                )
                with gr.Row():
                    analyze_btn = gr.Button("Analyze Website", variant="primary", scale=2)
                    example_btn = gr.Button("See Example Report", scale=1)
                text_output = gr.Textbox(
                    label="Text Summary",
                    placeholder="SEO analysis results will appear here...",
                    lines=10,
                    max_lines=20
                )
            with gr.Column():
                html_output = gr.HTML(
                    label="Visual Report",
                    value='<div style="height: 400px; display: flex; justify-content: center; align-items: center; background-color: #f5f5f5; border-radius: 8px;"><p style="color: #777;">Enter a URL and click "Analyze Website" to see a detailed report here.</p></div>'
                )

        with gr.Row():
            chart_output = gr.HTML(
                label="Performance Chart",
                value=""
            )

        gr.HTML("""
            <div class="footer">
                <p>© 2025 SEO Website Analyzer | Provides quick, comprehensive SEO analysis</p>
            </div>
        """)

        # Connect the components
        analyze_btn.click(
            fn=seo_analysis,
            inputs=url_input,
            outputs=[text_output, html_output, chart_output]
        )
        example_btn.click(
            fn=generate_example_report,
            inputs=[],
            outputs=[text_output, html_output, chart_output]
        )

    return demo


# Run the app
if __name__ == "__main__":
    demo = create_interface()
    demo.launch(share=True)