aiqcamp committed
Commit aa283e0 · verified · 1 Parent(s): cd9c49f

Update app.py

Files changed (1)
  1. app.py +697 -125
app.py CHANGED
@@ -2,134 +2,706 @@ import requests
  from bs4 import BeautifulSoup
  from urllib.parse import urlparse, urljoin
  import gradio as gr
-
- def seo_check(url):
-     report = []
-     suggestions = []
-
-     # Ensure HTTPS
-     if not url.startswith("http"):
-         url = "https://" + url
-
-     try:
-         response = requests.get(url, timeout=10)
-         response.raise_for_status()
-         html = response.text
-     except Exception as e:
-         return f" Error accessing URL: {e}", ""
-
-     soup = BeautifulSoup(html, "html.parser")
-
-     # Title Tag
-     title = soup.title.string.strip() if soup.title else ""
-     if not title:
-         report.append("❌ Missing <title> tag.")
-         suggestions.append("Add a <title> tag that describes your page in 50–60 characters.")
-     elif len(title) > 70:
-         report.append("⚠️ Title is too long.")
-         suggestions.append("Keep title under 70 characters.")
-
-     # Meta Description
-     desc_tag = soup.find("meta", attrs={"name": "description"})
-     desc = desc_tag["content"].strip() if desc_tag and desc_tag.get("content") else ""
-     if not desc:
-         report.append("❌ Missing meta description.")
-         suggestions.append("Add a <meta name='description'> summarizing the page.")
-     elif len(desc) > 160:
-         report.append("⚠️ Meta description is too long.")
-         suggestions.append("Keep meta descriptions under 160 characters.")
-
-     # Canonical Tag
-     canonical = soup.find("link", rel="canonical")
-     if not canonical:
-         report.append("❌ Missing canonical link.")
-         suggestions.append("Add a <link rel='canonical'> to avoid duplicate content.")
-
-     # H1 Tag
-     h1_tags = soup.find_all("h1")
-     if len(h1_tags) != 1:
-         report.append(f"⚠️ Found {len(h1_tags)} <h1> tags.")
-         suggestions.append("Use exactly one <h1> tag for SEO clarity.")
-
-     # Mobile viewport
-     viewport = soup.find("meta", attrs={"name": "viewport"})
-     if not viewport:
-         report.append("⚠️ No viewport meta tag.")
-         suggestions.append("Add a viewport meta tag for mobile responsiveness.")
-
-     # HTTPS check
-     if not url.startswith("https://"):
-         report.append("⚠️ URL is not secure (no HTTPS).")
-         suggestions.append("Install SSL and redirect HTTP to HTTPS.")
-
-     # Robots.txt and sitemap.xml
-     parsed = urlparse(url)
-     base = f"{parsed.scheme}://{parsed.netloc}"
-     robots_url = urljoin(base, "/robots.txt")
-     sitemap_url = urljoin(base, "/sitemap.xml")
-     try:
-         r1 = requests.get(robots_url)
-         if r1.status_code != 200:
-             report.append("❌ robots.txt not found.")
-             suggestions.append("Create a robots.txt to guide search bots.")
-     except:
-         report.append("❌ Could not access robots.txt.")
-
-     try:
-         r2 = requests.get(sitemap_url)
-         if r2.status_code != 200:
-             report.append("❌ sitemap.xml not found.")
-             suggestions.append("Add sitemap.xml for better crawling.")
-     except:
-         report.append("❌ Could not access sitemap.xml.")
-
-     # Open Graph Tags
-     og_title = soup.find("meta", property="og:title")
-     if not og_title:
-         report.append("⚠️ Missing Open Graph (og:title).")
-         suggestions.append("Add OG tags to improve sharing on social media.")
-
-     # Image alt text
-     images = soup.find_all("img")
-     alt_missing = [img for img in images if not img.get("alt")]
-     if alt_missing:
-         report.append(f"⚠️ {len(alt_missing)} images missing alt text.")
-         suggestions.append("Add descriptive alt attributes to all images.")
-
-     # Internal and external links
-     links = soup.find_all("a", href=True)
-     internal = 0
-     external = 0
-     for link in links:
-         href = link['href']
-         if parsed.netloc in href:
-             internal += 1
-         elif href.startswith("http"):
-             external += 1
-     report.append(f"ℹ️ Internal Links: {internal} | External Links: {external}")
-     suggestions.append("Ensure most important links are internal. Check broken links.")
-
-     # Keyword density (basic)
-     body_text = soup.get_text().lower()
-     words = body_text.split()
-     word_count = len(words)
-     keyword = parsed.netloc.replace("www.", "").split(".")[0]
-     keyword_freq = words.count(keyword)
-     density = (keyword_freq / word_count) * 100 if word_count else 0
-     report.append(f"ℹ️ Keyword '{keyword}' appears {keyword_freq} times ({density:.2f}% density)")
-     if density < 0.5:
-         suggestions.append("Consider using your main keyword more often (target 1–2%).")
-
-     return "\n".join(report), "\n".join(suggestions)
-
- # Gradio UI
- gr.Interface(
-     fn=seo_check,
-     inputs=gr.Textbox(label="Enter Website URL"),
-     outputs=[
-         gr.Textbox(label="SEO Report", lines=15),
-         gr.Textbox(label="Suggestions & Fixes", lines=15)
-     ],
-     title="SEO Website Checker",
-     description="Analyze your website's SEO like Sitechecker.pro & SEOSiteCheckup, with clear solutions!"
- ).launch()
 
+ import re
+ import concurrent.futures
+ import pandas as pd
+ from datetime import datetime
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import time
+
+ class SEOChecker:
+     def __init__(self):
+         self.user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+         self.headers = {'User-Agent': self.user_agent}
+
+     def get_page_content(self, url):
+         """Fetch page content with error handling and timeout"""
+         if not url.startswith(('http://', 'https://')):
+             url = "https://" + url
+
+         try:
+             response = requests.get(url, headers=self.headers, timeout=15)
+             response.raise_for_status()
+             return response.text, url, None
+         except requests.exceptions.RequestException as e:
+             return None, url, str(e)
+
+     def check_page_speed(self, url):
+         """Basic page load time measurement"""
+         start_time = time.time()
+         try:
+             requests.get(url, headers=self.headers, timeout=10)
+             load_time = time.time() - start_time
+             return load_time
+         except:
+             return None
+
+     def get_keyword_suggestions(self, content):
+         """Extract potential keywords from text content"""
+         if not content:
+             return []
+
+         # Remove HTML tags
+         text = BeautifulSoup(content, "html.parser").get_text()
+
+         # Clean and tokenize
+         words = re.findall(r'\b[a-zA-Z]{4,15}\b', text.lower())
+
+         # Count word frequency
+         word_freq = {}
+         for word in words:
+             if word not in ['this', 'that', 'with', 'from', 'have', 'were', 'they', 'will', 'what', 'when', 'where', 'which']:
+                 word_freq[word] = word_freq.get(word, 0) + 1
+
+         # Return top keywords
+         keywords = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
+         return keywords[:10]
+
+     def analyze_seo(self, url):
+         """Main SEO analysis function"""
+         content, final_url, error = self.get_page_content(url)
+
+         if error:
+             return {
+                 "status": "error",
+                 "message": f"Error accessing URL: {error}",
+                 "details": {},
+                 "score": 0,
+                 "suggestions": []
+             }
+
+         soup = BeautifulSoup(content, "html.parser")
+         parsed_url = urlparse(final_url)
+         base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
+
+         # Initialize result dictionary
+         result = {
+             "status": "success",
+             "url": final_url,
+             "details": {},
+             "checks": [],
+             "suggestions": [],
+             "keywords": self.get_keyword_suggestions(content)
+         }
+
+         # Title check
+         title = soup.title.string.strip() if soup.title else ""
+         title_len = len(title) if title else 0
+         title_status = "good" if title and 10 <= title_len <= 60 else "warning" if title else "error"
+         result["details"]["title"] = {
+             "content": title,
+             "length": title_len,
+             "status": title_status
+         }
+
+         if not title:
+             result["checks"].append({"type": "error", "message": "Missing <title> tag"})
+             result["suggestions"].append("Add a descriptive title tag between 50-60 characters")
+         elif title_len > 60:
+             result["checks"].append({"type": "warning", "message": f"Title is too long ({title_len} chars)"})
+             result["suggestions"].append("Keep title under 60 characters for better display in search results")
+         elif title_len < 10:
+             result["checks"].append({"type": "warning", "message": f"Title is too short ({title_len} chars)"})
+             result["suggestions"].append("Make title more descriptive (30-60 characters recommended)")
+         else:
+             result["checks"].append({"type": "good", "message": f"Title length is good ({title_len} chars)"})
+
+         # Meta description
+         desc_tag = soup.find("meta", attrs={"name": "description"})
+         desc = desc_tag["content"].strip() if desc_tag and desc_tag.get("content") else ""
+         desc_len = len(desc) if desc else 0
+         desc_status = "good" if desc and 50 <= desc_len <= 160 else "warning" if desc else "error"
+
+         result["details"]["meta_description"] = {
+             "content": desc,
+             "length": desc_len,
+             "status": desc_status
+         }
+
+         if not desc:
+             result["checks"].append({"type": "error", "message": "Missing meta description"})
+             result["suggestions"].append("Add a meta description summarizing your page content")
+         elif desc_len > 160:
+             result["checks"].append({"type": "warning", "message": f"Meta description is too long ({desc_len} chars)"})
+             result["suggestions"].append("Keep meta description under 160 characters")
+         elif desc_len < 50:
+             result["checks"].append({"type": "warning", "message": f"Meta description is too short ({desc_len} chars)"})
+             result["suggestions"].append("Make meta description more informative (100-160 chars recommended)")
+         else:
+             result["checks"].append({"type": "good", "message": f"Meta description length is good ({desc_len} chars)"})
+
+         # Canonical URL
+         canonical = soup.find("link", rel="canonical")
+         canonical_url = canonical.get("href") if canonical else None
+
+         result["details"]["canonical"] = {
+             "exists": canonical is not None,
+             "url": canonical_url
+         }
+
+         if not canonical:
+             result["checks"].append({"type": "warning", "message": "Missing canonical link"})
+             result["suggestions"].append("Add a canonical link to prevent duplicate content issues")
+         else:
+             result["checks"].append({"type": "good", "message": "Canonical link is present"})
+
+         # Headings structure
+         headings = {f"h{i}": len(soup.find_all(f"h{i}")) for i in range(1, 7)}
+         result["details"]["headings"] = headings
+
+         if headings["h1"] == 0:
+             result["checks"].append({"type": "error", "message": "No H1 heading found"})
+             result["suggestions"].append("Add a single H1 heading that describes your main content")
+         elif headings["h1"] > 1:
+             result["checks"].append({"type": "warning", "message": f"Multiple H1 headings found ({headings['h1']})"})
+             result["suggestions"].append("Use only one H1 heading per page for SEO clarity")
+         else:
+             result["checks"].append({"type": "good", "message": "Single H1 heading structure is good"})
+
+         if sum(headings.values()) < 3:
+             result["checks"].append({"type": "warning", "message": "Few headings used in content"})
+             result["suggestions"].append("Structure content with more headings for readability and SEO")
+
+         # Mobile viewport
+         viewport = soup.find("meta", attrs={"name": "viewport"})
+         result["details"]["viewport"] = viewport is not None
+
+         if not viewport:
+             result["checks"].append({"type": "warning", "message": "No viewport meta tag"})
+             result["suggestions"].append("Add viewport meta tag for mobile responsiveness")
+         else:
+             result["checks"].append({"type": "good", "message": "Viewport meta tag is present"})
+
+         # HTTPS check
+         is_https = final_url.startswith("https://")
+         result["details"]["https"] = is_https
+
+         if not is_https:
+             result["checks"].append({"type": "error", "message": "Site is not using HTTPS"})
+             result["suggestions"].append("Install SSL and redirect HTTP to HTTPS for security and SEO")
+         else:
+             result["checks"].append({"type": "good", "message": "Site is using HTTPS"})
+
+         # Images alt text
+         images = soup.find_all("img")
+         images_no_alt = [img.get('src', '(no src)') for img in images if not img.get("alt")]
+         result["details"]["images"] = {
+             "total": len(images),
+             "missing_alt": len(images_no_alt),
+             "examples_missing_alt": images_no_alt[:3]
+         }
+
+         if images and images_no_alt:
+             result["checks"].append({"type": "warning", "message": f"{len(images_no_alt)} of {len(images)} images missing alt text"})
+             result["suggestions"].append("Add descriptive alt attributes to all images for accessibility and SEO")
+         elif images:
+             result["checks"].append({"type": "good", "message": "All images have alt text"})
+
+         # Check robots.txt and sitemap
+         with concurrent.futures.ThreadPoolExecutor() as executor:
+             robots_future = executor.submit(self.check_file_exists, urljoin(base_url, "/robots.txt"))
+             sitemap_future = executor.submit(self.check_file_exists, urljoin(base_url, "/sitemap.xml"))
+
+             robots_exists = robots_future.result()
+             sitemap_exists = sitemap_future.result()
+
+         result["details"]["robots_txt"] = robots_exists
+         result["details"]["sitemap_xml"] = sitemap_exists
+
+         if not robots_exists:
+             result["checks"].append({"type": "warning", "message": "robots.txt not found"})
+             result["suggestions"].append("Create a robots.txt file to guide search engines")
+         else:
+             result["checks"].append({"type": "good", "message": "robots.txt file exists"})
+
+         if not sitemap_exists:
+             result["checks"].append({"type": "warning", "message": "sitemap.xml not found"})
+             result["suggestions"].append("Add a sitemap.xml file for better crawling")
+         else:
+             result["checks"].append({"type": "good", "message": "sitemap.xml file exists"})
+
+         # Open Graph Tags
+         og_tags = {
+             "title": soup.find("meta", property="og:title") is not None,
+             "description": soup.find("meta", property="og:description") is not None,
+             "image": soup.find("meta", property="og:image") is not None
+         }
+
+         result["details"]["open_graph"] = og_tags
+
+         og_missing = [tag for tag, exists in og_tags.items() if not exists]
+         if og_missing:
+             result["checks"].append({"type": "warning", "message": f"Missing Open Graph tags: {', '.join(og_missing)}"})
+             result["suggestions"].append("Add Open Graph meta tags to improve sharing on social media")
+         else:
+             result["checks"].append({"type": "good", "message": "Open Graph meta tags are present"})
+
+         # Link analysis
+         links = soup.find_all("a", href=True)
+         internal_links = []
+         external_links = []
+
+         for link in links:
+             href = link.get('href', '')
+             if not href or href.startswith('#'):
+                 continue
+
+             if href.startswith('/') or parsed_url.netloc in href:
+                 internal_links.append(href)
+             elif href.startswith(('http://', 'https://')):
+                 external_links.append(href)
+
+         result["details"]["links"] = {
+             "internal": len(internal_links),
+             "external": len(external_links),
+             "total": len(internal_links) + len(external_links)
+         }
+
+         result["checks"].append({"type": "info", "message": f"Found {len(internal_links)} internal and {len(external_links)} external links"})
+
+         if len(internal_links) < 2 and not (len(internal_links) == 0 and len(external_links) == 0):
+             result["suggestions"].append("Add more internal links to improve site structure")
+
+         # Text to HTML ratio analysis
+         html_size = len(content)
+         text = soup.get_text()
+         text_size = len(text)
+
+         if html_size > 0:
+             text_ratio = (text_size / html_size) * 100
+         else:
+             text_ratio = 0
+
+         result["details"]["content"] = {
+             "html_size": html_size,
+             "text_size": text_size,
+             "text_ratio": text_ratio
+         }
+
+         if text_ratio < 10:
+             result["checks"].append({"type": "warning", "message": f"Low text-to-HTML ratio: {text_ratio:.1f}%"})
+             result["suggestions"].append("Increase text content relative to HTML for better SEO")
+         else:
+             result["checks"].append({"type": "good", "message": f"Text-to-HTML ratio: {text_ratio:.1f}%"})
+
+         # Page speed (basic)
+         load_time = self.check_page_speed(final_url)
+         result["details"]["page_speed"] = load_time
+
+         if load_time:
+             if load_time > 2:
+                 result["checks"].append({"type": "warning", "message": f"Slow page load time: {load_time:.2f} seconds"})
+                 result["suggestions"].append("Optimize page speed by reducing file sizes and requests")
+             else:
+                 result["checks"].append({"type": "good", "message": f"Page load time: {load_time:.2f} seconds"})
+
+         # Calculate overall score
+         scores = {"good": 10, "info": 5, "warning": 0, "error": -10}
+         total_points = sum(scores.get(check["type"], 0) for check in result["checks"])
+         max_score = 10 * sum(1 for check in result["checks"] if check["type"] in ["good", "error"])
+
+         if max_score > 0:
+             percentage_score = min(100, max(0, (total_points + max_score) / (2 * max_score) * 100))
+         else:
+             percentage_score = 50
+
+         result["score"] = round(percentage_score)
+
+         return result
+
+     def check_file_exists(self, url):
+         """Check if a file exists at the given URL"""
+         try:
+             response = requests.head(url, headers=self.headers, timeout=5)
+             return response.status_code == 200
+         except:
+             return False
+
+     def generate_chart(self, result):
+         """Generate SEO score chart data"""
+         if result["status"] == "error":
+             return None
+
+         categories = {
+             "title": result["details"]["title"]["status"] == "good",
+             "meta_description": result["details"]["meta_description"]["status"] == "good",
+             "headings": result["details"]["headings"]["h1"] == 1,
+             "https": result["details"]["https"],
+             "images": result["details"]["images"]["total"] == 0 or result["details"]["images"]["missing_alt"] == 0,
+             "robots_sitemap": result["details"]["robots_txt"] and result["details"]["sitemap_xml"],
+             "open_graph": all(result["details"]["open_graph"].values())
+         }
+
+         return categories
+
+ def format_result_html(result):
+     """Format the SEO result as HTML for display"""
+     if result["status"] == "error":
+         return f"""
+         <div style="padding: 20px; background-color: #ffebee; border-radius: 8px; margin-bottom: 20px;">
+             <h3 style="color: #c62828;">Error</h3>
+             <p>{result["message"]}</p>
+         </div>
+         """
+
+     # Calculate counts for each check type
+     check_counts = {"good": 0, "info": 0, "warning": 0, "error": 0}
+     for check in result["checks"]:
+         check_counts[check["type"]] = check_counts.get(check["type"], 0) + 1
+
+     # Build the HTML
+     html = f"""
+     <div style="font-family: Arial, sans-serif;">
+         <div style="display: flex; align-items: center; margin-bottom: 20px;">
+             <div style="width: 120px; height: 120px; position: relative; margin-right: 20px;">
+                 <div style="position: absolute; width: 100%; height: 100%; border-radius: 50%; background: conic-gradient(
+                     from 0deg,
+                     #4caf50 0% {result["score"]}%,
+                     #e0e0e0 {result["score"]}% 100%
+                 );"></div>
+                 <div style="position: absolute; top: 10px; left: 10px; right: 10px; bottom: 10px; background: white; border-radius: 50%; display: flex; align-items: center; justify-content: center; flex-direction: column;">
+                     <span style="font-size: 28px; font-weight: bold;">{result["score"]}</span>
+                     <span style="font-size: 12px;">SEO Score</span>
+                 </div>
+             </div>
+             <div>
+                 <h2 style="margin: 0 0 10px 0;">SEO Report for {result["url"]}</h2>
+                 <div style="display: flex; flex-wrap: wrap; gap: 10px;">
+                     <span style="background-color: #e8f5e9; color: #2e7d32; padding: 5px 10px; border-radius: 4px; font-size: 12px;">
+                         ✓ {check_counts["good"]} Passed
+                     </span>
+                     <span style="background-color: #fff8e1; color: #f57c00; padding: 5px 10px; border-radius: 4px; font-size: 12px;">
+                         ⚠ {check_counts["warning"]} Warnings
+                     </span>
+                     <span style="background-color: #ffebee; color: #c62828; padding: 5px 10px; border-radius: 4px; font-size: 12px;">
+                         ✕ {check_counts["error"]} Errors
+                     </span>
+                     <span style="background-color: #e3f2fd; color: #1565c0; padding: 5px 10px; border-radius: 4px; font-size: 12px;">
+                         ℹ {check_counts["info"]} Info
+                     </span>
+                 </div>
+                 <div style="margin-top: 10px; color: #555; font-size: 13px;">
+                     Generated on {datetime.now().strftime('%Y-%m-%d %H:%M')}
+                 </div>
+             </div>
+         </div>
+
+         <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px; margin-bottom: 20px;">
+             <div style="background-color: #f5f5f5; border-radius: 8px; padding: 15px;">
+                 <h3 style="margin-top: 0; color: #333;">Page Details</h3>
+                 <table style="width: 100%; border-collapse: collapse;">
+                     <tr>
+                         <td style="padding: 8px 0; border-bottom: 1px solid #ddd; width: 40%; color: #777;">Title</td>
+                         <td style="padding: 8px 0; border-bottom: 1px solid #ddd;">
+                             {result["details"]["title"]["content"] or "Missing"}
+                             <div style="font-size: 12px; color: #777; margin-top: 4px;">
+                                 Length: {result["details"]["title"]["length"]} chars
+                                 {" ✓" if result["details"]["title"]["status"] == "good" else " ⚠"}
+                             </div>
+                         </td>
+                     </tr>
+                     <tr>
+                         <td style="padding: 8px 0; border-bottom: 1px solid #ddd; color: #777;">Meta Description</td>
+                         <td style="padding: 8px 0; border-bottom: 1px solid #ddd;">
+                             {result["details"]["meta_description"]["content"] or "Missing"}
+                             <div style="font-size: 12px; color: #777; margin-top: 4px;">
+                                 Length: {result["details"]["meta_description"]["length"]} chars
+                                 {" ✓" if result["details"]["meta_description"]["status"] == "good" else " ⚠"}
+                             </div>
+                         </td>
+                     </tr>
+                     <tr>
+                         <td style="padding: 8px 0; border-bottom: 1px solid #ddd; color: #777;">Headings</td>
+                         <td style="padding: 8px 0; border-bottom: 1px solid #ddd;">
+                             H1: {result["details"]["headings"]["h1"]},
+                             H2: {result["details"]["headings"]["h2"]},
+                             H3: {result["details"]["headings"]["h3"]}
+                         </td>
+                     </tr>
+                     <tr>
+                         <td style="padding: 8px 0; border-bottom: 1px solid #ddd; color: #777;">Security & Files</td>
+                         <td style="padding: 8px 0; border-bottom: 1px solid #ddd;">
+                             HTTPS: {"✓" if result["details"]["https"] else "✕"},
+                             robots.txt: {"✓" if result["details"]["robots_txt"] else "✕"},
+                             sitemap.xml: {"✓" if result["details"]["sitemap_xml"] else "✕"}
+                         </td>
+                     </tr>
+                     <tr>
+                         <td style="padding: 8px 0; border-bottom: 1px solid #ddd; color: #777;">Links</td>
+                         <td style="padding: 8px 0; border-bottom: 1px solid #ddd;">
+                             Internal: {result["details"]["links"]["internal"]},
+                             External: {result["details"]["links"]["external"]}
+                         </td>
+                     </tr>
+                     <tr>
+                         <td style="padding: 8px 0; color: #777;">Images</td>
+                         <td style="padding: 8px 0;">
+                             Total: {result["details"]["images"]["total"]},
+                             Missing alt: {result["details"]["images"]["missing_alt"]}
+                         </td>
+                     </tr>
+                 </table>
+             </div>
+
+             <div style="background-color: #f5f5f5; border-radius: 8px; padding: 15px;">
+                 <h3 style="margin-top: 0; color: #333;">Top Potential Keywords</h3>
+                 <div style="max-height: 200px; overflow-y: auto;">
+                     <table style="width: 100%; border-collapse: collapse;">
+                         <tr style="background-color: #eee;">
+                             <th style="padding: 8px; text-align: left; border-bottom: 1px solid #ddd;">Keyword</th>
+                             <th style="padding: 8px; text-align: right; border-bottom: 1px solid #ddd;">Frequency</th>
+                         </tr>
+     """
+
+     # Add keyword rows
+     for keyword, count in result["keywords"]:
+         html += f"""
+                         <tr>
+                             <td style="padding: 8px; border-bottom: 1px solid #ddd;">{keyword}</td>
+                             <td style="padding: 8px; border-bottom: 1px solid #ddd; text-align: right;">{count}</td>
+                         </tr>
+         """
+
+     html += """
+                     </table>
+                 </div>
+             </div>
+         </div>
+
+         <div style="margin-bottom: 20px;">
+             <h3 style="color: #333;">SEO Checks</h3>
+             <div style="display: grid; grid-template-columns: repeat(auto-fill, minmax(300px, 1fr)); gap: 15px;">
+     """
+
+     # Add check cards
+     icons = {
+         "good": "✓",
+         "info": "ℹ",
+         "warning": "⚠",
+         "error": "✕"
+     }
+
+     bg_colors = {
+         "good": "#e8f5e9",
+         "info": "#e3f2fd",
+         "warning": "#fff8e1",
+         "error": "#ffebee"
+     }
+
+     text_colors = {
+         "good": "#2e7d32",
+         "info": "#1565c0",
+         "warning": "#f57c00",
+         "error": "#c62828"
+     }
+
+     for check in result["checks"]:
+         html += f"""
+             <div style="background-color: {bg_colors[check["type"]]}; border-radius: 8px; padding: 12px; position: relative;">
+                 <div style="position: absolute; top: 12px; right: 12px; font-size: 18px;">
+                     {icons[check["type"]]}
+                 </div>
+                 <div style="color: {text_colors[check["type"]]}; margin-bottom: 5px; font-weight: bold;">
+                     {check["type"].capitalize()}
+                 </div>
+                 <div style="color: #333;">
+                     {check["message"]}
+                 </div>
+             </div>
+         """
+
+     html += """
+             </div>
+         </div>
+
+         <div style="background-color: #f5f5f5; border-radius: 8px; padding: 15px; margin-bottom: 20px;">
+             <h3 style="margin-top: 0; color: #333;">Improvement Suggestions</h3>
+             <ul style="margin: 0; padding-left: 20px;">
+     """
+
+     # Add suggestions
+     for suggestion in result["suggestions"]:
+         html += f"""
+             <li style="margin-bottom: 8px;">{suggestion}</li>
+         """
+
+     html += """
+             </ul>
+         </div>
+     </div>
+     """
+
+     return html
+
+ def seo_analysis(url):
+     """Run the SEO analysis and return results in structured format"""
+     checker = SEOChecker()
+     result = checker.analyze_seo(url)
+
+     if result["status"] == "error":
+         return result["message"], "", ""
+
+     # Format text report
+     text_report = f"SEO Score: {result['score']}/100 for {result['url']}\n\n"
+     text_report += "--- SEO CHECKS ---\n"
+
+     for check in result["checks"]:
+         icon = "✓" if check["type"] == "good" else "ℹ" if check["type"] == "info" else "⚠" if check["type"] == "warning" else "✕"
+         text_report += f"{icon} {check['message']}\n"
+
+     text_report += "\n--- SUGGESTIONS ---\n"
+     for i, suggestion in enumerate(result["suggestions"], 1):
+         text_report += f"{i}. {suggestion}\n"
+
+     # Format HTML report
+     html_report = format_result_html(result)
+
+     # Generate chart data
+     chart_data = checker.generate_chart(result)
+     chart_html = ""
+
+     if chart_data:
+         # Create simple chart
+         categories = list(chart_data.keys())
+         values = [int(v) * 100 for v in chart_data.values()]
+
+         plt.figure(figsize=(10, 6))
+         colors = ['#4caf50' if v == 100 else '#f57c00' for v in values]
+
+         y_pos = np.arange(len(categories))
+         plt.barh(y_pos, values, color=colors)
+         plt.yticks(y_pos, [c.replace('_', ' ').title() for c in categories])
+         plt.xlim(0, 100)
+         plt.title('SEO Category Performance')
+         plt.xlabel('Score (%)')
+
+         for i, v in enumerate(values):
+             plt.text(v + 5, i, f"{v}%" if v > 0 else "0%", va='center')
+
+         # Save to file
+         chart_file = "seo_chart.png"
+         plt.tight_layout()
+         plt.savefig(chart_file)
+         plt.close()
+
+         # Create HTML image reference
+         chart_html = f'<img src="file={chart_file}" alt="SEO Performance Chart" style="width:100%;max-width:800px;">'
+
+     return text_report, html_report, chart_html
+
+ def generate_example_report():
+     """Generate an example report for the demo"""
+     checker = SEOChecker()
+     sample_urls = [
+         "https://example.com",
+         "https://websitelayout.net",
+         "https://yahoo.com"
+     ]
+
+     # Select a random sample URL
+     import random
+     sample_url = random.choice(sample_urls)
+
+     # Run analysis
+     result = checker.analyze_seo(sample_url)
+
+     if result["status"] == "error":
+         return f"Error analyzing {sample_url}: {result.get('message', 'Unknown error')}", "", ""
+
+     # Format text report
+     text_report = f"SAMPLE REPORT - URL: {sample_url}\n\n"
+     text_report += f"SEO Score: {result['score']}/100\n\n"
+     text_report += "--- KEY FINDINGS ---\n"
+
+     for check in result["checks"][:5]:  # Just show top 5 findings
+         icon = "✓" if check["type"] == "good" else "ℹ" if check["type"] == "info" else "⚠" if check["type"] == "warning" else "✕"
+         text_report += f"{icon} {check['message']}\n"
+
+     text_report += "\n(This is an example report - enter your own URL for a full analysis)"
+
+     # HTML report
+     html_report = format_result_html(result)
+     html_report += '<div style="background-color: #e3f2fd; color: #0d47a1; padding: 10px; border-radius: 4px; margin-top: 20px; text-align: center;">This is an example report - enter your own URL for a full analysis</div>'
+
+     # Generate chart data
+     chart_data = checker.generate_chart(result)
+     chart_html = ""
+
+     return text_report, html_report, chart_html
+
+ # Set up the Gradio interface
+ def create_interface():
+     with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal"), css="""
+         .container { max-width: 1200px; margin: 0 auto; }
+         .header { margin-bottom: 20px; text-align: center; }
+         .header h1 { margin-bottom: 5px; color: #1e88e5; }
+         .header p { color: #555; }
+         .footer { margin-top: 30px; text-align: center; color: #777; font-size: 12px; }
+         .score-box { display: flex; align-items: center; gap: 20px; padding: 20px; margin-bottom: 20px; }
+         .url-input { margin-bottom: 20px; }
+         .report-container { border-radius: 10px; overflow: hidden; }
+     """) as demo:
+         gr.HTML("""
+         <div class="header">
+             <h1>Advanced SEO Website Analyzer</h1>
+             <p>Perform a comprehensive SEO audit of any website with detailed insights and recommendations</p>
+         </div>
+         """)
+
+         with gr.Row(equal_height=True):
+             with gr.Column():
+                 url_input = gr.Textbox(
+                     label="Website URL to Analyze",
+                     placeholder="Enter URL (e.g., example.com or https://example.com)",
+                     scale=3
+                 )
+
+                 with gr.Row():
+                     analyze_btn = gr.Button("Analyze Website", variant="primary", scale=2)
+                     example_btn = gr.Button("See Example Report", scale=1)
+
+                 text_output = gr.Textbox(
+                     label="Text Summary",
+                     placeholder="SEO analysis results will appear here...",
+                     lines=10,
+                     max_lines=20
+                 )
+
+             with gr.Column():
+                 html_output = gr.HTML(
+                     label="Visual Report",
+                     value='<div style="height: 400px; display: flex; justify-content: center; align-items: center; background-color: #f5f5f5; border-radius: 8px;"><p style="color: #777;">Enter a URL and click "Analyze Website" to see a detailed report here.</p></div>'
+                 )
+
+         with gr.Row():
+             chart_output = gr.HTML(
+                 label="Performance Chart",
+                 value=""
+             )
+
+         gr.HTML("""
+         <div class="footer">
+             <p>© 2025 SEO Website Analyzer | Provides quick, comprehensive SEO analysis</p>
+         </div>
+         """)
+
+         # Connect the components
+         analyze_btn.click(
+             fn=seo_analysis,
+             inputs=url_input,
+             outputs=[text_output, html_output, chart_output]
+         )
+
+         example_btn.click(
+             fn=generate_example_report,
+             inputs=[],
+             outputs=[text_output, html_output, chart_output]
+         )
+
+     return demo
+
+ # Run the app
+ if __name__ == "__main__":
+     demo = create_interface()
+     demo.launch(share=True)
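
A minimal sketch of exercising the new analyzer outside the Gradio UI. It assumes the file above is saved as app.py in the working directory and uses a placeholder URL; because the launch call sits behind the __main__ guard, importing the module does not start the web app.

    # Sketch: run one analysis programmatically (placeholder URL, not from the commit)
    from app import SEOChecker

    checker = SEOChecker()
    result = checker.analyze_seo("https://example.com")  # placeholder URL

    if result["status"] == "success":
        # score, checks, and suggestions are the fields populated by analyze_seo
        print(f"Score: {result['score']}/100")
        for check in result["checks"]:
            print(f"[{check['type']}] {check['message']}")
    else:
        print(result["message"])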