Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,20 +1,13 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import requests
|
3 |
from bs4 import BeautifulSoup
|
4 |
-
import re
|
5 |
-
from urllib.parse import urlparse
|
6 |
from itertools import combinations
|
|
|
|
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
posiciones_dominio = []
|
11 |
-
posiciones_dominio_exacto = []
|
12 |
-
url_objetivo = dominio
|
13 |
-
dominio_objetivo = urlparse(url_objetivo).netloc
|
14 |
-
|
15 |
-
table_html = "<summary>Ver Resultados</summary><table border='1'><tr><th>Posición</th><th>Título</th><th>URL</th></tr>"
|
16 |
|
17 |
-
|
|
|
18 |
|
19 |
for start in range(0, num_results, 10):
|
20 |
url = f"https://www.google.com/search?q={query}&hl={hl}&start={start}"
|
@@ -26,69 +19,81 @@ def buscar_google(query, dominio, hl='es', num_results=100):
|
|
26 |
search_results = soup.find_all('div', attrs={'class': 'tF2Cxc'})
|
27 |
all_results.extend(search_results)
|
28 |
|
|
|
|
|
29 |
for i, result in enumerate(all_results[:num_results]):
|
30 |
header = result.find('h3')
|
31 |
header = header.text if header else "Sin título"
|
32 |
link = result.find('a', href=True)['href']
|
33 |
-
link_clean =
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
mensaje = ""
|
50 |
-
emoji = ""
|
51 |
-
if posiciones_dominio:
|
52 |
-
first_position = posiciones_dominio[0]
|
53 |
-
if first_position == 1:
|
54 |
-
emoji = f"{posiciones_dominio} 🥇"
|
55 |
-
elif first_position <= 3:
|
56 |
-
emoji = f"{posiciones_dominio} 😎"
|
57 |
-
elif first_position <= 5:
|
58 |
-
emoji = f"{posiciones_dominio} 😆"
|
59 |
-
elif first_position <= 10:
|
60 |
-
emoji = f"{posiciones_dominio} 🙂"
|
61 |
-
elif first_position <= 100:
|
62 |
-
emoji = f"{posiciones_dominio} 😐"
|
63 |
-
else:
|
64 |
-
emoji = f"No está en el top 100 😭"
|
65 |
-
|
66 |
-
if len(posiciones_dominio) > 1 and 1 in posiciones_dominio:
|
67 |
-
mensaje = f"Parasitación SEO: {url_objetivo} se encuentra en las posiciones {posiciones_dominio}"
|
68 |
-
elif len(posiciones_dominio) > 1:
|
69 |
-
mensaje = f"Canibalización: {url_objetivo} se encuentra en las posiciones {posiciones_dominio}"
|
70 |
-
elif len(posiciones_dominio) == 1 and len(posiciones_dominio_exacto) == 0:
|
71 |
-
mensaje = f"Canibalización: URL diferente del dominio en la posición {posiciones_dominio[0]}"
|
72 |
-
elif len(posiciones_dominio) == 1:
|
73 |
-
mensaje = f"Sin canibalización: {url_objetivo} se encuentra en la posición {posiciones_dominio[0]}"
|
74 |
else:
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from bs4 import BeautifulSoup
|
|
|
|
|
2 |
from itertools import combinations
|
3 |
+
from IPython.display import display, HTML
|
4 |
+
import requests
|
5 |
|
6 |
+
# Color palette cycled through (in first-seen order) when highlighting URLs
# that appear in more than one keyword's SERP.
color_palette = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#bcbd22', '#17becf', '#ff6666', '#4dff4d', '#6666ff', '#ffcc00', '#993399', '#00cc99', '#ff5050', '#33adff', '#ff9966']
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
def buscar_google(query, hl='es', num_results=10):
|
10 |
+
all_results = []
|
11 |
|
12 |
for start in range(0, num_results, 10):
|
13 |
url = f"https://www.google.com/search?q={query}&hl={hl}&start={start}"
|
|
|
19 |
search_results = soup.find_all('div', attrs={'class': 'tF2Cxc'})
|
20 |
all_results.extend(search_results)
|
21 |
|
22 |
+
# Obtener la lista de títulos y URLs de los resultados de búsqueda
|
23 |
+
serp_data = []
|
24 |
for i, result in enumerate(all_results[:num_results]):
|
25 |
header = result.find('h3')
|
26 |
header = header.text if header else "Sin título"
|
27 |
link = result.find('a', href=True)['href']
|
28 |
+
link_clean = link.split('&')[0].split('=')[1] if 'url?q=' in link else link
|
29 |
+
serp_data.append({"Posición": i + 1, "Título": header, "URL": link_clean})
|
30 |
+
|
31 |
+
return serp_data
|
32 |
+
|
33 |
+
def calcular_coincidencias(serp1, serp2):
    """Return the percentage (0-100) of serp1's URLs that also appear in serp2.

    A small bonus is added when the top positions agree: +5 points when the
    first results of both SERPs share the same URL, otherwise +3 points when
    the second results do. The total is capped at 100.

    Args:
        serp1: reference SERP — list of dicts with at least a "URL" key.
        serp2: compared SERP — list of dicts with at least a "URL" key.

    Returns:
        int: rounded overlap percentage; 0 when serp1 has no URLs
        (the original raised ZeroDivisionError in that case).
    """
    urls_serp1 = {entry["URL"] for entry in serp1}
    urls_serp2 = {entry["URL"] for entry in serp2}

    # Empty reference SERP: nothing can coincide; avoid dividing by zero.
    if not urls_serp1:
        return 0

    common_urls = urls_serp1.intersection(urls_serp2)
    base = len(common_urls) / len(urls_serp1) * 100

    # Bonus for matching top positions. Length guards fix the original's
    # IndexError on serp1[1]/serp2[1] when either SERP has a single entry.
    if serp1 and serp2 and serp1[0]["URL"] == serp2[0]["URL"]:
        bonus = 5
    elif len(serp1) > 1 and len(serp2) > 1 and serp1[1]["URL"] == serp2[1]["URL"]:
        bonus = 3
    else:
        bonus = 0

    return min(100, round(base + bonus))
|
45 |
+
|
46 |
+
def generar_html_con_colores(serp_results, color_palette):
    """Build an HTML table of SERP positions, coloring URLs shared between keywords.

    Args:
        serp_results: dict mapping keyword -> list of {"Posición", "Título", "URL"}
            dicts, as produced by buscar_google.
        color_palette: list of CSS color strings, cycled through for repeated URLs.

    Returns:
        str: an HTML <table> with one column per keyword and one row per position.
        A URL that occurs more than once (across all SERPs) is wrapped in a
        colored <span>, with one stable color per URL; unique URLs are uncolored.
    """
    html_table = "<table border='1'><tr><th>Posición</th>"
    for keyword in serp_results:
        html_table += f"<th>{keyword}</th>"
    html_table += "</tr>"

    # Count each URL's occurrences once up front. The original recounted with a
    # full scan per cell, which was accidentally O(n^2).
    url_counts = {}
    for serp in serp_results.values():
        for entry in serp:
            url_counts[entry["URL"]] = url_counts.get(entry["URL"], 0) + 1

    # Colors are assigned lazily in first-seen order so the rendered output
    # matches the original implementation exactly.
    url_colors = {}

    # Longest SERP determines the row count; default=0 handles an empty dict
    # (the original raised ValueError on max() of an empty sequence).
    max_length = max((len(serp) for serp in serp_results.values()), default=0)

    for i in range(max_length):
        row_values = [str(i + 1)]
        for keyword in serp_results:
            serp = serp_results[keyword]
            if i < len(serp):
                url = serp[i]["URL"]
                if url_counts[url] > 1:
                    # Repeated URL: give it a stable color from the palette.
                    if url not in url_colors:
                        url_colors[url] = color_palette[len(url_colors) % len(color_palette)]
                    row_values.append(f"<span style='color: {url_colors[url]}'>{url}</span>")
                else:
                    row_values.append(url)  # unique URL stays uncolored
            else:
                row_values.append("")  # shorter SERP: pad with an empty cell
        html_table += "<tr>" + "".join(f"<td>{value}</td>" for value in row_values) + "</tr>"

    html_table += "</table>"
    return html_table
|
81 |
+
|
82 |
+
# Keywords whose SERPs we want to compare.
keywords = ["lavanderia ponferrada", "lavanderia autoservicio ponferrada", "lavanderia bierzo"]

# Fetch the SERP for every keyword.
serp_results = {kw: buscar_google(kw) for kw in keywords}

# Pairwise URL-overlap percentage between keywords.
coincidencia_matrix = {}
for kw_a, kw_b in combinations(keywords, 2):
    overlap = calcular_coincidencias(serp_results[kw_a], serp_results[kw_b])
    pair_label = f"{kw_a} & {kw_b}"
    coincidencia_matrix[pair_label] = overlap
    print(f"{pair_label}: {overlap}%")

# Render the color-coded comparison table.
html_table_with_colors = generar_html_con_colores(serp_results, color_palette)
display(HTML(html_table_with_colors))
|