Spaces:
Runtime error
Runtime error
File size: 2,181 Bytes
d6afb45 7ee1b98 d6afb45 f586a70 56e3a34 d6afb45 b878468 d6afb45 17502e7 c0e818a 39c8f59 43954cf 17502e7 5bef3e9 43954cf a7b88de 43954cf d6afb45 6005136 17502e7 d6afb45 f586a70 39c8f59 f96a4c2 43954cf f96a4c2 8eb0cc4 e832cec f586a70 6005136 d6afb45 56e3a34 f586a70 56e3a34 26f9624 792d4ad f586a70 176890c d6afb45 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
import gradio as gr
import urllib.request
import requests
import bs4
import lxml
def find_all(url, q=None, num=None):
    """Fetch *url* and describe every tag found in the parsed document.

    Args:
        url: Address of the page to download and parse.
        q: Unused. Accepted so the signature matches the three inputs the
            Gradio "Load" button passes to this callback.
        num: Unused. Accepted for the same reason as ``q``.

    Returns:
        A list with one dict per tag, in document order. Each dict maps the
        tag's name to ``tag.string`` (``None`` when the tag does not wrap a
        single string) and, when the tag has a parent, ``"parent"`` to the
        parent's name.

    Raises:
        ValueError, urllib.error.URLError: If *url* is malformed or the page
            cannot be fetched.
    """
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(url) as response:
        source = response.read()
    soup = bs4.BeautifulSoup(source, 'lxml')

    # Debug output about the page title. Pages without a <title> yield
    # ``soup.title is None``; guard so they no longer crash the callback.
    title = soup.title
    print(title)
    if title is not None:
        print(title.name)
        print(title.string)
        print(title.parent.name)

    rawp = []
    for tag in soup.find_all():
        # Debug output: direct <a> children of the current tag.
        print(tag.find_all("a", recursive=False))
        entry = {tag.name: tag.string}
        parent = tag.parent
        if parent is not None:
            entry["parent"] = parent.name
        rawp.append(entry)

    # Debug output: every link target on the page. A distinct loop name is
    # used so the ``url`` parameter is not shadowed.
    for link in soup.find_all('a'):
        print(link.get('href'))
    return rawp
def find_it(url, q=None, num=None):
    """Fetch *url* and collect every tag whose name equals ``q``.

    Args:
        url: Address of the page to download and parse.
        q: Tag name to search for (for example ``"p"``). It is converted to
            a string before the search.
        num: Unused. Accepted so the signature matches the three inputs the
            Gradio "Find" button passes to this callback.

    Returns:
        A list with one single-element list per matching tag. The inner
        element is a dict mapping ``q`` to ``tag.string`` and ``"parent"``
        to the name of the tag's parent.

    Raises:
        ValueError, urllib.error.URLError: If *url* is malformed or the page
            cannot be fetched.
    """
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(url) as response:
        source = response.read()
    soup = bs4.BeautifulSoup(source, 'lxml')

    out = []
    for match in soup.find_all(f'{q}'):
        # Debug output: all descendants of the matched tag.
        print(match.find_all())
        out.append([{q: match.string, "parent": match.parent.name}])

    # Debug output: every link target, then the page's plain text. A distinct
    # loop name is used so the ``url`` parameter is not shadowed.
    for link in soup.find_all('a'):
        print(link.get('href'))
    print(soup.get_text())
    return out
def find_it2(url):
    """Fetch *url* with ``requests`` and list the text of every link.

    Args:
        url: Address of the page to download and parse.

    Returns:
        On success, a string starting with ``"URL Links:"`` followed by the
        text of each ``<a>`` element, one per line. On failure, the caught
        exception object (it is also printed).
    """
    try:
        # The request lives inside the ``try`` so connection failures are
        # reported the same way as HTTP error statuses. The timeout keeps a
        # stalled server from hanging the caller indefinitely.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        # Only ``bs4`` is imported by this file, so qualify the class name.
        soup = bs4.BeautifulSoup(response.content, 'lxml')
        link_texts = [anchor.text for anchor in soup.find_all('a')]
        # Header once, then one link text per line.
        return 'URL Links:\n' + '\n'.join(link_texts)
    except Exception as e:
        # Deliberate best effort: hand the error back instead of raising.
        print(e)
        return e
# Gradio UI: three inputs, two action buttons, and two JSON result panes.
with gr.Blocks() as app:
    # Inputs: page URL, tag name to search for, and a numeric value.
    with gr.Row():
        inp = gr.Textbox()
        q = gr.Textbox(value="p")
        num = gr.Number(value=1)
    # Actions.
    with gr.Row():
        all_btn = gr.Button("Load")
        find_btn = gr.Button("Find")
    # Outputs: all tags on the left, matches for the tag name on the right.
    with gr.Row():
        rawp = gr.JSON()
        outp = gr.JSON()
    # Event listeners must be registered inside the Blocks context.
    all_btn.click(find_all, [inp, q, num], [rawp])
    find_btn.click(find_it, [inp, q, num], [outp])
app.launch()
|