"""Small Gradio demo for scraping a web page.

``find_it1`` fetches a URL and collects every tag whose name matches a
user-supplied tag name; ``find_it2`` fetches a URL and returns its anchor
texts as one newline-joined string.
"""
import urllib.request

import bs4
import gradio as gr
import requests


def find_it1(url, q=None, num=None):
    """Fetch *url* and collect tags named *q*.

    Args:
        url: page URL to fetch.
        q: tag name to search for (e.g. ``"p"``).
        num: accepted for interface compatibility with the UI wiring;
            currently unused.

    Returns:
        ``(rawp, out)`` where ``rawp`` is every tag on the page rendered as
        text, and ``out`` holds, for each matching tag, its markup, its
        ``.string``, and its ``.text``, separated by blank entries.
    """
    out = []
    rawp = []
    source = urllib.request.urlopen(url).read()
    soup = bs4.BeautifulSoup(source, 'lxml')
    # find_all accepts the tag name directly; no need to wrap it in an f-string.
    for tag in soup.find_all(q):
        out.append(f'{tag}\n')
        out.append(f'{tag.string}\n')
        out.append(f'{tag.text}\n')
        out.append("\n")
    # Raw dump of every tag on the page, for the JSON output panel.
    for tag in soup.find_all():
        rawp.append(f'{tag}\n')
    return rawp, out


def find_it2(url):
    """Fetch *url* and return its anchor texts, one per line.

    Returns:
        A string starting with ``'URL Links:'`` followed by one line per
        ``<a>`` tag, or an error message string if the request or parse
        fails.
    """
    try:
        # BUG fix: the original passed invalid keyword args (a1/q2/q3) to
        # requests.get (TypeError), and made the call outside the try block
        # so connection errors were never caught.
        response = requests.get(url)
        response.raise_for_status()
        # BUG fix: the original referenced BeautifulSoup unqualified, but
        # only the bs4 module is imported (NameError).
        soup = bs4.BeautifulSoup(response.content, 'lxml')
        # BUG fix: str.join inserts its receiver BETWEEN items; the header
        # belongs in front once, with '\n' between the links.
        return 'URL Links:\n' + '\n'.join(a.text for a in soup.find_all('a'))
    except Exception as e:
        print(e)
        # Return a message string rather than the exception object so the
        # Gradio output component can display it.
        return str(e)


with gr.Blocks() as app:
    with gr.Row():
        inp = gr.Textbox()
        q = gr.Textbox(value="p")
        num = gr.Number(value=1)
        btn = gr.Button()
    with gr.Row():
        rawp = gr.Textbox()
        outp = gr.JSON()
    btn.click(find_it1, [inp, q, num], [rawp, outp])

app.launch()