Omnibus committed on
Commit
f586a70
1 Parent(s): 26f9624

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -21
app.py CHANGED
@@ -4,38 +4,21 @@ import requests
4
  import bs4
5
  import lxml
6
 
7
- def find_it1(url,q=None,num=None):
8
- out = []
9
  rawp = []
10
  source = urllib.request.urlopen(url).read()
11
  soup = bs4.BeautifulSoup(source,'lxml')
12
  # title of the page
13
  print(soup.title)
14
-
15
  # get attributes:
16
  print(soup.title.name)
17
-
18
  # get values:
19
  print(soup.title.string)
20
-
21
  # beginning navigation:
22
  print(soup.title.parent.name)
23
-
24
  # getting specific values:
25
  print(soup.p)
26
  print(soup.find_all('p'))
27
- for p in soup.find_all(f'{q}'):
28
-
29
- #print(paragraph.string)
30
- #print(str(paragraph.text))
31
- #out.append(f'{p}\n')
32
- #out.append(f'{p.string}\n')
33
- #out.append(f'{p.text}\n')
34
- #out.append("\n")
35
- out.append(p)
36
- out.append(p.string)
37
- out.append(p.text)
38
- #print([str(tag) for tag in soup.find_all()])
39
  for tag in soup.find_all():
40
  rawp.append(tag)
41
  for url in soup.find_all('a'):
@@ -44,7 +27,25 @@ def find_it1(url,q=None,num=None):
44
  print(soup.get_text())
45
 
46
 
47
- return rawp, out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  def find_it2(url):
50
  response = requests.get(url,a1=None,q2=None,q3=None)
@@ -63,10 +64,15 @@ with gr.Blocks() as app:
63
  inp = gr.Textbox()
64
  q = gr.Textbox(value="p")
65
  num = gr.Number(value=1)
66
- btn = gr.Button()
 
 
67
  with gr.Row():
68
  rawp = gr.JSON()
69
  outp = gr.JSON()
70
- btn.click(find_it1,[inp,q,num],[rawp,outp])
 
 
 
71
  app.launch()
72
 
 
4
  import bs4
5
  import lxml
6
 
7
+ def find_all(url,q=None,num=None):
 
8
  rawp = []
9
  source = urllib.request.urlopen(url).read()
10
  soup = bs4.BeautifulSoup(source,'lxml')
11
  # title of the page
12
  print(soup.title)
 
13
  # get attributes:
14
  print(soup.title.name)
 
15
  # get values:
16
  print(soup.title.string)
 
17
  # beginning navigation:
18
  print(soup.title.parent.name)
 
19
  # getting specific values:
20
  print(soup.p)
21
  print(soup.find_all('p'))
 
 
 
 
 
 
 
 
 
 
 
 
22
  for tag in soup.find_all():
23
  rawp.append(tag)
24
  for url in soup.find_all('a'):
 
27
  print(soup.get_text())
28
 
29
 
30
+ return rawp
31
+
32
+
33
def find_it(url, q=None, num=None):
    """Fetch *url* and collect every element whose tag name matches *q*.

    For each matching element three entries are appended to the result, in
    this order: the element itself, its ``.string`` and its ``.parent``.
    As a side effect the ``href`` of every ``<a>`` element and the page's
    full text are printed to stdout.

    Args:
        url: Address of the page to download.
        q: Tag name to search for. It is interpolated with an f-string, so
            the default ``None`` searches for a tag literally named
            ``"None"``.
        num: Accepted so the signature matches the UI wiring; not used.

    Returns:
        A flat list with three entries per matching element.
    """
    # Close the HTTP response deterministically instead of leaving it to
    # the garbage collector.
    with urllib.request.urlopen(url) as response:
        source = response.read()
    soup = bs4.BeautifulSoup(source, 'lxml')

    out = []
    for element in soup.find_all(f'{q}'):
        out.extend((element, element.string, element.parent))

    # NOTE: the previous version also appended every tag's name and string
    # to ``rawp``, a name this function never defined, so the call either
    # raised or wrote into an unrelated module-level object. The values were
    # never returned, so that loop has been dropped.

    # ``link`` rather than ``url`` so the parameter is not shadowed.
    for link in soup.find_all('a'):
        print(link.get('href'))

    print(soup.get_text())
    return out
49
 
50
  def find_it2(url):
51
  response = requests.get(url,a1=None,q2=None,q3=None)
 
64
  inp = gr.Textbox()
65
  q = gr.Textbox(value="p")
66
  num = gr.Number(value=1)
67
+ with gr.Row():
68
+ all_btn = gr.Button("Load")
69
+ find_btn = gr.Button("Find")
70
  with gr.Row():
71
  rawp = gr.JSON()
72
  outp = gr.JSON()
73
+
74
+ all_btn.click(find_all,[inp,q,num],[rawp])
75
+ find_btn.click(find_it,[inp,q,num],[outp])
76
+
77
  app.launch()
78