Omnibus committed on
Commit
20cc82f
1 Parent(s): 07bc596

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -11
app.py CHANGED
@@ -48,7 +48,7 @@ def parse_action(string: str):
48
 
49
  VERBOSE = True
50
  MAX_HISTORY = 100
51
- MAX_DATA = 10000
52
 
53
  def format_prompt(message, history):
54
  prompt = "<s>"
@@ -289,7 +289,8 @@ def find_all(purpose,task,history, url):
289
  print (f"trying URL:: {url}")
290
  try:
291
  if url != "" and url != None:
292
- rawp = []
 
293
  source = requests.get(url)
294
  #source = urllib.request.urlopen(url).read()
295
  soup = bs4.BeautifulSoup(source.content,'lxml')
@@ -303,18 +304,22 @@ def find_all(purpose,task,history, url):
303
  print(soup.title.parent.name)
304
  #rawp.append([tag.name for tag in soup.find_all()] )
305
  print([tag.name for tag in soup.find_all()])
306
- rawp=soup
 
 
 
 
307
  c=0
308
- rl = len(rawp)
309
- print(rl)
310
- for i in str(rawp):
311
- if i == " " or i==",":
312
- c +=1
313
-
314
- print (c)
315
  if rl > MAX_DATA:
316
  print("compressing...")
317
- rawp = compress_data(rl,purpose,task,rawp)
318
  print (rawp)
319
  history += "observation: the search results are:\n {}\n".format(rawp)
320
  task = "complete?"
 
48
 
49
  VERBOSE = True
50
  MAX_HISTORY = 100
51
+ MAX_DATA = 1000
52
 
53
  def format_prompt(message, history):
54
  prompt = "<s>"
 
289
  print (f"trying URL:: {url}")
290
  try:
291
  if url != "" and url != None:
292
+ #rawp = []
293
+ out = []
294
  source = requests.get(url)
295
  #source = urllib.request.urlopen(url).read()
296
  soup = bs4.BeautifulSoup(source.content,'lxml')
 
304
  print(soup.title.parent.name)
305
  #rawp.append([tag.name for tag in soup.find_all()] )
306
  print([tag.name for tag in soup.find_all()])
307
+ rawp=(f'RAW TEXT RETURNED:\n*********\n{soup.text}\n*********\n')
308
+ out.append(rawp)
309
+ q=("a","p","span","content","article")
310
+ for p in soup.find_all(f'{q}'):
311
+ out.append([{q:p.string,"additional":z,"parent":p.parent.name,"previous":[b for b in p.previous],"first-child":[b.name for b in p.children],"content":p}])
312
  c=0
313
+ rl = len(out)
314
+ print(f'rl:: {rl}')
315
+ for ea in out:
316
+ for i in str(ea):
317
+ if i == " " or i==",":
318
+ c +=1
319
+ print (f'c:: {c}')
320
  if rl > MAX_DATA:
321
  print("compressing...")
322
+ rawp = compress_data(c,purpose,task,out)
323
  print (rawp)
324
  history += "observation: the search results are:\n {}\n".format(rawp)
325
  task = "complete?"