Find-it-Auto

Runtime error

File size: 11,077 Bytes

d6afb45
 
7ee1b98
d6afb45
 
2a7d1fa
 
 
 
 
46d853f
2a7d1fa
 
 
 
 
57c61e4
2a7d1fa
 
 
 
 
 
 
 
 
 
 
 
 
 
5039147
 
2a7d1fa
 
5039147
2a7d1fa
5039147
 
2a7d1fa
5039147
 
 
 
2a7d1fa
 
 
 
d513153
2a7d1fa
f4c1a18
2a7d1fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29fe941
 
2a7d1fa
 
 
 
 
 
 
 
 
 
 
f7222e9
2a7d1fa
 
 
 
 
 
 
f4c1a18
2a7d1fa
 
 
 
 
 
 
 
 
 
 
f4c1a18
2a7d1fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f4c1a18
2a7d1fa
9a97394
2a7d1fa
 
 
 
 
f4c1a18
2a7d1fa
 
 
 
 
 
 
 
 
f4c1a18
2a7d1fa
 
 
 
f4c1a18
 
 
2a7d1fa
f4c1a18
2a7d1fa
 
 
 
 
 
 
 
 
9a97394
2a7d1fa
 
 
 
 
 
 
 
 
f4c1a18
2a7d1fa
267d21f
f4c1a18
9a97394
2a7d1fa
 
 
 
 
 
f4c1a18
2a7d1fa
 
 
 
 
f4c1a18
 
2a7d1fa
6a02a37
f4c1a18
 
2a7d1fa
f4c1a18
 
2a7d1fa
d513153
 
 
f4c1a18
2a7d1fa
 
f4c1a18
2a7d1fa
 
 
 
 
 
 
 
 
 
f4c1a18
2a7d1fa
 
 
 
 
 
 
 
 
 
f4c1a18
2a7d1fa
8832566
df4b728
2a7d1fa
fc6479b
20cc82f
df4b728
9a97394
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f4c1a18
9a97394
ec90841
9a97394
 
2a7d1fa
f4c1a18
 
2a7d1fa
 
455d65b
f4c1a18
2a7d1fa
 
 
 
f4c1a18
 
f586a70
 
2a7d1fa
 
e1bf925
 
 
 
dc21b34
e1bf925
 
 
f4c1a18
 
e1bf925
 
f4c1a18
e1bf925
 
 
 
 
 
 
 
 
 
5d28c4c
f4c1a18
e1bf925
f4c1a18
e1bf925
f4c1a18
e1bf925
 
9a97394
e1bf925
f4c1a18
 
 
 
 
 
 
e1bf925
 
 
 
 
 
 
 
 
 
 
f4c1a18
e1bf925
 
 
 
 
f4c1a18
e1bf925
9a97394
 
 
 
 
e1bf925
f4c1a18
 
e1bf925
 
 
 
2a7d1fa
f4c1a18
3996b5c
f4c1a18
 
2a7d1fa
 
d6afb45
f4c1a18
2a7d1fa
c5ef786
2a7d1fa
 
 
f4c1a18
2a7d1fa
d6afb45
 
f6ad611
 
 
 
 
 
f586a70
 
 
56e3a34
26f9624
792d4ad
4551e44
f586a70
 
4551e44
f586a70
176890c
f4c1a18

import gradio as gr
import urllib.request
import requests
import bs4
import lxml
import os
#import subprocess
from huggingface_hub import InferenceClient,HfApi
import random
import json
import datetime
#from query import tasks
from prompts import (
    FINDER,
    COMPRESS_HISTORY_PROMPT,
    COMPRESS_DATA_PROMPT,
    COMPRESS_DATA_PROMPT_SMALL,
    LOG_PROMPT,
    LOG_RESPONSE,
    PREFIX,
    TASK_PROMPT,
)
# Hugging Face Hub API handle, created at import time. Nothing in the visible
# code reads it afterwards.
api=HfApi()



# Shared inference client; run_gpt sends every text-generation request
# through it, targeting the model named below.
client = InferenceClient(
    "mistralai/Mixtral-8x7B-Instruct-v0.1"
)

def parse_action(string: str):
    """Split an ``action:`` line into a tool name and its optional input.

    Accepted shapes are ``action: NAME`` and
    ``action: NAME action_input=VALUE``. Spacing around the name and the
    value is tolerated.

    Args:
        string: A single line that starts with ``action:``.

    Returns:
        A ``(name, value)`` tuple. ``value`` is ``None`` when the line has no
        ``action_input=`` marker; otherwise it is the text after the marker
        with surrounding whitespace and quote characters removed.

    Raises:
        ValueError: If ``string`` does not start with ``action:``.
    """
    prefix = "action:"
    marker = "action_input="

    print("PARSING:")
    print(string)
    # Explicit check rather than ``assert`` so it still runs under ``python -O``.
    if not string.startswith(prefix):
        raise ValueError(
            f"expected a line starting with {prefix!r}, got {string!r}"
        )

    name_part, found, input_part = string[len(prefix):].partition(marker)
    # Strip the name so stray spaces cannot break the later tool lookup.
    action_name = name_part.strip()
    if not found:
        print(f"parsed action: {action_name!r} (no input)")
        return action_name, None

    # Whitespace first, then quotes, so a trailing space does not shield a quote.
    action_input = input_part.strip().strip("'\"")
    print(f"parsed action: {action_name!r} input: {action_input!r}")
    return action_name, action_input



# When true, run_gpt prints the formatted prompt/response logs and run_action
# announces when it compresses the history.
VERBOSE = True
# Threshold used in two places: run_action compresses the history once it has
# more newline-separated lines than this, and find_all compresses scraped data
# once its separator-character count exceeds it.
MAX_HISTORY = 100
# Unit compress_data uses to split scraped data across model calls.
MAX_DATA = 20000

def format_prompt(message, history):
    """Render past turns plus a new message using ``[INST] ... [/INST]`` tags.

    Args:
        message: The new user message, placed last.
        history: Iterable of ``(user_prompt, bot_response)`` pairs.

    Returns:
        A single string that opens with ``<s>``, contains one tagged segment
        per past turn, and ends with the tagged new message.
    """
    pieces = ["<s>"]
    for turn in history:
        user_prompt, bot_response = turn
        pieces.append(f"[INST] {user_prompt} [/INST]")
        pieces.append(f" {bot_response}</s> ")
    pieces.append(f"[INST] {message} [/INST]")
    return "".join(pieces)


def run_gpt(
    prompt_template,
    stop_tokens,
    max_tokens,
    seed,
    purpose,
    **prompt_kwargs,
):
    """Build a prompt, stream a completion from the shared client, return its text.

    The prompt is ``PREFIX`` (filled with the current timestamp and
    ``purpose``) followed by ``prompt_template`` filled with
    ``prompt_kwargs``.

    Args:
        prompt_template: Format string completed with ``prompt_kwargs``.
        stop_tokens: Accepted for interface compatibility; not forwarded to
            the client (see the review note in the body).
        max_tokens: Upper bound passed to the client as ``max_new_tokens``.
        seed: Sampling seed passed to the client.
        purpose: Value substituted into ``PREFIX``.
        **prompt_kwargs: Values substituted into ``prompt_template``.

    Returns:
        The generated text, i.e. the concatenation of all streamed tokens
        that are not flagged as special.
    """
    timestamp = datetime.datetime.now()

    print(seed)
    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=max_tokens,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=seed,
    )

    content = PREFIX.format(
        timestamp=timestamp,
        purpose=purpose,
    ) + prompt_template.format(**prompt_kwargs)
    if VERBOSE:
        print(LOG_PROMPT.format(content))

    # NOTE(review): stop_tokens is never sent to the model, so generation only
    # ends at max_new_tokens or end-of-sequence. Wiring it through would change
    # what callers receive - confirm against the prompts before enabling.
    stream = client.text_generation(
        content,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )
    # Skip tokens flagged as special (e.g. the end-of-sequence marker) so
    # control text does not end up inside parsed actions or URLs.
    resp = "".join(
        response.token.text
        for response in stream
        if not getattr(response.token, "special", False)
    )

    if VERBOSE:
        print(LOG_RESPONSE.format(resp))
    return resp

def compress_data(c,purpose, task, history, result):
    """Summarise ``history`` slice by slice, feeding each summary into the next call.

    The data is consumed in consecutive slices of ``MAX_DATA`` items; the
    number of slices is ``ceil(c / MAX_DATA)``, with a minimum of one. Each
    slice is sent to ``run_gpt`` with ``COMPRESS_DATA_PROMPT`` together with
    the previous response as ``knowledge``.

    Args:
        c: Size estimate for the data (anything ``int()`` accepts); it alone
            decides how many slices are processed.
        purpose: Overall goal, forwarded to the prompt.
        task: Current task; wrapped into a "compile this data" instruction.
        history: Sliceable data to summarise (the caller passes a string).
        result: Unused; kept so the signature stays unchanged.

    Returns:
        ``"result: <last response>\\n"``.
    """
    seed = random.randint(1, 1000000000)

    print (c)
    size = int(c)
    # Integer ceiling division avoids the float round-trip that could yield a
    # chunk one short of MAX_DATA, and max(1, ...) keeps a size of 0 from
    # dividing by zero or skipping the loop entirely.
    divi = max(1, -(-size // MAX_DATA))
    chunk = MAX_DATA
    print(f'chunk:: {chunk}')
    print (f'divi:: {divi}')

    # NOTE(review): `c` sizes the work but the slices index `history` directly;
    # if `c` is smaller than len(history) the tail is never summarised -
    # confirm which unit the caller intends.
    new_history = ""
    resp = ""
    task = f'Compile this data to fulfill the task: {task}, and complete the purpose: {purpose}\n'
    for index in range(divi):
        s = index * chunk
        e = s + chunk
        print(f's:e :: {s}:{e}')

        resp = run_gpt(
            COMPRESS_DATA_PROMPT,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=2048,
            seed=seed,
            purpose=purpose,
            task=task,
            knowledge=new_history,
            history=history[s:e],
        ).strip('\n')
        # The running summary becomes the "knowledge" for the next slice.
        new_history = resp
        print (resp)

    print ("final" + resp)
    return "result: {}\n".format(resp)




def compress_history(purpose, task, history):
    """Replace a long history with a single model-written observation line.

    Args:
        purpose: Overall goal, forwarded to the prompt.
        task: Current task, forwarded to the prompt.
        history: The history text to condense.

    Returns:
        ``"observation: <model response>\\n"``.
    """
    request = dict(
        stop_tokens=["observation:", "task:", "action:", "thought:"],
        max_tokens=1024,
        seed=random.randint(1,1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    summary = run_gpt(COMPRESS_HISTORY_PROMPT, **request)
    return "observation: {}\n".format(summary)


def call_main(purpose, task, history, action_input, result):
    """Ask the model for the next step and return the first action it names.

    The reply is read line by line. ``thought: `` lines are appended to the
    history; the first ``action: `` line is parsed and returned immediately.
    All other lines are ignored.

    Args:
        purpose: Overall goal, forwarded to the prompt.
        task: Current task, forwarded to the prompt and returned unchanged.
        history: History text; extended with any thought lines.
        action_input: Ignored on entry; replaced by the parsed action input.
        result: Passed through unchanged.

    Returns:
        ``(action_name, action_input, history, task, result)``. When the
        reply holds no action line, ``action_name`` is ``"MAIN"`` and
        ``action_input`` is ``None``.
    """
    resp = run_gpt(
        FINDER,
        stop_tokens=["observation:", "task:"],
        max_tokens=2048,
        seed=random.randint(1,1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    for raw_line in resp.strip().split("\n"):
        # Models sometimes indent follow-up lines; strip so they still match.
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith("thought: "):
            history += "{}\n".format(line)
        elif line.startswith("action: "):
            action_name, action_input = parse_action(line)
            print(f'ACTION::{action_name} -- INPUT :: {action_input}')
            return action_name, action_input, history, task, result
    # Test the flag itself; the string literal "VERBOSE" is always truthy.
    if VERBOSE:
        print(history)
    return "MAIN", None, history, task, result


def call_set_task(purpose, task, history, action_input, result):
    """Have the model write a new task and record the change in the history.

    Args:
        purpose: Overall goal, forwarded to the prompt.
        task: Current task, forwarded to the prompt and then replaced.
        history: History text; gains one observation line.
        action_input: Unused.
        result: Passed through unchanged.

    Returns:
        ``("MAIN", None, history, task, result)`` with the updated history
        and task.
    """
    generated = run_gpt(
        TASK_PROMPT,
        stop_tokens=[],
        max_tokens=1024,
        seed=random.randint(1,1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    task = generated.strip("\n")
    note = "observation: task has been updated to: {}\n".format(task)
    history += note
    return "MAIN", None, history, task, result



###########################################################
def search_all(url):
    """Placeholder hook: ignores ``url`` and always returns an empty string."""
    return ""



def find_all(purpose,task,history, url, result):
    """Fetch ``url``, record what was found as an observation, and return to MAIN.

    On a 200 response the page text and every ``<a>`` element (title, href,
    string) are collected and stringified. If that text contains more than
    ``MAX_HISTORY`` separator characters it is condensed with
    ``compress_data`` and the condensed text is also appended to ``result``.
    Any failure is reported back through the history instead of being raised.

    Args:
        purpose: Overall goal, forwarded to ``compress_data``.
        task: Current task; replaced with a follow-up prompt after a
            successful fetch.
        history: History text; gains exactly one observation.
        url: Address to fetch. ``None`` or ``""`` yields a syntax reminder.
        result: Accumulated result text.

    Returns:
        ``("MAIN", None, history, task, result)``.
    """
    # Seconds to wait for the remote host; without a limit a stalled server
    # would block the agent loop indefinitely.
    request_timeout = 30
    separators = frozenset(" ,\n/.<")

    print (url)
    print (f"trying URL:: {url}")
    if url is None or url == "":
        history += "observation: An Error occured\nI need to trigger a search using the following syntax:\naction: SCRAPE_WEBSITE action_input=URL\n"
        return "MAIN", None, history, task, result

    try:
        source = requests.get(url, timeout=request_timeout)
        if source.status_code != 200:
            history += f"observation: That URL string returned an error: {source.status_code}, I should try a different URL string\n"
            return "MAIN", None, history, task, result

        soup = bs4.BeautifulSoup(source.content,'lxml')
        out = [f'RAW TEXT RETURNED: {soup.text}', "HTML fragments: "]
        out.extend(
            [{"LINK TITLE":link.get('title'),"URL":link.get('href'),"STRING":link.string}]
            for link in soup.find_all("a")
        )
        out = str(out)
        print(f'rl:: {len(out)}')

        # Rough size estimate: number of word/URL separator characters.
        c = sum(1 for ch in out if ch in separators)
        print (f'c:: {c}')
        if c > MAX_HISTORY:
            print("compressing...")
            rawp = compress_data(c,purpose,task,out,result)
            result += rawp
        else:
            rawp = out

        history += "observation: the search results are:\n {}\n".format(rawp)
        task = "compile report or complete?"
        return "MAIN", None, history, task, result
    except Exception as e:
        # Tool boundary: report the failure to the agent rather than crash the loop.
        print (e)
        history += "observation: I need to trigger a search using the following syntax:\naction: SCRAPE_WEBSITE action_input=URL\n"
        return "MAIN", None, history, task, result



#################################

# Dispatch table consulted by run_action: action name -> handler. Every
# handler is invoked as handler(purpose, task, history, action_input, result).
NAME_TO_FUNC = {
    "MAIN": call_main,
    "UPDATE-TASK": call_set_task,
    # Both web actions are served by the same handler.
    "SEARCH_ENGINE": find_all,
    "SCRAPE_WEBSITE": find_all,
}


def run_action(purpose, task, history, action_name, action_input,result):
    """Execute one agent step: finish, or dispatch ``action_name`` to its handler.

    Any action name containing ``COMPLETE`` ends the run. Otherwise the
    history is compressed first when it has more than ``MAX_HISTORY`` lines,
    then the handler registered in ``NAME_TO_FUNC`` is called. Unknown names
    add a corrective observation to the history and fall back to ``MAIN``.

    Returns:
        ``(action_name, action_input, history, task, result)`` for the next step.
    """
    if "COMPLETE" in action_name:
        print("Complete - Exiting")
        return "COMPLETE", None, history, task, result

    # Keep the prompt bounded: summarise once the history grows too long.
    line_count = len(history.split("\n"))
    if line_count > MAX_HISTORY:
        if VERBOSE:
            print("COMPRESSING HISTORY")
        history = compress_history(purpose, task, history)

    if action_name not in NAME_TO_FUNC:
        history += "observation: The TOOL I tried to use returned an error, I need to select a tool from: (UPDATE-TASK, SEARCH_ENGINE, SCRAPE_WEBSITE, COMPLETE)\n"
        return "MAIN", None, history, task, result

    print(f"RUN: {action_name}  ACTION_INPUT: {action_input}")
    handler = NAME_TO_FUNC[action_name]
    return handler(purpose, task, history, action_input, result)

def run(purpose,history):
    """Drive the agent loop as a generator, yielding progress after every step.

    Yields ``"Searching..."`` first, then after each step either the
    accumulated result or ``"More Searching..."`` while the result is still
    empty. The loop ends once a step reports ``COMPLETE``.

    Args:
        purpose: The user's request.
        history: Prior conversation; stringified when truthy, otherwise
            replaced by an empty string.

    Returns:
        The final result text (as the generator's return value).
    """
    yield "Searching..."
    history = str(history) if history else ""
    task = None
    result = ""
    action_name, action_input = "MAIN", None

    finished = False
    while not finished:
        for banner in ("", "", "---"):
            print(banner)
        print("task:", task)
        print("---")
        print("---")

        step = run_action(
            purpose,
            task,
            history,
            action_name,
            action_input,
            result
        )
        action_name, action_input, history, task, result = step

        yield result if result else "More Searching..."
        finished = action_name == "COMPLETE"
    return result




# Sample prompts handed to the chat interface below.
examples =[
    "What is the current weather in Florida?",
    "Find breaking news about Texas",
    "Find the best deals on flippers for scuba diving",
    "Teach me to fly a helicopter"
]


# Build the chat UI around the `run` generator and launch it as soon as this
# module executes (there is no __main__ guard, so importing the file starts it).
gr.ChatInterface(
    fn=run,
    chatbot=gr.Chatbot(show_label=False, show_share_button=True, show_copy_button=True, likeable=True, layout="panel", height="800px"),
    title="Mixtral 46.7B Powered <br> Search",
    examples=examples,
    concurrency_limit=20,
).launch()
'''
with gr.Blocks() as app:
    with gr.Row():
        with gr.Column(scale=1):
            inp = gr.Textbox()
        with gr.Column(scale=2):
            q = gr.Textbox(value="p")
        with gr.Column(scale=2):
            num = gr.Textbox()
    with gr.Row():
        all_btn = gr.Button("Load")
        find_btn = gr.Button("Find")
    with gr.Row():
        rawp = gr.JSON()
        outp = gr.JSON()
        outl = gr.Textbox()
    
    all_btn.click(find_all,[inp,q,num],[rawp])
    find_btn.click(find_it,[inp,q,num],[outp,outl])
    
app.launch()
'''