RSS-AGGREGATOR

Build error

App Files Files Community

Omnibus committed on Jan 25, 2024

Commit

51bd6f0

verified ·

1 Parent(s): 7d5895e

Create app.py

Browse files

Files changed (1) hide show

app.py +238 -0

app.py ADDED Viewed

	@@ -0,0 +1,238 @@

+import gradio as gr
+import requests
+import bs4
+import lxml
+import os
+from huggingface_hub import InferenceClient,HfApi
+import random
+import json
+import datetime
+import xmltodict
+"""
+from prompts import (
+    COMPRESS_HISTORY_PROMPT,
+    COMPRESS_DATA_PROMPT,
+    COMPRESS_DATA_PROMPT_SMALL,
+    PREFIX,
+    TASK_PROMPT,
+)
+api=HfApi()
+client = InferenceClient(
+    "mistralai/Mixtral-8x7B-Instruct-v0.1"
+)
+def parse_action(string: str):
+    print("PARSING:")
+    print(string)
+    assert string.startswith("action:")
+    idx = string.find("action_input=")
+    print(idx)
+    if idx == -1:
+        print ("idx == -1")
+        print (string[8:])
+        return string[8:], None
+    print ("last return:")
+    print (string[8 : idx - 1])
+    print (string[idx + 13 :].strip("'").strip('"'))
+    return string[8 : idx - 1], string[idx + 13 :].strip("'").strip('"')
+MAX_HISTORY = 100
+MAX_DATA = 20000
+def format_prompt(message, history):
+  prompt = "<s>"
+  for user_prompt, bot_response in history:
+    prompt += f"[INST] {user_prompt} [/INST]"
+    prompt += f" {bot_response}</s> "
+  prompt += f"[INST] {message} [/INST]"
+  return prompt
+def run_gpt(
+    prompt_template,
+    stop_tokens,
+    max_tokens,
+    seed,
+    purpose,
+    **prompt_kwargs,
+):
+    timestamp=datetime.datetime.now()
+    print(seed)
+    generate_kwargs = dict(
+        temperature=0.9,
+        max_new_tokens=max_tokens,
+        top_p=0.95,
+        repetition_penalty=1.0,
+        do_sample=True,
+        seed=seed,
+    )
+    content = PREFIX.format(
+        timestamp=timestamp,
+        purpose=purpose,
+    ) + prompt_template.format(**prompt_kwargs)
+    if VERBOSE:
+        print(LOG_PROMPT.format(content))
+    #formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
+    #formatted_prompt = format_prompt(f'{content}', **prompt_kwargs['history'])
+    stream = client.text_generation(content, **generate_kwargs, stream=True, details=True, return_full_text=False)
+    resp = ""
+    for response in stream:
+        resp += response.token.text
+        #yield resp
+    if VERBOSE:
+        print(LOG_RESPONSE.format(resp))
+    return resp
+def compress_data(c,purpose, task, history, result):
+    seed=random.randint(1,1000000000)
+    print (c)
+    divr=int(c)/MAX_DATA
+    divi=int(divr)+1 if divr != int(divr) else int(divr)
+    chunk = int(int(c)/divr)
+    print(f'chunk:: {chunk}')
+    print(f'divr:: {divr}')
+    print (f'divi:: {divi}')
+    out = []
+    #out=""
+    s=0
+    e=chunk
+    print(f'e:: {e}')
+    new_history=""
+    task = f'Compile this data to fulfill the task: {task}, and complete the purpose: {purpose}\n'
+    for z in range(divi):
+        print(f's:e :: {s}:{e}')
+        hist = history[s:e]
+        resp = run_gpt(
+            COMPRESS_DATA_PROMPT,
+            stop_tokens=["observation:", "task:", "action:", "thought:"],
+            max_tokens=2048,
+            seed=seed,
+            purpose=purpose,
+            task=task,
+            knowledge=new_history,
+            history=hist,
+        )
+        new_history = resp
+        print (resp)
+        out+=resp
+        e=e+chunk
+        s=s+chunk
+    '''
+    resp = run_gpt(
+        COMPRESS_DATA_PROMPT,
+        stop_tokens=["observation:", "task:", "action:", "thought:"],
+        max_tokens=2048,
+        seed=seed,
+        purpose=purpose,
+        task=task,
+        knowledge=new_history,
+        history=result,
+    )
+    '''
+    print ("final" + resp)
+    history = "result: {}\n".format(resp)
+    return history
+def find_all(purpose,task,history, rss_url, result):
+    return_list=[]
+    #if action_input in query.tasks:
+    print (f"trying URL:: {rss_url}")
+    lod=""
+    try:
+        if rss_url != "" and rss_url != None:
+            #rawp = []
+            out = []
+            r = requests.get(f'{rss_url}')
+            if ".json" in rss_url:
+                lod = json.loads(r.text)
+            if ".xml" in rss_url:
+                lod = xmltodict.parse(r.content)
+            if ".rss" in rss_url:
+                lod = xmltodict.parse(r.content)
+            else:
+                try:
+                    lod = xmltodict.parse(r.content)
+                except Exception as e:
+                    history+=f"observation: could not complete RSS Search due to this error:\n{e}"
+                    return "MAIN", None, history, task, result
+            rawp = lod
+            print(f'RAWP::\n{rawp}')
+            cnt=0
+            cnt+=len(rawp)
+            out.append(rawp)
+            out = str(out)
+            rl = len(out)
+            print(f'rl:: {rl}')
+            c=0
+            for i in str(out):
+                c +=1
+            print (f'c:: {c}')
+            if c > MAX_HISTORY:
+                print("compressing...")
+                rawp = compress_data(c,purpose,task,out,result)
+            else:
+                rawp = out
+            result += rawp
+            print (rawp)
+            print (f'out:: {out}')
+            history = "observation: the search results are:\n {}\n".format(rawp)
+            task = "compile report and return action: COMPLETE"
+            return "MAIN", None, history, task, result
+        else:
+            history += "observation: An Error occured\nI need to trigger a search using the following syntax:\naction: READ-RSS action_input=URL\n"
+            return "UPDATE-TASK", None, history, task, result
+    except Exception as e:
+        print (e)
+        history += "observation: I need to trigger a search using the following syntax:\naction: READ-RSS action_input=URL\n"
+        return "UPDATE-TASK", None, history, task, result
+    return "MAIN", None, history, task, result
+"""
+def find_rss():
+    with open ('feeds.json','r') as j:
+        cont = json.loads(j.read())
+        print(cont)
+        for ea in cont:
+            print (ea)
+        '''
+        r = requests.get(f'{rss_url}')
+        if ".json" in rss_url:
+            lod = json.loads(r.text)
+        if ".xml" in rss_url:
+            lod = xmltodict.parse(r.content)
+        if ".rss" in rss_url:
+            lod = xmltodict.parse(r.content)
+        else:
+            try:
+                lod = xmltodict.parse(r.content)
+            except Exception as e:
+                history+=f"observation: could not complete RSS Search due to this error:\n{e}"
+                return "MAIN", None, history, task, result
+        rawp = lod
+        '''
+with gr.Blocks() as app:
+    with gr.Row():
+        rss_search = gr.Textbox(label="search rss (xml,json)")
+        search_btn=gr.Button("find rss")
+    with gr.Row():
+        rss = gr.Textbox(label="rss")
+        btn = gr.Button("load rss")
+    r_btn=gr.Button("read")
+    with gr.Row():
+        out_json = gr.JSON()
+        fil = gr.Textbox()
+    r_btn.click(find_rss,None,None)
+    #r_btn.click(read_rss,None,[out_json,fil])
+    #search_btn.click(find_rss,rss_search,out_json)
+    #btn.click(get_rss,rss,out_json)
+app.launch()