Connecting to front
Browse files- app.py +121 -232
- climateqa/engine/chains/answer_rag.py +40 -85
- climateqa/engine/chains/{intent_routing.py → intent_categorization.py} +10 -10
- climateqa/engine/chains/prompts.py +2 -2
- climateqa/engine/chains/{query_transform.py → query_transformation.py} +0 -0
- climateqa/engine/chains/retriever.py +15 -13
- climateqa/engine/graph.py +111 -5
- climateqa/engine/reranker.py +1 -1
- front/__init__.py +0 -0
- front/callbacks.py +0 -0
- front/utils.py +142 -0
- requirements.txt +7 -4
- sandbox/20240310 - CQA - Semantic Routing 1.ipynb +0 -0
- style.css +102 -0
app.py
CHANGED
|
@@ -4,7 +4,7 @@ embeddings_function = get_embeddings_function()
|
|
| 4 |
from climateqa.papers.openalex import OpenAlex
|
| 5 |
from sentence_transformers import CrossEncoder
|
| 6 |
|
| 7 |
-
reranker = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")
|
| 8 |
oa = OpenAlex()
|
| 9 |
|
| 10 |
import gradio as gr
|
|
@@ -29,16 +29,19 @@ from utils import create_user_id
|
|
| 29 |
|
| 30 |
# ClimateQ&A imports
|
| 31 |
from climateqa.engine.llm import get_llm
|
| 32 |
-
from climateqa.engine.chains.answer_rag import make_rag_chain
|
| 33 |
from climateqa.engine.vectorstore import get_pinecone_vectorstore
|
| 34 |
from climateqa.engine.retriever import ClimateQARetriever
|
|
|
|
| 35 |
from climateqa.engine.embeddings import get_embeddings_function
|
| 36 |
from climateqa.engine.chains.prompts import audience_prompts
|
| 37 |
from climateqa.sample_questions import QUESTIONS
|
| 38 |
from climateqa.constants import POSSIBLE_REPORTS
|
| 39 |
from climateqa.utils import get_image_from_azure_blob_storage
|
| 40 |
from climateqa.engine.keywords import make_keywords_chain
|
| 41 |
-
from climateqa.engine.chains.answer_rag import make_rag_papers_chain
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
# Load environment variables in local mode
|
| 44 |
try:
|
|
@@ -81,48 +84,21 @@ user_id = create_user_id()
|
|
| 81 |
|
| 82 |
|
| 83 |
|
| 84 |
-
def parse_output_llm_with_sources(output):
    """Replace "[Doc X]" citations in an answer with HTML anchor links.

    Every bracketed citation such as ``[Doc 1]`` or ``[Doc 2, Doc 3]`` is
    replaced by one superscript link per cited document, each pointing at the
    ``#doc<N>`` anchor. Text outside the citations is returned unchanged.

    Args:
        output: Answer text that may contain bracketed document citations.

    Returns:
        The answer text with each citation replaced by its HTML links.
    """
    # With a single capturing group, re.split alternates plain text (even
    # positions) and the captured citation content (odd positions).
    content_parts = re.split(r'\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]', output)
    parts = []
    for position, part in enumerate(content_parts):
        # Decide by position, not by prefix: ordinary text may itself start
        # with "Doc" (e.g. "Doctors ...") and must not be treated as a citation.
        if position % 2 == 1:
            subparts = part.split(",")
            subparts = [subpart.lower().replace("doc","").strip() for subpart in subparts]
            subparts = [f"""<a href="#doc{subpart}" class="a-doc-ref" target="_self"><span class='doc-ref'><sup>{subpart}</sup></span></a>""" for subpart in subparts]
            parts.append("".join(subparts))
        else:
            parts.append(part)
    return "".join(parts)
|
| 98 |
-
|
| 99 |
-
|
| 100 |
# Create vectorstore and retriever
|
| 101 |
vectorstore = get_pinecone_vectorstore(embeddings_function)
|
| 102 |
llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
|
|
|
|
|
|
|
| 103 |
|
| 104 |
|
| 105 |
-
def make_pairs(lst):
    """Group the items of an even-length list into consecutive 2-tuples."""
    pairs = []
    for start in range(0, len(lst), 2):
        # Indexing start + 1 fails on an odd-length list, as the contract requires even length.
        pairs.append((lst[start], lst[start + 1]))
    return pairs
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
def serialize_docs(docs):
    """Convert document objects into plain dictionaries.

    Each item of ``docs`` is read through its ``page_content`` and
    ``metadata`` attributes; the result is a new list holding one
    ``{"page_content": ..., "metadata": ...}`` dictionary per document.
    """
    return [
        {"page_content": item.page_content, "metadata": item.metadata}
        for item in docs
    ]
|
| 118 |
-
|
| 119 |
|
| 120 |
|
| 121 |
async def chat(query,history,audience,sources,reports):
|
| 122 |
"""taking a query and a message history, use a pipeline (reformulation, retriever, answering) to yield a tuple of:
|
| 123 |
(messages in gradio format, messages in langchain format, source documents)"""
|
| 124 |
|
| 125 |
-
|
|
|
|
| 126 |
|
| 127 |
if audience == "Children":
|
| 128 |
audience_prompt = audience_prompts["children"]
|
|
@@ -139,59 +115,39 @@ async def chat(query,history,audience,sources,reports):
|
|
| 139 |
|
| 140 |
if len(reports) == 0:
|
| 141 |
reports = []
|
| 142 |
-
|
| 143 |
-
retriever = ClimateQARetriever(vectorstore=vectorstore,sources = sources,min_size = 200,reports = reports,k_summary = 3,k_total = 15,threshold=0.5)
|
| 144 |
-
rag_chain = make_rag_chain(retriever,llm)
|
| 145 |
|
| 146 |
-
inputs = {"
|
| 147 |
-
result =
|
| 148 |
# result = rag_chain.stream(inputs)
|
| 149 |
|
| 150 |
-
path_reformulation = "/logs/reformulation/final_output"
|
| 151 |
-
path_keywords = "/logs/keywords/final_output"
|
| 152 |
-
path_retriever = "/logs/find_documents/final_output"
|
| 153 |
-
path_answer = "/logs/answer/streamed_output_str/-"
|
| 154 |
|
|
|
|
| 155 |
docs_html = ""
|
| 156 |
output_query = ""
|
| 157 |
output_language = ""
|
| 158 |
output_keywords = ""
|
| 159 |
gallery = []
|
|
|
|
| 160 |
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
|
|
|
| 165 |
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
output_language = op['value']["language"] # str
|
| 169 |
-
output_query = op["value"]["question"]
|
| 170 |
-
except Exception as e:
|
| 171 |
-
raise gr.Error(f"ClimateQ&A Error: {e} - The error has been noted, try another question and if the error remains, you can contact us :)")
|
| 172 |
-
|
| 173 |
-
if op["path"] == path_keywords:
|
| 174 |
-
try:
|
| 175 |
-
output_keywords = op['value']["keywords"] # str
|
| 176 |
-
output_keywords = " AND ".join(output_keywords)
|
| 177 |
-
except Exception as e:
|
| 178 |
-
pass
|
| 179 |
-
|
| 180 |
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
for i, d in enumerate(docs, 1):
|
| 186 |
-
docs_html.append(make_html_source(d, i))
|
| 187 |
-
docs_html = "".join(docs_html)
|
| 188 |
-
except TypeError:
|
| 189 |
-
print("No documents found")
|
| 190 |
-
print("op: ",op)
|
| 191 |
-
continue
|
| 192 |
|
| 193 |
-
|
| 194 |
-
new_token = op['value'] # str
|
| 195 |
# time.sleep(0.01)
|
| 196 |
previous_answer = history[-1][1]
|
| 197 |
previous_answer = previous_answer if previous_answer is not None else ""
|
|
@@ -199,10 +155,47 @@ async def chat(query,history,audience,sources,reports):
|
|
| 199 |
answer_yet = parse_output_llm_with_sources(answer_yet)
|
| 200 |
history[-1] = (query,answer_yet)
|
| 201 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
|
| 203 |
|
| 204 |
-
else:
|
| 205 |
-
continue
|
| 206 |
|
| 207 |
history = [tuple(x) for x in history]
|
| 208 |
yield history,docs_html,output_query,output_language,gallery,output_query,output_keywords
|
|
@@ -276,68 +269,6 @@ async def chat(query,history,audience,sources,reports):
|
|
| 276 |
yield history,docs_html,output_query,output_language,gallery,output_query,output_keywords
|
| 277 |
|
| 278 |
|
| 279 |
-
def make_html_source(source,i):
    """Render one source document as an HTML card.

    Args:
        source: Object exposing ``page_content`` (text) and ``metadata``
            (mapping). The metadata keys read here are ``toc_level0``,
            ``toc_level1``, ``name``, ``chunk_type``, ``short_name``,
            ``page_number``, ``url`` and, for non-text chunks, ``figure_code``.
        i: Number shown in the card title (and used in the ``doc<i>`` id of
            text cards).

    Returns:
        The HTML markup of the card as a string.
    """
    meta = source.metadata
    content = source.page_content.strip()

    # Collect the leading table-of-contents levels, stopping at the first "N/A".
    breadcrumb_parts = []
    for key in ("toc_level0", "toc_level1"):
        level = meta[key]
        if level == "N/A":
            break
        breadcrumb_parts.append(level)
    breadcrumb = " > ".join(breadcrumb_parts)

    name = f"<b>{breadcrumb}</b><br/>{meta['name']}" if len(breadcrumb) > 0 else meta['name']

    def footer_lines():
        # Footer markup shared by both card flavours; built lazily so metadata
        # is read after the header fields, in the same order as the card text.
        return [
            '<div class="card-footer">',
            f"<span>{name}</span>",
            f"""<a href="{meta['url']}#page={int(meta['page_number'])}" target="_blank" class="pdf-link">""",
            '<span role="img" aria-label="Open PDF">🔗</span>',
            "</a>",
            "</div>",
            "</div>",
            "",
        ]

    if meta["chunk_type"] == "text":
        header_lines = [
            "",
            f"""<div class="card" id="doc{i}">""",
            '<div class="card-content">',
            f"""<h2>Doc {i} - {meta['short_name']} - Page {int(meta['page_number'])}</h2>""",
            f"<p>{content}</p>",
            "</div>",
        ]
        return "\n".join(header_lines + footer_lines())

    # Any other chunk type is rendered as an image card with an AI-description note.
    if meta["figure_code"] != "N/A":
        title = f"{meta['figure_code']} - {meta['short_name']}"
    else:
        title = f"{meta['short_name']}"

    header_lines = [
        "",
        '<div class="card card-image">',
        '<div class="card-content">',
        f"""<h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>""",
        f"<p>{content}</p>",
        "<p class='ai-generated'>AI-generated description</p>",
        "</div>",
    ]
    return "\n".join(header_lines + footer_lines())
|
| 339 |
-
|
| 340 |
-
|
| 341 |
|
| 342 |
# else:
|
| 343 |
# docs_string = "No relevant passages found in the climate science reports (IPCC and IPBES)"
|
|
@@ -390,54 +321,54 @@ papers_cols_widths = {
|
|
| 390 |
papers_cols = list(papers_cols_widths.keys())
|
| 391 |
papers_cols_widths = list(papers_cols_widths.values())
|
| 392 |
|
| 393 |
-
async def find_papers(query, keywords,after):
|
| 394 |
|
| 395 |
-
|
| 396 |
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
|
| 411 |
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
|
| 418 |
|
| 419 |
-
|
| 420 |
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
|
| 425 |
-
|
| 426 |
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
|
| 431 |
-
|
| 432 |
|
| 433 |
-
|
| 434 |
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
|
| 442 |
|
| 443 |
|
|
@@ -560,9 +491,6 @@ with gr.Blocks(title="Climate Q&A", css="style.css", theme=theme,elem_id = "main
|
|
| 560 |
|
| 561 |
|
| 562 |
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
#---------------------------------------------------------------------------------------
|
| 567 |
# OTHER TABS
|
| 568 |
#---------------------------------------------------------------------------------------
|
|
@@ -571,25 +499,25 @@ with gr.Blocks(title="Climate Q&A", css="style.css", theme=theme,elem_id = "main
|
|
| 571 |
with gr.Tab("Figures",elem_id = "tab-images",elem_classes = "max-height other-tabs"):
|
| 572 |
gallery_component = gr.Gallery()
|
| 573 |
|
| 574 |
-
with gr.Tab("Papers (beta)",elem_id = "tab-papers",elem_classes = "max-height other-tabs"):
|
| 575 |
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
|
| 583 |
-
|
| 584 |
|
| 585 |
-
|
| 586 |
-
|
| 587 |
|
| 588 |
-
|
| 589 |
-
|
| 590 |
|
| 591 |
-
|
| 592 |
-
|
| 593 |
|
| 594 |
|
| 595 |
|
|
@@ -609,13 +537,13 @@ with gr.Blocks(title="Climate Q&A", css="style.css", theme=theme,elem_id = "main
|
|
| 609 |
|
| 610 |
(textbox
|
| 611 |
.submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
|
| 612 |
-
.then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component
|
| 613 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_textbox")
|
| 614 |
)
|
| 615 |
|
| 616 |
(examples_hidden
|
| 617 |
.change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
|
| 618 |
-
.then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component
|
| 619 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
|
| 620 |
)
|
| 621 |
|
|
@@ -630,47 +558,8 @@ with gr.Blocks(title="Climate Q&A", css="style.css", theme=theme,elem_id = "main
|
|
| 630 |
|
| 631 |
dropdown_samples.change(change_sample_questions,dropdown_samples,samples)
|
| 632 |
|
| 633 |
-
query_papers.submit(generate_keywords,[query_papers], [keywords_papers])
|
| 634 |
-
search_papers.click(find_papers,[query_papers,keywords_papers,after], [papers_dataframe,citations_network,papers_summary])
|
| 635 |
-
|
| 636 |
-
# # textbox.submit(predict_climateqa,[textbox,bot],[None,bot,sources_textbox])
|
| 637 |
-
# (textbox
|
| 638 |
-
# .submit(answer_user, [textbox,examples_hidden, bot], [textbox, bot],queue = False)
|
| 639 |
-
# .success(change_tab,None,tabs)
|
| 640 |
-
# .success(fetch_sources,[textbox,dropdown_sources], [textbox,sources_textbox,docs_textbox,output_query,output_language])
|
| 641 |
-
# .success(answer_bot, [textbox,bot,docs_textbox,output_query,output_language,dropdown_audience], [textbox,bot],queue = True)
|
| 642 |
-
# .success(lambda x : textbox,[textbox],[textbox])
|
| 643 |
-
# )
|
| 644 |
-
|
| 645 |
-
# (examples_hidden
|
| 646 |
-
# .change(answer_user_example, [textbox,examples_hidden, bot], [textbox, bot],queue = False)
|
| 647 |
-
# .success(change_tab,None,tabs)
|
| 648 |
-
# .success(fetch_sources,[textbox,dropdown_sources], [textbox,sources_textbox,docs_textbox,output_query,output_language])
|
| 649 |
-
# .success(answer_bot, [textbox,bot,docs_textbox,output_query,output_language,dropdown_audience], [textbox,bot],queue=True)
|
| 650 |
-
# .success(lambda x : textbox,[textbox],[textbox])
|
| 651 |
-
# )
|
| 652 |
-
# submit_button.click(answer_user, [textbox, bot], [textbox, bot], queue=True).then(
|
| 653 |
-
# answer_bot, [textbox,bot,dropdown_audience,dropdown_sources], [textbox,bot,sources_textbox]
|
| 654 |
-
# )
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
# with Modal(visible=True) as first_modal:
|
| 658 |
-
# gr.Markdown("# Welcome to ClimateQ&A !")
|
| 659 |
-
|
| 660 |
-
# gr.Markdown("### Examples")
|
| 661 |
-
|
| 662 |
-
# examples = gr.Examples(
|
| 663 |
-
# ["Yo ça roule","ça boume"],
|
| 664 |
-
# [examples_hidden],
|
| 665 |
-
# examples_per_page=8,
|
| 666 |
-
# run_on_click=False,
|
| 667 |
-
# elem_id="examples",
|
| 668 |
-
# api_name="examples",
|
| 669 |
-
# )
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
# submit.click(lambda: Modal(visible=True), None, config_modal)
|
| 673 |
-
|
| 674 |
|
| 675 |
demo.queue()
|
| 676 |
|
|
|
|
| 4 |
from climateqa.papers.openalex import OpenAlex
|
| 5 |
from sentence_transformers import CrossEncoder
|
| 6 |
|
| 7 |
+
# reranker = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")
|
| 8 |
oa = OpenAlex()
|
| 9 |
|
| 10 |
import gradio as gr
|
|
|
|
| 29 |
|
| 30 |
# ClimateQ&A imports
|
| 31 |
from climateqa.engine.llm import get_llm
|
|
|
|
| 32 |
from climateqa.engine.vectorstore import get_pinecone_vectorstore
|
| 33 |
from climateqa.engine.retriever import ClimateQARetriever
|
| 34 |
+
from climateqa.engine.reranker import get_reranker
|
| 35 |
from climateqa.engine.embeddings import get_embeddings_function
|
| 36 |
from climateqa.engine.chains.prompts import audience_prompts
|
| 37 |
from climateqa.sample_questions import QUESTIONS
|
| 38 |
from climateqa.constants import POSSIBLE_REPORTS
|
| 39 |
from climateqa.utils import get_image_from_azure_blob_storage
|
| 40 |
from climateqa.engine.keywords import make_keywords_chain
|
| 41 |
+
# from climateqa.engine.chains.answer_rag import make_rag_papers_chain
|
| 42 |
+
from climateqa.engine.graph import make_graph_agent,display_graph
|
| 43 |
+
|
| 44 |
+
from front.utils import make_html_source,parse_output_llm_with_sources,serialize_docs,make_toolbox
|
| 45 |
|
| 46 |
# Load environment variables in local mode
|
| 47 |
try:
|
|
|
|
| 84 |
|
| 85 |
|
| 86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
# Create vectorstore and retriever
|
| 88 |
vectorstore = get_pinecone_vectorstore(embeddings_function)
|
| 89 |
llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
|
| 90 |
+
reranker = get_reranker("nano")
|
| 91 |
+
agent = make_graph_agent(llm,vectorstore,reranker)
|
| 92 |
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
|
| 96 |
async def chat(query,history,audience,sources,reports):
|
| 97 |
"""taking a query and a message history, use a pipeline (reformulation, retriever, answering) to yield a tuple of:
|
| 98 |
(messages in gradio format, messages in langchain format, source documents)"""
|
| 99 |
|
| 100 |
+
date_now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
| 101 |
+
print(f">> NEW QUESTION ({date_now}) : {query}")
|
| 102 |
|
| 103 |
if audience == "Children":
|
| 104 |
audience_prompt = audience_prompts["children"]
|
|
|
|
| 115 |
|
| 116 |
if len(reports) == 0:
|
| 117 |
reports = []
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
+
inputs = {"user_input": query,"audience": audience_prompt,"sources":sources}
|
| 120 |
+
result = agent.astream_events(inputs,version = "v1") #{"callbacks":[MyCustomAsyncHandler()]})
|
| 121 |
# result = rag_chain.stream(inputs)
|
| 122 |
|
| 123 |
+
# path_reformulation = "/logs/reformulation/final_output"
|
| 124 |
+
# path_keywords = "/logs/keywords/final_output"
|
| 125 |
+
# path_retriever = "/logs/find_documents/final_output"
|
| 126 |
+
# path_answer = "/logs/answer/streamed_output_str/-"
|
| 127 |
|
| 128 |
+
docs = []
|
| 129 |
docs_html = ""
|
| 130 |
output_query = ""
|
| 131 |
output_language = ""
|
| 132 |
output_keywords = ""
|
| 133 |
gallery = []
|
| 134 |
+
start_streaming = False
|
| 135 |
|
| 136 |
+
steps_display = {
|
| 137 |
+
"categorize_intent":("🔄️ Analyzing user message",True),
|
| 138 |
+
"transform_query":("🔄️ Thinking step by step to answer the question",True),
|
| 139 |
+
"retrieve_documents":("🔄️ Searching in the knowledge base",False),
|
| 140 |
+
}
|
| 141 |
|
| 142 |
+
try:
|
| 143 |
+
async for event in result:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
+
if event["event"] == "on_chat_model_stream":
|
| 146 |
+
if start_streaming == False:
|
| 147 |
+
start_streaming = True
|
| 148 |
+
history[-1] = (query,"")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
+
new_token = event["data"]["chunk"].content
|
|
|
|
| 151 |
# time.sleep(0.01)
|
| 152 |
previous_answer = history[-1][1]
|
| 153 |
previous_answer = previous_answer if previous_answer is not None else ""
|
|
|
|
| 155 |
answer_yet = parse_output_llm_with_sources(answer_yet)
|
| 156 |
history[-1] = (query,answer_yet)
|
| 157 |
|
| 158 |
+
|
| 159 |
+
elif event["name"] == "retrieve_documents" and event["event"] == "on_chain_end":
|
| 160 |
+
try:
|
| 161 |
+
docs = event["data"]["output"]["documents"]
|
| 162 |
+
docs_html = []
|
| 163 |
+
for i, d in enumerate(docs, 1):
|
| 164 |
+
docs_html.append(make_html_source(d, i))
|
| 165 |
+
docs_html = "".join(docs_html)
|
| 166 |
+
except Exception as e:
|
| 167 |
+
print(f"Error getting documents: {e}")
|
| 168 |
+
print(event)
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
for event_name,(event_description,display_output) in steps_display.items():
|
| 172 |
+
if event["name"] == event_name:
|
| 173 |
+
if event["event"] == "on_chain_start":
|
| 174 |
+
# answer_yet = f"<p><span class='loader'></span>{event_description}</p>"
|
| 175 |
+
# answer_yet = make_toolbox(event_description, "", checked = False)
|
| 176 |
+
answer_yet = event_description
|
| 177 |
+
history[-1] = (query,answer_yet)
|
| 178 |
+
# elif event["event"] == "on_chain_end":
|
| 179 |
+
# answer_yet = ""
|
| 180 |
+
# history[-1] = (query,answer_yet)
|
| 181 |
+
# if display_output:
|
| 182 |
+
# print(event["data"]["output"])
|
| 183 |
+
|
| 184 |
+
# if op['path'] == path_reformulation: # reformulated question
|
| 185 |
+
# try:
|
| 186 |
+
# output_language = op['value']["language"] # str
|
| 187 |
+
# output_query = op["value"]["question"]
|
| 188 |
+
# except Exception as e:
|
| 189 |
+
# raise gr.Error(f"ClimateQ&A Error: {e} - The error has been noted, try another question and if the error remains, you can contact us :)")
|
| 190 |
+
|
| 191 |
+
# if op["path"] == path_keywords:
|
| 192 |
+
# try:
|
| 193 |
+
# output_keywords = op['value']["keywords"] # str
|
| 194 |
+
# output_keywords = " AND ".join(output_keywords)
|
| 195 |
+
# except Exception as e:
|
| 196 |
+
# pass
|
| 197 |
|
| 198 |
|
|
|
|
|
|
|
| 199 |
|
| 200 |
history = [tuple(x) for x in history]
|
| 201 |
yield history,docs_html,output_query,output_language,gallery,output_query,output_keywords
|
|
|
|
| 269 |
yield history,docs_html,output_query,output_language,gallery,output_query,output_keywords
|
| 270 |
|
| 271 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
|
| 273 |
# else:
|
| 274 |
# docs_string = "No relevant passages found in the climate science reports (IPCC and IPBES)"
|
|
|
|
| 321 |
papers_cols = list(papers_cols_widths.keys())
|
| 322 |
papers_cols_widths = list(papers_cols_widths.values())
|
| 323 |
|
| 324 |
+
# async def find_papers(query, keywords,after):
|
| 325 |
|
| 326 |
+
# summary = ""
|
| 327 |
|
| 328 |
+
# df_works = oa.search(keywords,after = after)
|
| 329 |
+
# df_works = df_works.dropna(subset=["abstract"])
|
| 330 |
+
# df_works = oa.rerank(query,df_works,reranker)
|
| 331 |
+
# df_works = df_works.sort_values("rerank_score",ascending=False)
|
| 332 |
+
# G = oa.make_network(df_works)
|
| 333 |
|
| 334 |
+
# height = "750px"
|
| 335 |
+
# network = oa.show_network(G,color_by = "rerank_score",notebook=False,height = height)
|
| 336 |
+
# network_html = network.generate_html()
|
| 337 |
|
| 338 |
+
# network_html = network_html.replace("'", "\"")
|
| 339 |
+
# css_to_inject = "<style>#mynetwork { border: none !important; } .card { border: none !important; }</style>"
|
| 340 |
+
# network_html = network_html + css_to_inject
|
| 341 |
|
| 342 |
|
| 343 |
+
# network_html = f"""<iframe style="width: 100%; height: {height};margin:0 auto" name="result" allow="midi; geolocation; microphone; camera;
|
| 344 |
+
# display-capture; encrypted-media;" sandbox="allow-modals allow-forms
|
| 345 |
+
# allow-scripts allow-same-origin allow-popups
|
| 346 |
+
# allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
|
| 347 |
+
# allowpaymentrequest="" frameborder="0" srcdoc='{network_html}'></iframe>"""
|
| 348 |
|
| 349 |
|
| 350 |
+
# docs = df_works["content"].head(15).tolist()
|
| 351 |
|
| 352 |
+
# df_works = df_works.reset_index(drop = True).reset_index().rename(columns = {"index":"doc"})
|
| 353 |
+
# df_works["doc"] = df_works["doc"] + 1
|
| 354 |
+
# df_works = df_works[papers_cols]
|
| 355 |
|
| 356 |
+
# yield df_works,network_html,summary
|
| 357 |
|
| 358 |
+
# chain = make_rag_papers_chain(llm)
|
| 359 |
+
# result = chain.astream_log({"question": query,"docs": docs,"language":"English"})
|
| 360 |
+
# path_answer = "/logs/StrOutputParser/streamed_output/-"
|
| 361 |
|
| 362 |
+
# async for op in result:
|
| 363 |
|
| 364 |
+
# op = op.ops[0]
|
| 365 |
|
| 366 |
+
# if op['path'] == path_answer: # streamed answer token
|
| 367 |
+
# new_token = op['value'] # str
|
| 368 |
+
# summary += new_token
|
| 369 |
+
# else:
|
| 370 |
+
# continue
|
| 371 |
+
# yield df_works,network_html,summary
|
| 372 |
|
| 373 |
|
| 374 |
|
|
|
|
| 491 |
|
| 492 |
|
| 493 |
|
|
|
|
|
|
|
|
|
|
| 494 |
#---------------------------------------------------------------------------------------
|
| 495 |
# OTHER TABS
|
| 496 |
#---------------------------------------------------------------------------------------
|
|
|
|
| 499 |
with gr.Tab("Figures",elem_id = "tab-images",elem_classes = "max-height other-tabs"):
|
| 500 |
gallery_component = gr.Gallery()
|
| 501 |
|
| 502 |
+
# with gr.Tab("Papers (beta)",elem_id = "tab-papers",elem_classes = "max-height other-tabs"):
|
| 503 |
|
| 504 |
+
# with gr.Row():
|
| 505 |
+
# with gr.Column(scale=1):
|
| 506 |
+
# query_papers = gr.Textbox(placeholder="Question",show_label=False,lines = 1,interactive = True,elem_id="query-papers")
|
| 507 |
+
# keywords_papers = gr.Textbox(placeholder="Keywords",show_label=False,lines = 1,interactive = True,elem_id="keywords-papers")
|
| 508 |
+
# after = gr.Slider(minimum=1950,maximum=2023,step=1,value=1960,label="Publication date",show_label=True,interactive=True,elem_id="date-papers")
|
| 509 |
+
# search_papers = gr.Button("Search",elem_id="search-papers",interactive=True)
|
| 510 |
|
| 511 |
+
# with gr.Column(scale=7):
|
| 512 |
|
| 513 |
+
# with gr.Tab("Summary",elem_id="papers-summary-tab"):
|
| 514 |
+
# papers_summary = gr.Markdown(visible=True,elem_id="papers-summary")
|
| 515 |
|
| 516 |
+
# with gr.Tab("Relevant papers",elem_id="papers-results-tab"):
|
| 517 |
+
# papers_dataframe = gr.Dataframe(visible=True,elem_id="papers-table",headers = papers_cols)
|
| 518 |
|
| 519 |
+
# with gr.Tab("Citations network",elem_id="papers-network-tab"):
|
| 520 |
+
# citations_network = gr.HTML(visible=True,elem_id="papers-citations-network")
|
| 521 |
|
| 522 |
|
| 523 |
|
|
|
|
| 537 |
|
| 538 |
(textbox
|
| 539 |
.submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
|
| 540 |
+
.then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component],concurrency_limit = 8,api_name = "chat_textbox")
|
| 541 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_textbox")
|
| 542 |
)
|
| 543 |
|
| 544 |
(examples_hidden
|
| 545 |
.change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
|
| 546 |
+
.then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component],concurrency_limit = 8,api_name = "chat_examples")
|
| 547 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
|
| 548 |
)
|
| 549 |
|
|
|
|
| 558 |
|
| 559 |
dropdown_samples.change(change_sample_questions,dropdown_samples,samples)
|
| 560 |
|
| 561 |
+
# query_papers.submit(generate_keywords,[query_papers], [keywords_papers])
|
| 562 |
+
# search_papers.click(find_papers,[query_papers,keywords_papers,after], [papers_dataframe,citations_network,papers_summary])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 563 |
|
| 564 |
demo.queue()
|
| 565 |
|
climateqa/engine/chains/answer_rag.py
CHANGED
|
@@ -2,15 +2,11 @@ from operator import itemgetter
|
|
| 2 |
|
| 3 |
from langchain_core.prompts import ChatPromptTemplate
|
| 4 |
from langchain_core.output_parsers import StrOutputParser
|
| 5 |
-
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableBranch
|
| 6 |
from langchain_core.prompts.prompt import PromptTemplate
|
| 7 |
from langchain_core.prompts.base import format_document
|
| 8 |
|
| 9 |
-
from climateqa.engine.chains.
|
| 10 |
-
from climateqa.engine.prompts import
|
| 11 |
-
from climateqa.engine.prompts import papers_prompt_template
|
| 12 |
-
from climateqa.engine.utils import pass_values, flatten_dict,prepare_chain,rename_chain
|
| 13 |
-
from climateqa.engine.keywords import make_keywords_chain
|
| 14 |
|
| 15 |
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
|
| 16 |
|
|
@@ -40,105 +36,64 @@ def get_text_docs(x):
|
|
| 40 |
def get_image_docs(x):
    """Return the documents in ``x`` whose ``chunk_type`` metadata is "image"."""
    image_docs = []
    for candidate in x:
        if candidate.metadata["chunk_type"] == "image":
            image_docs.append(candidate)
    return image_docs
|
| 42 |
|
| 43 |
-
|
| 44 |
-
def make_rag_chain(retriever,llm):
|
| 45 |
-
|
| 46 |
-
# Construct the prompt
|
| 47 |
prompt = ChatPromptTemplate.from_template(answer_prompt_template)
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
keywords = make_keywords_chain(llm)
|
| 56 |
-
keywords = {"keywords":itemgetter("question") | keywords}
|
| 57 |
-
keywords = prepare_chain(keywords,"keywords")
|
| 58 |
-
|
| 59 |
-
# ------- CHAIN 1
|
| 60 |
-
# Retrieved documents
|
| 61 |
-
find_documents = {"docs": itemgetter("question") | retriever} | RunnablePassthrough()
|
| 62 |
-
find_documents = prepare_chain(find_documents,"find_documents")
|
| 63 |
-
|
| 64 |
-
# ------- CHAIN 2
|
| 65 |
-
# Construct inputs for the llm
|
| 66 |
-
input_documents = {
|
| 67 |
-
"context":lambda x : _combine_documents(x["docs"]),
|
| 68 |
-
**pass_values(["question","audience","language","keywords"])
|
| 69 |
-
}
|
| 70 |
-
|
| 71 |
-
# ------- CHAIN 3
|
| 72 |
-
# Bot answer
|
| 73 |
-
llm_final = rename_chain(llm,"answer")
|
| 74 |
-
|
| 75 |
-
answer_with_docs = {
|
| 76 |
-
"answer": input_documents | prompt | llm_final | StrOutputParser(),
|
| 77 |
-
**pass_values(["question","audience","language","query","docs","keywords"]),
|
| 78 |
-
}
|
| 79 |
-
|
| 80 |
-
answer_without_docs = {
|
| 81 |
-
"answer": prompt_without_docs | llm_final | StrOutputParser(),
|
| 82 |
-
**pass_values(["question","audience","language","query","docs","keywords"]),
|
| 83 |
-
}
|
| 84 |
-
|
| 85 |
-
# def has_images(x):
|
| 86 |
-
# image_docs = [doc for doc in x["docs"] if doc.metadata["chunk_type"]=="image"]
|
| 87 |
-
# return len(image_docs) > 0
|
| 88 |
-
|
| 89 |
-
def has_docs(x):
|
| 90 |
-
return len(x["docs"]) > 0
|
| 91 |
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
|
| 97 |
|
| 98 |
-
|
| 99 |
-
# Build the final chain
|
| 100 |
-
rag_chain = reformulation | keywords | find_documents | answer
|
| 101 |
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
| 103 |
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
-
|
| 106 |
|
| 107 |
-
prompt = ChatPromptTemplate.from_template(papers_prompt_template)
|
| 108 |
|
| 109 |
-
input_documents = {
|
| 110 |
-
"context":lambda x : _combine_documents(x["docs"]),
|
| 111 |
-
**pass_values(["question","language"])
|
| 112 |
-
}
|
| 113 |
|
| 114 |
-
chain = input_documents | prompt | llm | StrOutputParser()
|
| 115 |
-
chain = rename_chain(chain,"answer")
|
| 116 |
|
| 117 |
-
|
| 118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
|
|
|
|
|
|
| 120 |
|
|
|
|
| 121 |
|
| 122 |
|
| 123 |
|
| 124 |
-
def make_illustration_chain(llm):
|
| 125 |
|
| 126 |
-
prompt_with_images = ChatPromptTemplate.from_template(answer_prompt_images_template)
|
| 127 |
|
| 128 |
-
input_description_images = {
|
| 129 |
-
"images":lambda x : _combine_documents(get_image_docs(x["docs"])),
|
| 130 |
-
**pass_values(["question","audience","language","answer"]),
|
| 131 |
-
}
|
| 132 |
|
| 133 |
-
|
| 134 |
-
return illustration_chain
|
| 135 |
|
|
|
|
| 136 |
|
| 137 |
-
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
answer = "\n".join([x["question"] for x in state["questions"]])
|
| 142 |
-
return {"answer":answer}
|
| 143 |
-
|
| 144 |
-
return answer_rag
|
|
|
|
| 2 |
|
| 3 |
from langchain_core.prompts import ChatPromptTemplate
|
| 4 |
from langchain_core.output_parsers import StrOutputParser
|
|
|
|
| 5 |
from langchain_core.prompts.prompt import PromptTemplate
|
| 6 |
from langchain_core.prompts.base import format_document
|
| 7 |
|
| 8 |
+
from climateqa.engine.chains.prompts import answer_prompt_template,answer_prompt_without_docs_template,answer_prompt_images_template
|
| 9 |
+
from climateqa.engine.chains.prompts import papers_prompt_template
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
|
| 12 |
|
|
|
|
| 36 |
def get_image_docs(x):
    """Keep only the documents of ``x`` tagged as image chunks in their metadata."""
    def is_image(document):
        return document.metadata["chunk_type"] == "image"

    return list(filter(is_image, x))
|
| 38 |
|
| 39 |
+
def make_rag_chain(llm):
|
|
|
|
|
|
|
|
|
|
| 40 |
prompt = ChatPromptTemplate.from_template(answer_prompt_template)
|
| 41 |
+
chain = ({
|
| 42 |
+
"context":lambda x : _combine_documents(x["documents"]),
|
| 43 |
+
"query":itemgetter("query"),
|
| 44 |
+
"language":itemgetter("language"),
|
| 45 |
+
"audience":itemgetter("audience"),
|
| 46 |
+
} | prompt | llm | StrOutputParser())
|
| 47 |
+
return chain
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
+
def make_rag_chain_without_docs(llm):
|
| 50 |
+
prompt = ChatPromptTemplate.from_template(answer_prompt_without_docs_template)
|
| 51 |
+
chain = prompt | llm | StrOutputParser()
|
| 52 |
+
return chain
|
| 53 |
|
| 54 |
|
| 55 |
+
def make_rag_node(llm,with_docs = True):
|
|
|
|
|
|
|
| 56 |
|
| 57 |
+
if with_docs:
|
| 58 |
+
rag_chain = make_rag_chain(llm)
|
| 59 |
+
else:
|
| 60 |
+
rag_chain = make_rag_chain_without_docs(llm)
|
| 61 |
|
| 62 |
+
async def answer_rag(state,config):
|
| 63 |
+
answer = await rag_chain.ainvoke(state,config)
|
| 64 |
+
return {"answer":answer}
|
| 65 |
|
| 66 |
+
return answer_rag
|
| 67 |
|
|
|
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
|
|
|
|
|
|
| 70 |
|
| 71 |
+
# def make_rag_papers_chain(llm):
|
| 72 |
|
| 73 |
+
# prompt = ChatPromptTemplate.from_template(papers_prompt_template)
|
| 74 |
+
# input_documents = {
|
| 75 |
+
# "context":lambda x : _combine_documents(x["docs"]),
|
| 76 |
+
# **pass_values(["question","language"])
|
| 77 |
+
# }
|
| 78 |
|
| 79 |
+
# chain = input_documents | prompt | llm | StrOutputParser()
|
| 80 |
+
# chain = rename_chain(chain,"answer")
|
| 81 |
|
| 82 |
+
# return chain
|
| 83 |
|
| 84 |
|
| 85 |
|
|
|
|
| 86 |
|
|
|
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
+
# def make_illustration_chain(llm):
|
|
|
|
| 90 |
|
| 91 |
+
# prompt_with_images = ChatPromptTemplate.from_template(answer_prompt_images_template)
|
| 92 |
|
| 93 |
+
# input_description_images = {
|
| 94 |
+
# "images":lambda x : _combine_documents(get_image_docs(x["docs"])),
|
| 95 |
+
# **pass_values(["question","audience","language","answer"]),
|
| 96 |
+
# }
|
| 97 |
|
| 98 |
+
# illustration_chain = input_description_images | prompt_with_images | llm | StrOutputParser()
|
| 99 |
+
# return illustration_chain
|
|
|
|
|
|
|
|
|
|
|
|
climateqa/engine/chains/{intent_routing.py → intent_categorization.py}
RENAMED
|
@@ -7,7 +7,7 @@ from langchain_core.utils.function_calling import convert_to_openai_function
|
|
| 7 |
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
|
| 8 |
|
| 9 |
|
| 10 |
-
class
|
| 11 |
"""Analyzing the user message input"""
|
| 12 |
|
| 13 |
language: str = Field(
|
|
@@ -37,31 +37,31 @@ class IntentRouter(BaseModel):
|
|
| 37 |
|
| 38 |
|
| 39 |
|
| 40 |
-
def
|
| 41 |
|
| 42 |
-
openai_functions = [convert_to_openai_function(
|
| 43 |
-
|
| 44 |
|
| 45 |
prompt = ChatPromptTemplate.from_messages([
|
| 46 |
("system", "You are a helpful assistant, you will analyze, translate and reformulate the user input message using the function provided"),
|
| 47 |
("user", "input: {input}")
|
| 48 |
])
|
| 49 |
|
| 50 |
-
chain = prompt |
|
| 51 |
return chain
|
| 52 |
|
| 53 |
|
| 54 |
-
def
|
| 55 |
|
| 56 |
-
|
| 57 |
|
| 58 |
-
def
|
| 59 |
-
output =
|
| 60 |
if "language" not in output: output["language"] = "English"
|
| 61 |
output["query"] = state["user_input"]
|
| 62 |
return output
|
| 63 |
|
| 64 |
-
return
|
| 65 |
|
| 66 |
|
| 67 |
|
|
|
|
| 7 |
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
|
| 8 |
|
| 9 |
|
| 10 |
+
class IntentCategorizer(BaseModel):
|
| 11 |
"""Analyzing the user message input"""
|
| 12 |
|
| 13 |
language: str = Field(
|
|
|
|
| 37 |
|
| 38 |
|
| 39 |
|
| 40 |
+
def make_intent_categorization_chain(llm):
|
| 41 |
|
| 42 |
+
openai_functions = [convert_to_openai_function(IntentCategorizer)]
|
| 43 |
+
llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"IntentCategorizer"})
|
| 44 |
|
| 45 |
prompt = ChatPromptTemplate.from_messages([
|
| 46 |
("system", "You are a helpful assistant, you will analyze, translate and reformulate the user input message using the function provided"),
|
| 47 |
("user", "input: {input}")
|
| 48 |
])
|
| 49 |
|
| 50 |
+
chain = prompt | llm_with_functions | JsonOutputFunctionsParser()
|
| 51 |
return chain
|
| 52 |
|
| 53 |
|
| 54 |
+
def make_intent_categorization_node(llm):
|
| 55 |
|
| 56 |
+
categorization_chain = make_intent_categorization_chain(llm)
|
| 57 |
|
| 58 |
+
def categorize_message(state):
|
| 59 |
+
output = categorization_chain.invoke({"input":state["user_input"]})
|
| 60 |
if "language" not in output: output["language"] = "English"
|
| 61 |
output["query"] = state["user_input"]
|
| 62 |
return output
|
| 63 |
|
| 64 |
+
return categorize_message
|
| 65 |
|
| 66 |
|
| 67 |
|
climateqa/engine/chains/prompts.py
CHANGED
|
@@ -56,7 +56,7 @@ Passages:
|
|
| 56 |
{context}
|
| 57 |
|
| 58 |
-----------------------
|
| 59 |
-
Question: {
|
| 60 |
Answer in {language} with the passages citations:
|
| 61 |
"""
|
| 62 |
|
|
@@ -137,7 +137,7 @@ Guidelines:
|
|
| 137 |
- If the question is not related to environmental issues, never never answer it. Say it's not your role.
|
| 138 |
- Make paragraphs by starting new lines to make your answers more readable.
|
| 139 |
|
| 140 |
-
Question: {
|
| 141 |
Answer in {language}:
|
| 142 |
"""
|
| 143 |
|
|
|
|
| 56 |
{context}
|
| 57 |
|
| 58 |
-----------------------
|
| 59 |
+
Question: {query} - Explained to {audience}
|
| 60 |
Answer in {language} with the passages citations:
|
| 61 |
"""
|
| 62 |
|
|
|
|
| 137 |
- If the question is not related to environmental issues, never never answer it. Say it's not your role.
|
| 138 |
- Make paragraphs by starting new lines to make your answers more readable.
|
| 139 |
|
| 140 |
+
Question: {query}
|
| 141 |
Answer in {language}:
|
| 142 |
"""
|
| 143 |
|
climateqa/engine/chains/{query_transform.py → query_transformation.py}
RENAMED
|
File without changes
|
climateqa/engine/chains/retriever.py
CHANGED
|
@@ -45,7 +45,7 @@ def suppress_output():
|
|
| 45 |
|
| 46 |
|
| 47 |
|
| 48 |
-
def make_retriever_node(vectorstore,reranker):
|
| 49 |
|
| 50 |
def retrieve_documents(state):
|
| 51 |
|
|
@@ -53,15 +53,12 @@ def make_retriever_node(vectorstore,reranker):
|
|
| 53 |
questions = state["questions"]
|
| 54 |
|
| 55 |
# Use sources from the user input or from the LLM detection
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
| 57 |
auto_mode = "auto" in sources_input
|
| 58 |
-
|
| 59 |
-
# Constants
|
| 60 |
-
k_final = 15
|
| 61 |
-
k_before_reranking = 100
|
| 62 |
-
k_summary = 5
|
| 63 |
-
rerank_by_question = True
|
| 64 |
-
|
| 65 |
# There are several options to get the final top k
|
| 66 |
# Option 1 - Get 100 documents by question and rerank by question
|
| 67 |
# Option 2 - Get 100/n documents by question and rerank the total
|
|
@@ -96,9 +93,14 @@ def make_retriever_node(vectorstore,reranker):
|
|
| 96 |
docs_question = retriever.get_relevant_documents(question)
|
| 97 |
|
| 98 |
# Rerank
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
# If rerank by question we select the top documents for each question
|
| 103 |
if rerank_by_question:
|
| 104 |
docs_question = docs_question[:k_by_question[i]]
|
|
@@ -112,7 +114,7 @@ def make_retriever_node(vectorstore,reranker):
|
|
| 112 |
|
| 113 |
# Sorting the list in descending order by rerank_score
|
| 114 |
# Then select the top k
|
| 115 |
-
docs = sorted(docs, key=lambda x: x.metadata["
|
| 116 |
docs = docs[:k_final]
|
| 117 |
|
| 118 |
new_state = {"documents":docs}
|
|
|
|
| 45 |
|
| 46 |
|
| 47 |
|
| 48 |
+
def make_retriever_node(vectorstore,reranker,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5):
|
| 49 |
|
| 50 |
def retrieve_documents(state):
|
| 51 |
|
|
|
|
| 53 |
questions = state["questions"]
|
| 54 |
|
| 55 |
# Use sources from the user input or from the LLM detection
|
| 56 |
+
if "sources_input" not in state or state["sources_input"] is None:
|
| 57 |
+
sources_input = ["auto"]
|
| 58 |
+
else:
|
| 59 |
+
sources_input = state["sources_input"]
|
| 60 |
auto_mode = "auto" in sources_input
|
| 61 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
# There are several options to get the final top k
|
| 63 |
# Option 1 - Get 100 documents by question and rerank by question
|
| 64 |
# Option 2 - Get 100/n documents by question and rerank the total
|
|
|
|
| 93 |
docs_question = retriever.get_relevant_documents(question)
|
| 94 |
|
| 95 |
# Rerank
|
| 96 |
+
if reranker is not None:
|
| 97 |
+
with suppress_output():
|
| 98 |
+
docs_question = rerank_docs(reranker,docs_question,question)
|
| 99 |
+
else:
|
| 100 |
+
# Add a default reranking score
|
| 101 |
+
for doc in docs_question:
|
| 102 |
+
doc.metadata["reranking_score"] = doc.metadata["similarity_score"]
|
| 103 |
+
|
| 104 |
# If rerank by question we select the top documents for each question
|
| 105 |
if rerank_by_question:
|
| 106 |
docs_question = docs_question[:k_by_question[i]]
|
|
|
|
| 114 |
|
| 115 |
# Sorting the list in descending order by rerank_score
|
| 116 |
# Then select the top k
|
| 117 |
+
docs = sorted(docs, key=lambda x: x.metadata["reranking_score"], reverse=True)
|
| 118 |
docs = docs[:k_final]
|
| 119 |
|
| 120 |
new_state = {"documents":docs}
|
climateqa/engine/graph.py
CHANGED
|
@@ -4,14 +4,20 @@ from contextlib import contextmanager
|
|
| 4 |
|
| 5 |
from langchain.schema import Document
|
| 6 |
from langgraph.graph import END, StateGraph
|
|
|
|
|
|
|
| 7 |
from typing_extensions import TypedDict
|
| 8 |
from typing import List
|
| 9 |
|
|
|
|
|
|
|
| 10 |
from .chains.answer_chitchat import make_chitchat_node
|
| 11 |
from .chains.answer_ai_impact import make_ai_impact_node
|
| 12 |
-
from .chains.
|
| 13 |
from .chains.translation import make_translation_node
|
| 14 |
-
from .chains.
|
|
|
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
class GraphState(TypedDict):
|
|
@@ -24,9 +30,109 @@ class GraphState(TypedDict):
|
|
| 24 |
query: str
|
| 25 |
questions : List[dict]
|
| 26 |
answer: str
|
| 27 |
-
audience: str
|
| 28 |
-
sources_input: str
|
| 29 |
documents: List[Document]
|
| 30 |
|
| 31 |
def search(state):
|
| 32 |
-
return {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
from langchain.schema import Document
|
| 6 |
from langgraph.graph import END, StateGraph
|
| 7 |
+
from langchain_core.runnables.graph import CurveStyle, NodeColors, MermaidDrawMethod
|
| 8 |
+
|
| 9 |
from typing_extensions import TypedDict
|
| 10 |
from typing import List
|
| 11 |
|
| 12 |
+
from IPython.display import display, HTML, Image
|
| 13 |
+
|
| 14 |
from .chains.answer_chitchat import make_chitchat_node
|
| 15 |
from .chains.answer_ai_impact import make_ai_impact_node
|
| 16 |
+
from .chains.query_transformation import make_query_transform_node
|
| 17 |
from .chains.translation import make_translation_node
|
| 18 |
+
from .chains.intent_categorization import make_intent_categorization_node
|
| 19 |
+
from .chains.retriever import make_retriever_node
|
| 20 |
+
from .chains.answer_rag import make_rag_node
|
| 21 |
|
| 22 |
|
| 23 |
class GraphState(TypedDict):
|
|
|
|
| 30 |
query: str
|
| 31 |
questions : List[dict]
|
| 32 |
answer: str
|
| 33 |
+
audience: str = "experts"
|
| 34 |
+
sources_input: List[str] = ["auto"]
|
| 35 |
documents: List[Document]
|
| 36 |
|
| 37 |
def search(state):
|
| 38 |
+
return {}
|
| 39 |
+
|
| 40 |
+
def route_intent(state):
|
| 41 |
+
intent = state["intent"]
|
| 42 |
+
if intent in ["chitchat","esg"]:
|
| 43 |
+
return "answer_chitchat"
|
| 44 |
+
elif intent == "ai_impact":
|
| 45 |
+
return "answer_ai_impact"
|
| 46 |
+
else:
|
| 47 |
+
# Search route
|
| 48 |
+
return "search"
|
| 49 |
+
|
| 50 |
+
def route_translation(state):
|
| 51 |
+
if state["language"].lower() == "english":
|
| 52 |
+
return "transform_query"
|
| 53 |
+
else:
|
| 54 |
+
return "translate_query"
|
| 55 |
+
|
| 56 |
+
def route_based_on_relevant_docs(state,threshold_docs=0.2):
|
| 57 |
+
docs = [x for x in state["documents"] if x.metadata["reranking_score"] > threshold_docs]
|
| 58 |
+
if len(docs) > 0:
|
| 59 |
+
return "answer_rag"
|
| 60 |
+
else:
|
| 61 |
+
return "answer_rag_no_docs"
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def make_id_dict(values):
|
| 65 |
+
return {k:k for k in values}
|
| 66 |
+
|
| 67 |
+
def make_graph_agent(llm,vectorstore,reranker,threshold_docs = 0.2):
|
| 68 |
+
|
| 69 |
+
workflow = StateGraph(GraphState)
|
| 70 |
+
|
| 71 |
+
# Define the node functions
|
| 72 |
+
categorize_intent = make_intent_categorization_node(llm)
|
| 73 |
+
transform_query = make_query_transform_node(llm)
|
| 74 |
+
translate_query = make_translation_node(llm)
|
| 75 |
+
answer_chitchat = make_chitchat_node(llm)
|
| 76 |
+
answer_ai_impact = make_ai_impact_node(llm)
|
| 77 |
+
retrieve_documents = make_retriever_node(vectorstore,reranker)
|
| 78 |
+
answer_rag = make_rag_node(llm,with_docs=True)
|
| 79 |
+
answer_rag_no_docs = make_rag_node(llm,with_docs=False)
|
| 80 |
+
|
| 81 |
+
# Define the nodes
|
| 82 |
+
workflow.add_node("categorize_intent", categorize_intent)
|
| 83 |
+
workflow.add_node("search", search)
|
| 84 |
+
workflow.add_node("transform_query", transform_query)
|
| 85 |
+
workflow.add_node("translate_query", translate_query)
|
| 86 |
+
workflow.add_node("answer_chitchat", answer_chitchat)
|
| 87 |
+
workflow.add_node("answer_ai_impact", answer_ai_impact)
|
| 88 |
+
workflow.add_node("retrieve_documents",retrieve_documents)
|
| 89 |
+
workflow.add_node("answer_rag",answer_rag)
|
| 90 |
+
workflow.add_node("answer_rag_no_docs",answer_rag_no_docs)
|
| 91 |
+
|
| 92 |
+
# Entry point
|
| 93 |
+
workflow.set_entry_point("categorize_intent")
|
| 94 |
+
|
| 95 |
+
# CONDITIONAL EDGES
|
| 96 |
+
workflow.add_conditional_edges(
|
| 97 |
+
"categorize_intent",
|
| 98 |
+
route_intent,
|
| 99 |
+
make_id_dict(["answer_chitchat","answer_ai_impact","search"])
|
| 100 |
+
)
|
| 101 |
+
|
| 102 |
+
workflow.add_conditional_edges(
|
| 103 |
+
"search",
|
| 104 |
+
route_translation,
|
| 105 |
+
make_id_dict(["translate_query","transform_query"])
|
| 106 |
+
)
|
| 107 |
+
|
| 108 |
+
workflow.add_conditional_edges(
|
| 109 |
+
"retrieve_documents",
|
| 110 |
+
lambda x : route_based_on_relevant_docs(x,threshold_docs=threshold_docs),
|
| 111 |
+
make_id_dict(["answer_rag","answer_rag_no_docs"])
|
| 112 |
+
)
|
| 113 |
+
|
| 114 |
+
# Define the edges
|
| 115 |
+
workflow.add_edge("translate_query", "transform_query")
|
| 116 |
+
workflow.add_edge("transform_query", "retrieve_documents")
|
| 117 |
+
workflow.add_edge("retrieve_documents", "answer_rag")
|
| 118 |
+
workflow.add_edge("answer_rag", END)
|
| 119 |
+
workflow.add_edge("answer_rag_no_docs", END)
|
| 120 |
+
workflow.add_edge("answer_chitchat", END)
|
| 121 |
+
workflow.add_edge("answer_ai_impact", END)
|
| 122 |
+
|
| 123 |
+
# Compile
|
| 124 |
+
app = workflow.compile()
|
| 125 |
+
return app
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def display_graph(app):
|
| 131 |
+
|
| 132 |
+
display(
|
| 133 |
+
Image(
|
| 134 |
+
app.get_graph(xray = True).draw_mermaid_png(
|
| 135 |
+
draw_method=MermaidDrawMethod.API,
|
| 136 |
+
)
|
| 137 |
+
)
|
| 138 |
+
)
|
climateqa/engine/reranker.py
CHANGED
|
@@ -34,7 +34,7 @@ def rerank_docs(reranker,docs,query):
|
|
| 34 |
for result in results.results:
|
| 35 |
doc_id = result.document.doc_id
|
| 36 |
doc = docs[doc_id]
|
| 37 |
-
doc.metadata["
|
| 38 |
doc.metadata["query_used_for_retrieval"] = query
|
| 39 |
docs_reranked.append(doc)
|
| 40 |
return docs_reranked
|
|
|
|
| 34 |
for result in results.results:
|
| 35 |
doc_id = result.document.doc_id
|
| 36 |
doc = docs[doc_id]
|
| 37 |
+
doc.metadata["reranking_score"] = result.score
|
| 38 |
doc.metadata["query_used_for_retrieval"] = query
|
| 39 |
docs_reranked.append(doc)
|
| 40 |
return docs_reranked
|
front/__init__.py
ADDED
|
File without changes
|
front/callbacks.py
ADDED
|
File without changes
|
front/utils.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
def make_pairs(lst):
|
| 5 |
+
"""From a list of even length, make tuple pairs."""
|
| 6 |
+
return [(lst[i], lst[i + 1]) for i in range(0, len(lst), 2)]
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def serialize_docs(docs):
|
| 10 |
+
new_docs = []
|
| 11 |
+
for doc in docs:
|
| 12 |
+
new_doc = {}
|
| 13 |
+
new_doc["page_content"] = doc.page_content
|
| 14 |
+
new_doc["metadata"] = doc.metadata
|
| 15 |
+
new_docs.append(new_doc)
|
| 16 |
+
return new_docs
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def parse_output_llm_with_sources(output):
|
| 21 |
+
# Split the content into a list of text and "[Doc X]" references
|
| 22 |
+
content_parts = re.split(r'\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]', output)
|
| 23 |
+
parts = []
|
| 24 |
+
for part in content_parts:
|
| 25 |
+
if part.startswith("Doc"):
|
| 26 |
+
subparts = part.split(",")
|
| 27 |
+
subparts = [subpart.lower().replace("doc","").strip() for subpart in subparts]
|
| 28 |
+
subparts = [f"""<a href="#doc{subpart}" class="a-doc-ref" target="_self"><span class='doc-ref'><sup>{subpart}</sup></span></a>""" for subpart in subparts]
|
| 29 |
+
parts.append("".join(subparts))
|
| 30 |
+
else:
|
| 31 |
+
parts.append(part)
|
| 32 |
+
content_parts = "".join(parts)
|
| 33 |
+
return content_parts
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def make_html_source(source,i):
|
| 37 |
+
meta = source.metadata
|
| 38 |
+
# content = source.page_content.split(":",1)[1].strip()
|
| 39 |
+
content = source.page_content.strip()
|
| 40 |
+
|
| 41 |
+
toc_levels = []
|
| 42 |
+
for j in range(2):
|
| 43 |
+
level = meta[f"toc_level{j}"]
|
| 44 |
+
if level != "N/A":
|
| 45 |
+
toc_levels.append(level)
|
| 46 |
+
else:
|
| 47 |
+
break
|
| 48 |
+
toc_levels = " > ".join(toc_levels)
|
| 49 |
+
|
| 50 |
+
if len(toc_levels) > 0:
|
| 51 |
+
name = f"<b>{toc_levels}</b><br/>{meta['name']}"
|
| 52 |
+
else:
|
| 53 |
+
name = meta['name']
|
| 54 |
+
|
| 55 |
+
score = meta['reranking_score']
|
| 56 |
+
if score > 0.8:
|
| 57 |
+
color = "score-green"
|
| 58 |
+
elif score > 0.4:
|
| 59 |
+
color = "score-orange"
|
| 60 |
+
else:
|
| 61 |
+
color = "score-red"
|
| 62 |
+
|
| 63 |
+
relevancy_score = f"<p class=relevancy-score>Relevancy score: <span class='{color}'>{score:.1%}</span></p>"
|
| 64 |
+
|
| 65 |
+
if meta["chunk_type"] == "text":
|
| 66 |
+
|
| 67 |
+
card = f"""
|
| 68 |
+
<div class="card" id="doc{i}">
|
| 69 |
+
<div class="card-content">
|
| 70 |
+
<h2>Doc {i} - {meta['short_name']} - Page {int(meta['page_number'])}</h2>
|
| 71 |
+
<p>{content}</p>
|
| 72 |
+
{relevancy_score}
|
| 73 |
+
</div>
|
| 74 |
+
<div class="card-footer">
|
| 75 |
+
<span>{name}</span>
|
| 76 |
+
<a href="{meta['url']}#page={int(meta['page_number'])}" target="_blank" class="pdf-link">
|
| 77 |
+
<span role="img" aria-label="Open PDF">🔗</span>
|
| 78 |
+
</a>
|
| 79 |
+
</div>
|
| 80 |
+
</div>
|
| 81 |
+
"""
|
| 82 |
+
|
| 83 |
+
else:
|
| 84 |
+
|
| 85 |
+
if meta["figure_code"] != "N/A":
|
| 86 |
+
title = f"{meta['figure_code']} - {meta['short_name']}"
|
| 87 |
+
else:
|
| 88 |
+
title = f"{meta['short_name']}"
|
| 89 |
+
|
| 90 |
+
card = f"""
|
| 91 |
+
<div class="card card-image">
|
| 92 |
+
<div class="card-content">
|
| 93 |
+
<h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
|
| 94 |
+
<p>{content}</p>
|
| 95 |
+
<p class='ai-generated'>AI-generated description</p>
|
| 96 |
+
{relevancy_score}
|
| 97 |
+
</div>
|
| 98 |
+
<div class="card-footer">
|
| 99 |
+
<span>{name}</span>
|
| 100 |
+
<a href="{meta['url']}#page={int(meta['page_number'])}" target="_blank" class="pdf-link">
|
| 101 |
+
<span role="img" aria-label="Open PDF">🔗</span>
|
| 102 |
+
</a>
|
| 103 |
+
</div>
|
| 104 |
+
</div>
|
| 105 |
+
"""
|
| 106 |
+
|
| 107 |
+
return card
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def make_toolbox(tool_name,description = "",checked = False,elem_id = "toggle"):
|
| 112 |
+
|
| 113 |
+
if checked:
|
| 114 |
+
span = "<span class='checkmark'>✓</span>"
|
| 115 |
+
else:
|
| 116 |
+
span = "<span class='loader'></span>"
|
| 117 |
+
|
| 118 |
+
# toolbox = f"""
|
| 119 |
+
# <div class="dropdown">
|
| 120 |
+
# <label for="{elem_id}" class="dropdown-toggle">
|
| 121 |
+
# {span}
|
| 122 |
+
# {tool_name}
|
| 123 |
+
# <span class="caret"></span>
|
| 124 |
+
# </label>
|
| 125 |
+
# <input type="checkbox" id="{elem_id}" hidden/>
|
| 126 |
+
# <div class="dropdown-content">
|
| 127 |
+
# <p>{description}</p>
|
| 128 |
+
# </div>
|
| 129 |
+
# </div>
|
| 130 |
+
# """
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
toolbox = f"""
|
| 134 |
+
<div class="dropdown">
|
| 135 |
+
<label for="{elem_id}" class="dropdown-toggle">
|
| 136 |
+
{span}
|
| 137 |
+
{tool_name}
|
| 138 |
+
</label>
|
| 139 |
+
</div>
|
| 140 |
+
"""
|
| 141 |
+
|
| 142 |
+
return toolbox
|
requirements.txt
CHANGED
|
@@ -2,13 +2,16 @@ gradio==4.19.1
|
|
| 2 |
azure-storage-file-share==12.11.1
|
| 3 |
azure-storage-blob
|
| 4 |
python-dotenv==1.0.0
|
| 5 |
-
langchain==0.1
|
| 6 |
-
langchain_openai==0.
|
| 7 |
-
|
|
|
|
| 8 |
sentence-transformers==2.6.0
|
| 9 |
huggingface-hub
|
| 10 |
-
msal
|
| 11 |
pyalex==0.13
|
| 12 |
networkx==3.2.1
|
| 13 |
pyvis==0.3.2
|
| 14 |
flashrank==0.2.5
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
azure-storage-file-share==12.11.1
|
| 3 |
azure-storage-blob
|
| 4 |
python-dotenv==1.0.0
|
| 5 |
+
langchain==0.2.1
|
| 6 |
+
langchain_openai==0.1.7
|
| 7 |
+
langgraph==0.0.55
|
| 8 |
+
pinecone-client==4.1.0
|
| 9 |
sentence-transformers==2.6.0
|
| 10 |
huggingface-hub
|
|
|
|
| 11 |
pyalex==0.13
|
| 12 |
networkx==3.2.1
|
| 13 |
pyvis==0.3.2
|
| 14 |
flashrank==0.2.5
|
| 15 |
+
rerankers==0.3.0
|
| 16 |
+
torch==2.3.0
|
| 17 |
+
nvidia-cudnn-cu12==8.9.2.26
|
sandbox/20240310 - CQA - Semantic Routing 1.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
style.css
CHANGED
|
@@ -363,3 +363,105 @@ span.chatbot > p > img{
|
|
| 363 |
.a-doc-ref{
|
| 364 |
text-decoration: none !important;
|
| 365 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
.a-doc-ref{
|
| 364 |
text-decoration: none !important;
|
| 365 |
}
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
.dropdown {
|
| 369 |
+
position: relative;
|
| 370 |
+
display:inline-block;
|
| 371 |
+
margin-bottom: 10px;
|
| 372 |
+
}
|
| 373 |
+
|
| 374 |
+
.dropdown-toggle {
|
| 375 |
+
background-color: #f2f2f2;
|
| 376 |
+
color: black;
|
| 377 |
+
padding: 10px;
|
| 378 |
+
font-size: 16px;
|
| 379 |
+
cursor: pointer;
|
| 380 |
+
display: block;
|
| 381 |
+
width: 400px; /* Adjust width as needed */
|
| 382 |
+
position: relative;
|
| 383 |
+
display: flex;
|
| 384 |
+
align-items: center; /* Vertically center the contents */
|
| 385 |
+
justify-content: left;
|
| 386 |
+
}
|
| 387 |
+
|
| 388 |
+
.dropdown-toggle .caret {
|
| 389 |
+
content: "";
|
| 390 |
+
position: absolute;
|
| 391 |
+
right: 10px;
|
| 392 |
+
top: 50%;
|
| 393 |
+
border-left: 5px solid transparent;
|
| 394 |
+
border-right: 5px solid transparent;
|
| 395 |
+
border-top: 5px solid black;
|
| 396 |
+
transform: translateY(-50%);
|
| 397 |
+
}
|
| 398 |
+
|
| 399 |
+
input[type="checkbox"] {
|
| 400 |
+
display: none !important;
|
| 401 |
+
}
|
| 402 |
+
|
| 403 |
+
input[type="checkbox"]:checked + .dropdown-content {
|
| 404 |
+
display: block;
|
| 405 |
+
}
|
| 406 |
+
|
| 407 |
+
.dropdown-content {
|
| 408 |
+
display: none;
|
| 409 |
+
position: absolute;
|
| 410 |
+
background-color: #f9f9f9;
|
| 411 |
+
min-width: 300px;
|
| 412 |
+
box-shadow: 0 8px 16px 0 rgba(0,0,0,0.2);
|
| 413 |
+
z-index: 1;
|
| 414 |
+
padding: 12px;
|
| 415 |
+
border: 1px solid #ccc;
|
| 416 |
+
}
|
| 417 |
+
|
| 418 |
+
input[type="checkbox"]:checked + .dropdown-toggle + .dropdown-content {
|
| 419 |
+
display: block;
|
| 420 |
+
}
|
| 421 |
+
|
| 422 |
+
input[type="checkbox"]:checked + .dropdown-toggle .caret {
|
| 423 |
+
border-top: 0;
|
| 424 |
+
border-bottom: 5px solid black;
|
| 425 |
+
}
|
| 426 |
+
|
| 427 |
+
.loader {
|
| 428 |
+
border: 1px solid #d0d0d0 !important; /* Light grey background */
|
| 429 |
+
border-top: 1px solid #3498db !important; /* Blue color */
|
| 430 |
+
border-right: 1px solid #3498db !important; /* Blue color */
|
| 431 |
+
border-radius: 50%;
|
| 432 |
+
width: 20px;
|
| 433 |
+
height: 20px;
|
| 434 |
+
animation: spin 2s linear infinite;
|
| 435 |
+
display:inline-block;
|
| 436 |
+
margin-right:10px !important;
|
| 437 |
+
}
|
| 438 |
+
|
| 439 |
+
.checkmark{
|
| 440 |
+
color:green !important;
|
| 441 |
+
font-size:18px;
|
| 442 |
+
margin-right:10px !important;
|
| 443 |
+
}
|
| 444 |
+
|
| 445 |
+
@keyframes spin {
|
| 446 |
+
0% { transform: rotate(0deg); }
|
| 447 |
+
100% { transform: rotate(360deg); }
|
| 448 |
+
}
|
| 449 |
+
|
| 450 |
+
|
| 451 |
+
.relevancy-score{
|
| 452 |
+
margin-top:10px !important;
|
| 453 |
+
font-size:10px !important;
|
| 454 |
+
font-style:italic;
|
| 455 |
+
}
|
| 456 |
+
|
| 457 |
+
.score-green{
|
| 458 |
+
color:green !important;
|
| 459 |
+
}
|
| 460 |
+
|
| 461 |
+
.score-orange{
|
| 462 |
+
color:orange !important;
|
| 463 |
+
}
|
| 464 |
+
|
| 465 |
+
.score-red{
|
| 466 |
+
color:red !important;
|
| 467 |
+
}
|