timeki committed
Commit 175604a · 1 Parent(s): bf59b4c

Duplicates workflow to separate POC from Prod and simplify retrieval

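In short: the commit keeps one production agent for the ClimateQ&A tab and adds a separate POC agent for the Beta tab, each with its own chat entry point. A minimal, self-contained sketch of that dispatch pattern (the `chat_stream` stub and agent names below are illustrative stand-ins, not the repo's real signatures):

```python
import asyncio

async def chat_stream(agent: str, query: str):
    # Stand-in for climateqa.chat.chat_stream, which streams events from an agent.
    yield f"[{agent}] answer to: {query!r}"

async def chat(query: str):
    # Production tab ("ClimateQ&A") streams from the prod agent.
    async for event in chat_stream("prod-agent", query):
        yield event

async def chat_poc(query: str):
    # Beta tab ("Beta - POC Adapt'Action") streams from the POC agent.
    async for event in chat_stream("poc-agent", query):
        yield event

async def main():
    async for e in chat("sea level rise"):
        print(e)
    async for e in chat_poc("sea level rise"):
        print(e)

asyncio.run(main())
```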
app.py CHANGED
@@ -9,7 +9,7 @@ from climateqa.engine.embeddings import get_embeddings_function
 from climateqa.engine.llm import get_llm
 from climateqa.engine.vectorstore import get_pinecone_vectorstore
 from climateqa.engine.reranker import get_reranker
-from climateqa.engine.graph import make_graph_agent
+from climateqa.engine.graph import make_graph_agent,make_graph_agent_poc
 from climateqa.engine.chains.retrieve_papers import find_papers
 from climateqa.chat import start_chat, chat_stream, finish_chat
 
@@ -69,12 +69,19 @@ vectorstore_region = get_pinecone_vectorstore(embeddings_function, index_name=os
 llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
 reranker = get_reranker("nano")
 
-agent = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, vectorstore_region = vectorstore_region, reranker=reranker, threshold_docs=0)#TODO put back default 0.2
+agent = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, vectorstore_region = vectorstore_region, reranker=reranker, threshold_docs=0.2)
+agent_poc = make_graph_agent_poc(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, vectorstore_region = vectorstore_region, reranker=reranker, threshold_docs=0)#TODO put back default 0.2
 
 
 async def chat(query, history, audience, sources, reports, relevant_content_sources_selection, search_only):
+    print("chat cqa - message received")
     async for event in chat_stream(agent, query, history, audience, sources, reports, relevant_content_sources_selection, search_only, share_client, user_id):
         yield event
+
+async def chat_poc(query, history, audience, sources, reports, relevant_content_sources_selection, search_only):
+    print("chat poc - message received")
+    async for event in chat_stream(agent_poc, query, history, audience, sources, reports, relevant_content_sources_selection, search_only, share_client, user_id):
+        yield event
 
 
 # --------------------------------------------------------------------
@@ -205,7 +212,7 @@ def event_handling(
 
     new_sources_hmtl = gr.State([])
 
-
+    print("textbox id : ", textbox.elem_id)
 
     for button in [config_button, close_config_modal]:
        button.click(
@@ -213,18 +220,38 @@ def event_handling(
            inputs=[config_open],
            outputs=[config_modal, config_open]
        )
-    # Event for textbox
-    (textbox
-        .submit(start_chat, [textbox, chatbot, search_only], [textbox, tabs, chatbot, sources_raw], queue=False, api_name=f"start_chat_{textbox.elem_id}")
-        .then(chat, [textbox, chatbot, dropdown_audience, dropdown_sources, dropdown_reports, dropdown_external_sources, search_only], [chatbot, new_sources_hmtl, output_query, output_language, new_figures, current_graphs], concurrency_limit=8, api_name=f"chat_{textbox.elem_id}")
-        .then(finish_chat, None, [textbox], api_name=f"finish_chat_{textbox.elem_id}")
-    )
-    # Event for examples_hidden
-    (examples_hidden
-        .change(start_chat, [examples_hidden, chatbot, search_only], [examples_hidden, tabs, chatbot, sources_raw], queue=False, api_name=f"start_chat_{examples_hidden.elem_id}")
-        .then(chat, [examples_hidden, chatbot, dropdown_audience, dropdown_sources, dropdown_reports, dropdown_external_sources, search_only], [chatbot, new_sources_hmtl, output_query, output_language, new_figures, current_graphs], concurrency_limit=8, api_name=f"chat_{examples_hidden.elem_id}")
-        .then(finish_chat, None, [examples_hidden], api_name=f"finish_chat_{examples_hidden.elem_id}")
-    )
+
+    if tab_name == "ClimateQ&A":
+        print("chat cqa - message sent")
+
+        # Event for textbox
+        (textbox
+            .submit(start_chat, [textbox, chatbot, search_only], [textbox, tabs, chatbot, sources_raw], queue=False, api_name=f"start_chat_{textbox.elem_id}")
+            .then(chat, [textbox, chatbot, dropdown_audience, dropdown_sources, dropdown_reports, dropdown_external_sources, search_only], [chatbot, new_sources_hmtl, output_query, output_language, new_figures, current_graphs], concurrency_limit=8, api_name=f"chat_{textbox.elem_id}")
+            .then(finish_chat, None, [textbox], api_name=f"finish_chat_{textbox.elem_id}")
+        )
+        # Event for examples_hidden
+        (examples_hidden
+            .change(start_chat, [examples_hidden, chatbot, search_only], [examples_hidden, tabs, chatbot, sources_raw], queue=False, api_name=f"start_chat_{examples_hidden.elem_id}")
+            .then(chat, [examples_hidden, chatbot, dropdown_audience, dropdown_sources, dropdown_reports, dropdown_external_sources, search_only], [chatbot, new_sources_hmtl, output_query, output_language, new_figures, current_graphs], concurrency_limit=8, api_name=f"chat_{examples_hidden.elem_id}")
+            .then(finish_chat, None, [textbox], api_name=f"finish_chat_{examples_hidden.elem_id}")
+        )
+
+    elif tab_name == "Beta - POC Adapt'Action":
+        print("chat poc - message sent")
+        # Event for textbox
+        (textbox
+            .submit(start_chat, [textbox, chatbot, search_only], [textbox, tabs, chatbot, sources_raw], queue=False, api_name=f"start_chat_{textbox.elem_id}")
+            .then(chat_poc, [textbox, chatbot, dropdown_audience, dropdown_sources, dropdown_reports, dropdown_external_sources, search_only], [chatbot, new_sources_hmtl, output_query, output_language, new_figures, current_graphs], concurrency_limit=8, api_name=f"chat_{textbox.elem_id}")
+            .then(finish_chat, None, [textbox], api_name=f"finish_chat_{textbox.elem_id}")
+        )
+        # Event for examples_hidden
+        (examples_hidden
+            .change(start_chat, [examples_hidden, chatbot, search_only], [examples_hidden, tabs, chatbot, sources_raw], queue=False, api_name=f"start_chat_{examples_hidden.elem_id}")
+            .then(chat_poc, [examples_hidden, chatbot, dropdown_audience, dropdown_sources, dropdown_reports, dropdown_external_sources, search_only], [chatbot, new_sources_hmtl, output_query, output_language, new_figures, current_graphs], concurrency_limit=8, api_name=f"chat_{examples_hidden.elem_id}")
+            .then(finish_chat, None, [textbox], api_name=f"finish_chat_{examples_hidden.elem_id}")
+        )
+
 
     new_sources_hmtl.change(lambda x : x, inputs = [new_sources_hmtl], outputs = [sources_textbox])
     current_graphs.change(lambda x: x, inputs=[current_graphs], outputs=[graphs_container])
@@ -234,10 +261,12 @@ def event_handling(
     for component in [sources_textbox, figures_cards, current_graphs, papers_html]:
         component.change(update_sources_number_display, [sources_textbox, figures_cards, current_graphs, papers_html], [tab_recommended_content, tab_sources, tab_figures, tab_graphs, tab_papers])
 
-
     # Search for papers
     for component in [textbox, examples_hidden]:
         component.submit(find_papers, [component, after, dropdown_external_sources], [papers_html, citations_network, papers_summary])
+
+
+
 
 def main_ui():
     # config_open = gr.State(True)
@@ -246,12 +275,12 @@ def main_ui():
 
     with gr.Tabs():
         cqa_components = cqa_tab(tab_name = "ClimateQ&A")
-        # local_cqa_components = cqa_tab(tab_name = "Beta - POC Adapt'Action")
+        local_cqa_components = cqa_tab(tab_name = "Beta - POC Adapt'Action")
 
     create_about_tab()
 
     event_handling(cqa_components, config_components, tab_name = 'ClimateQ&A')
-    # event_handling(local_cqa_components, config_components, tab_name = 'Beta - POC Adapt\'Action')
+    event_handling(local_cqa_components, config_components, tab_name = 'Beta - POC Adapt\'Action')
 
     demo.queue()
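The wiring above dispatches on `tab_name` so each tab's textbox drives its own agent. A trimmed sketch of the idea, assuming a recent Gradio (the components and handlers here are simplified stand-ins for the real `cqa_tab`/`event_handling`):

```python
import gradio as gr

def answer_prod(q): return f"prod agent: {q}"
def answer_poc(q): return f"poc agent: {q}"

def make_tab(tab_name: str):
    with gr.Tab(tab_name):
        textbox = gr.Textbox(label="Question")
        output = gr.Textbox(label="Answer")
    return textbox, output

def wire_events(tab_name: str, textbox, output):
    # Same dispatch as event_handling(): pick the handler from the tab name.
    fn = answer_prod if tab_name == "ClimateQ&A" else answer_poc
    textbox.submit(fn, [textbox], [output])

with gr.Blocks() as demo:
    with gr.Tabs():
        prod = make_tab("ClimateQ&A")
        poc = make_tab("Beta - POC Adapt'Action")
    wire_events("ClimateQ&A", *prod)
    wire_events("Beta - POC Adapt'Action", *poc)

if __name__ == "__main__":
    demo.launch()
```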
climateqa/chat.py CHANGED
@@ -119,6 +119,7 @@ async def chat_stream(
     start_streaming = False
     graphs_html = ""
     used_documents = []
+    retrieved_contents = []
     answer_message_content = ""
 
     # Define processing steps
@@ -138,8 +139,8 @@
 
         # Handle document retrieval
        if event["event"] == "on_chain_end" and event["name"] in ["retrieve_documents","retrieve_local_data"] and event["data"]["output"] != None:
-            history, used_documents = handle_retrieved_documents(
-                event, history, used_documents
+            history, used_documents, retrieved_contents = handle_retrieved_documents(
+                event, history, used_documents, retrieved_contents
            )
        if event["event"] == "on_chain_end" and event["name"] == "answer_search" :
            docs = event["data"]["input"]["documents"]
@@ -180,7 +181,7 @@
        # Handle query transformation
        if event["name"] == "transform_query" and event["event"] == "on_chain_end":
            if hasattr(history[-1], "content"):
-                sub_questions = [q["question"] for q in event["data"]["output"]["questions_list"]]
+                sub_questions = [q["question"] + "-> relevant sources : " + str(q["sources"]) for q in event["data"]["output"]["questions_list"]]
                history[-1].content += "Decompose question into sub-questions:\n\n - " + "\n - ".join(sub_questions)
 
        yield history, docs_html, output_query, output_language, related_contents, graphs_html
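The `transform_query` handler now labels each sub-question with the sources chosen for it. Rendered on sample data (dict shape taken from this diff):

```python
# Self-contained example of the new sub-question formatting.
questions_list = [
    {"question": "What does the IPCC say about sea level rise?", "sources": ["IPCC"]},
    {"question": "Which regional plans cover flooding?", "sources": ["PCAET", "Biodiv"]},
]
sub_questions = [
    q["question"] + "-> relevant sources : " + str(q["sources"])
    for q in questions_list
]
print("Decompose question into sub-questions:\n\n - " + "\n - ".join(sub_questions))
```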
climateqa/engine/chains/answer_rag.py CHANGED
@@ -61,7 +61,7 @@ def make_rag_node(llm,with_docs = True):
         rag_chain = make_rag_chain(llm)
     else:
         rag_chain = make_rag_chain_without_docs(llm)
-
+    
     async def answer_rag(state,config):
         print("---- Answer RAG ----")
         start_time = time.time()
climateqa/engine/chains/query_transformation.py CHANGED
@@ -60,7 +60,7 @@ from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
 
 
 ROUTING_INDEX = {
-    "IPx":["IPCC", "IPBS", "IPOS"],
+    "IPx":["IPCC", "IPBES", "IPOS"],
     "POC": ["AcclimaTerra", "PCAET","Biodiv"],
     "OpenAlex":["OpenAlex"],
 }
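`ROUTING_INDEX` maps each retrieval route to the report sources it serves; the fix restores the canonical `IPBES` spelling. A quick self-contained sanity check of the reverse lookup this enables:

```python
ROUTING_INDEX = {
    "IPx": ["IPCC", "IPBES", "IPOS"],
    "POC": ["AcclimaTerra", "PCAET", "Biodiv"],
    "OpenAlex": ["OpenAlex"],
}
# Invert the index: which route serves a given source?
SOURCE_TO_ROUTE = {src: route for route, sources in ROUTING_INDEX.items() for src in sources}
assert SOURCE_TO_ROUTE["IPBES"] == "IPx"  # would KeyError under the old "IPBS" typo
```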
climateqa/engine/chains/retrieve_documents.py CHANGED
@@ -15,7 +15,9 @@ from ..utils import log_event
 from langchain_core.vectorstores import VectorStore
 from typing import List
 from langchain_core.documents.base import Document
+import asyncio
 
+from typing import Any, Dict, List, Tuple
 
 
 def divide_into_parts(target, parts):
@@ -272,12 +274,27 @@ def concatenate_documents(index, source_type, docs_question_dict, k_by_question,
 
 # The chain callback is not necessary, but it propagates the langchain callbacks to the astream_events logger to display intermediate results
 # @chain
-async def retrieve_documents(state,config, source_type, vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5, k_images=5):
+async def retrieve_documents(
+    current_question: Dict[str, Any],
+    config: Dict[str, Any],
+    source_type: str,
+    vectorstore: VectorStore,
+    reranker: Any,
+    search_figures: bool = False,
+    search_only: bool = False,
+    reports: list = [],
+    rerank_by_question: bool = True,
+    k_images_by_question: int = 5,
+    k_before_reranking: int = 100,
+    k_by_question: int = 5,
+    k_summary_by_question: int = 3
+) -> Tuple[List[Document], List[Document]]:
     """
     Unpack the first question of the remaining questions, and retrieve and rerank corresponding documents, based on the question and selected_sources
 
     Args:
         state (dict): The current state containing documents, related content, relevant content sources, remaining questions and n_questions.
+        current_question (dict): The current question being processed.
         config (dict): Configuration settings for logging and other purposes.
         vectorstore (object): The vector store used to retrieve relevant documents.
         reranker (object): The reranker used to rerank the retrieved documents.
@@ -290,35 +307,6 @@ async def retrieve_documents(state,config, source_type, vectorstore,reranker,llm
     Returns:
         dict: The updated state containing the retrieved and reranked documents, related content, and remaining questions.
     """
-    # TODO split the questions by source type in the state questions + conditions on the number of questions handled per source type
-    docs = state.get("documents", [])
-    related_content = state.get("related_content", [])
-
-    search_figures = "Figures (IPCC/IPBES)" in state["relevant_content_sources_selection"]
-    search_only = state["search_only"]
-
-    reports = state["reports"]
-
-    # Get the current question
-    # current_question = state["questions_list"][0]
-    # remaining_questions = state["remaining_questions"][1:]
-
-    current_question_id = None
-    print("Questions Indexs", list(range(len(state["questions_list"]))), "- Handled questions : " ,state["handled_questions_index"])
-
-    for i in range(len(state["questions_list"])):
-        current_question = state["questions_list"][i]
-
-        if i not in state["handled_questions_index"] and current_question["source_type"] == source_type:
-            current_question_id = i
-            break
-
-    # TODO filter on source_type
-
-    k_by_question = k_final // state["n_questions"]["total"]
-    k_summary_by_question = _get_k_summary_by_question(state["n_questions"]["total"])
-    k_images_by_question = _get_k_images_by_question(state["n_questions"]["total"])
-
     sources = current_question["sources"]
     question = current_question["question"]
     index = current_question["index"]
@@ -329,8 +317,7 @@ async def retrieve_documents(state,config, source_type, vectorstore,reranker,llm
 
     print(f"""---- Retrieve documents from {current_question["source_type"]}----""")
 
-    # if index == "Vector": # always true for now #TODO rename to IPx
-    if source_type == "IPx": # always true for now #TODO rename to IPx
+    if source_type == "IPx":
         docs_question_dict = await get_IPCC_relevant_documents(
             query = question,
             vectorstore=vectorstore,
@@ -359,7 +346,6 @@ async def retrieve_documents(state,config, source_type, vectorstore,reranker,llm
            k_images= k_by_question
        )
 
-
     # Rerank
     if reranker is not None and rerank_by_question:
         with suppress_output():
@@ -381,35 +367,72 @@ async def retrieve_documents(state,config, source_type, vectorstore,reranker,llm
     docs_question = _add_sources_used_in_metadata(docs_question,sources,question,index)
     images_question = _add_sources_used_in_metadata(images_question,sources,question,index)
 
-    # Add to the list of docs
-    # docs.extend(docs_question)
-    # related_content.extend(images_question)
-    docs = docs_question
-    related_content = images_question
-    new_state = {"documents":docs, "related_contents": related_content, "handled_questions_index": [current_question_id]}
-    print("Updated state with question ", current_question_id, " added ", len(docs), " documents")
-    return new_state
+    return docs_question, images_question
 
 
+async def retrieve_documents_for_all_questions(state, config, source_type, to_handle_questions_index, vectorstore, reranker, rerank_by_question=True, k_final=15, k_before_reranking=100):
+    """
+    Retrieve documents in parallel for all questions.
+    """
+    # to_handle_questions_index = [x for x in state["questions_list"] if x["source_type"] == "IPx"]
+
+    # TODO split the questions by source type in the state questions + conditions on the number of questions handled per source type
+    docs = state.get("documents", [])
+    related_content = state.get("related_content", [])
+    search_figures = "Figures (IPCC/IPBES)" in state["relevant_content_sources_selection"]
+    search_only = state["search_only"]
+    reports = state["reports"]
+
+    k_by_question = k_final // state["n_questions"]["total"]
+    k_summary_by_question = _get_k_summary_by_question(state["n_questions"]["total"])
+    k_images_by_question = _get_k_images_by_question(state["n_questions"]["total"])
+    k_before_reranking=100
+
+    tasks = [
+        retrieve_documents(
+            current_question=question,
+            config=config,
+            source_type=source_type,
+            vectorstore=vectorstore,
+            reranker=reranker,
+            search_figures=search_figures,
+            search_only=search_only,
+            reports=reports,
+            rerank_by_question=rerank_by_question,
+            k_images_by_question=k_images_by_question,
+            k_before_reranking=k_before_reranking,
+            k_by_question=k_by_question,
+            k_summary_by_question=k_summary_by_question
+        )
+        for i, question in enumerate(state["questions_list"]) if i in to_handle_questions_index
+    ]
+    results = await asyncio.gather(*tasks)
+    # Combine results
+    new_state = {"documents": [], "related_contents": [], "handled_questions_index": to_handle_questions_index}
+    for docs_question, images_question in results:
+        new_state["documents"].extend(docs_question)
+        new_state["related_contents"].extend(images_question)
+    return new_state
 
 def make_IPx_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5):
 
     @chain
     async def retrieve_IPx_docs(state, config):
         source_type = "IPx"
+        IPx_questions_index = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "IPx"]
+
         # return {"documents":[], "related_contents": [], "handled_questions_index": list(range(len(state["questions_list"])))} # TODO Remove
 
-        state = await retrieve_documents(
-            state = state,
-            config= config,
+        state = await retrieve_documents_for_all_questions(
+            state=state,
+            config=config,
             source_type=source_type,
+            to_handle_questions_index=IPx_questions_index,
            vectorstore=vectorstore,
-            reranker= reranker,
-            llm=llm,
+            reranker=reranker,
            rerank_by_question=rerank_by_question,
-            k_final=k_final,
-            k_before_reranking=k_before_reranking,
-            k_summary=k_summary
+            k_final=k_final,
+            k_before_reranking=k_before_reranking,
        )
        return state
 
@@ -420,19 +443,23 @@ def make_POC_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_
 
     @chain
     async def retrieve_POC_docs_node(state, config):
+        if "POC region" not in state["relevant_content_sources_selection"] :
+            return {}
+
         source_type = "POC"
-        state = await retrieve_documents(
-            state = state,
-            config= config,
+        POC_questions_index = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "POC"]
+
+        state = await retrieve_documents_for_all_questions(
+            state=state,
+            config=config,
            source_type=source_type,
+            to_handle_questions_index=POC_questions_index,
            vectorstore=vectorstore,
-            reranker= reranker,
-            llm=llm,
+            reranker=reranker,
            rerank_by_question=rerank_by_question,
-            k_final=k_final,
-            k_before_reranking=k_before_reranking,
-            k_summary=k_summary
-        )
+            k_final=k_final,
+            k_before_reranking=k_before_reranking,
+        )
        return state
 
     return retrieve_POC_docs_node
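The retrieval rewrite replaces the one-question-per-graph-pass loop with a single parallel fan-out per source type: build one coroutine per unhandled question, `asyncio.gather` them, then merge the results into the state update. The core shape, self-contained with retrieval stubbed out (the stub stands in for the vectorstore + reranker calls):

```python
import asyncio

async def retrieve_one(question: dict) -> tuple[list, list]:
    # Stand-in for retrieve_documents(): returns (docs, images) for one question.
    await asyncio.sleep(0.1)
    return [f"doc for {question['question']}"], []

async def retrieve_all(questions: list[dict], to_handle: list[int]) -> dict:
    # One task per question we own; all run concurrently.
    tasks = [retrieve_one(q) for i, q in enumerate(questions) if i in to_handle]
    results = await asyncio.gather(*tasks)
    # Merge per-question results into a single state update.
    state = {"documents": [], "related_contents": [], "handled_questions_index": to_handle}
    for docs, images in results:
        state["documents"].extend(docs)
        state["related_contents"].extend(images)
    return state

print(asyncio.run(retrieve_all([{"question": "q1"}, {"question": "q2"}], [0, 1])))
```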
climateqa/engine/graph.py CHANGED
@@ -95,10 +95,10 @@ def route_based_on_relevant_docs(state,threshold_docs=0.2):
 def route_continue_retrieve_documents(state):
     index_question_ipx = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "IPx"]
     questions_ipx_finished = all(elem in state["handled_questions_index"] for elem in index_question_ipx)
-    if questions_ipx_finished and state["search_only"]:
-        return END
-    elif questions_ipx_finished:
-        return "answer_search"
+    # if questions_ipx_finished and state["search_only"]:
+    #     return END
+    if questions_ipx_finished:
+        return "end_retrieve_IPx_documents"
     else:
         return "retrieve_documents"
 
@@ -113,10 +113,10 @@ def route_continue_retrieve_documents(state):
 def route_continue_retrieve_local_documents(state):
     index_question_poc = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "POC"]
     questions_poc_finished = all(elem in state["handled_questions_index"] for elem in index_question_poc)
-    if questions_poc_finished and state["search_only"]:
-        return END
-    elif questions_poc_finished:
-        return "answer_search"
+    # if questions_poc_finished and state["search_only"]:
+    #     return END
+    if questions_poc_finished or ("POC region" not in state["relevant_content_sources_selection"]):
+        return "end_retrieve_local_documents"
     else:
         return "retrieve_local_data"
 
@@ -139,8 +139,7 @@ def route_retrieve_documents(state):
 
     if "Graphs (OurWorldInData)" in state["relevant_content_sources_selection"] :
         sources_to_retrieve.append("retrieve_graphs")
-    if "POC region" in state["relevant_content_sources_selection"] :
-        sources_to_retrieve.append("retrieve_local_data")
+
     if sources_to_retrieve == []:
         return END
     return sources_to_retrieve
@@ -160,7 +159,7 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_regi
     answer_ai_impact = make_ai_impact_node(llm)
     retrieve_documents = make_IPx_retriever_node(vectorstore_ipcc, reranker, llm)
     retrieve_graphs = make_graph_retriever_node(vectorstore_graphs, reranker)
-    retrieve_local_data = make_POC_retriever_node(vectorstore_region, reranker, llm)
+    # retrieve_local_data = make_POC_retriever_node(vectorstore_region, reranker, llm)
     answer_rag = make_rag_node(llm, with_docs=True)
     answer_rag_no_docs = make_rag_node(llm, with_docs=False)
     chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
@@ -175,7 +174,7 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_regi
     workflow.add_node("answer_chitchat", answer_chitchat)
     workflow.add_node("chitchat_categorize_intent", chitchat_categorize_intent)
     workflow.add_node("retrieve_graphs", retrieve_graphs)
-    workflow.add_node("retrieve_local_data", retrieve_local_data)
+    # workflow.add_node("retrieve_local_data", retrieve_local_data)
     workflow.add_node("retrieve_graphs_chitchat", retrieve_graphs)
     workflow.add_node("retrieve_documents", retrieve_documents)
     workflow.add_node("answer_rag", answer_rag)
@@ -202,17 +201,92 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_regi
         route_translation,
         make_id_dict(["translate_query","transform_query"])
     )
+
+    workflow.add_conditional_edges(
+        "answer_search",
+        lambda x : route_based_on_relevant_docs(x,threshold_docs=threshold_docs),
+        make_id_dict(["answer_rag","answer_rag_no_docs"])
+    )
+    workflow.add_conditional_edges(
+        "transform_query",
+        route_retrieve_documents,
+        make_id_dict(["retrieve_graphs", END])
+    )
+
+    # Define the edges
+    workflow.add_edge("translate_query", "transform_query")
+    workflow.add_edge("transform_query", "retrieve_documents") #TODO put back
+    # workflow.add_edge("transform_query", "retrieve_local_data")
+    # workflow.add_edge("transform_query", END) # TODO remove
+
+    workflow.add_edge("retrieve_graphs", END)
+    workflow.add_edge("answer_rag", END)
+    workflow.add_edge("answer_rag_no_docs", END)
+    workflow.add_edge("answer_chitchat", "chitchat_categorize_intent")
+    workflow.add_edge("retrieve_graphs_chitchat", END)
+
+    # workflow.add_edge("retrieve_local_data", "answer_search")
+    workflow.add_edge("retrieve_documents", "answer_search")
+
+    # Compile
+    app = workflow.compile()
+    return app
+
+def make_graph_agent_poc(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_region, reranker, threshold_docs=0.2):
+
+    workflow = StateGraph(GraphState)
+
+    # Define the node functions
+    categorize_intent = make_intent_categorization_node(llm)
+    transform_query = make_query_transform_node(llm)
+    translate_query = make_translation_node(llm)
+    answer_chitchat = make_chitchat_node(llm)
+    answer_ai_impact = make_ai_impact_node(llm)
+    retrieve_documents = make_IPx_retriever_node(vectorstore_ipcc, reranker, llm)
+    retrieve_graphs = make_graph_retriever_node(vectorstore_graphs, reranker)
+    retrieve_local_data = make_POC_retriever_node(vectorstore_region, reranker, llm)
+    answer_rag = make_rag_node(llm, with_docs=True)
+    answer_rag_no_docs = make_rag_node(llm, with_docs=False)
+    chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
+
+    # Define the nodes
+    # workflow.add_node("set_defaults", set_defaults)
+    workflow.add_node("categorize_intent", categorize_intent)
+    workflow.add_node("answer_climate", dummy)
+    workflow.add_node("answer_search", answer_search)
+    # workflow.add_node("end_retrieve_local_documents", dummy)
+    # workflow.add_node("end_retrieve_IPx_documents", dummy)
+    workflow.add_node("transform_query", transform_query)
+    workflow.add_node("translate_query", translate_query)
+    workflow.add_node("answer_chitchat", answer_chitchat)
+    workflow.add_node("chitchat_categorize_intent", chitchat_categorize_intent)
+    workflow.add_node("retrieve_graphs", retrieve_graphs)
+    workflow.add_node("retrieve_local_data", retrieve_local_data)
+    workflow.add_node("retrieve_graphs_chitchat", retrieve_graphs)
+    workflow.add_node("retrieve_documents", retrieve_documents)
+    workflow.add_node("answer_rag", answer_rag)
+    workflow.add_node("answer_rag_no_docs", answer_rag_no_docs)
+
+    # Entry point
+    workflow.set_entry_point("categorize_intent")
+
+    # CONDITIONAL EDGES
+    workflow.add_conditional_edges(
+        "categorize_intent",
+        route_intent,
+        make_id_dict(["answer_chitchat","answer_climate"])
+    )
+
     workflow.add_conditional_edges(
-        "retrieve_documents",
-        # lambda state : "retrieve_documents" if len(state["remaining_questions"]) > 0 else "answer_search",
-        route_continue_retrieve_documents,
-        make_id_dict([END,"retrieve_documents","answer_search"])
+        "chitchat_categorize_intent",
+        chitchat_route_intent,
+        make_id_dict(["retrieve_graphs_chitchat", END])
     )
+
     workflow.add_conditional_edges(
-        "retrieve_local_data",
-        # lambda state : "retrieve_documents" if len(state["remaining_questions"]) > 0 else "answer_search",
-        route_continue_retrieve_local_documents,
-        make_id_dict([END,"retrieve_local_data","answer_search"])
+        "answer_climate",
+        route_translation,
+        make_id_dict(["translate_query","transform_query"])
     )
 
     workflow.add_conditional_edges(
@@ -223,19 +297,13 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_regi
     workflow.add_conditional_edges(
         "transform_query",
         route_retrieve_documents,
-        make_id_dict(["retrieve_graphs","retrieve_local_data", END])
+        make_id_dict(["retrieve_graphs", END])
     )
-
-
-    # workflow.add_conditional_edges(
-    #     "transform_query",
-    #     lambda state : "retrieve_graphs" if "POC region" in state["relevant_content_sources_selection"] else END,
-    #     make_id_dict(["retrieve_local_data", END])
-    # )
 
     # Define the edges
     workflow.add_edge("translate_query", "transform_query")
     workflow.add_edge("transform_query", "retrieve_documents") #TODO put back
+    workflow.add_edge("transform_query", "retrieve_local_data")
     # workflow.add_edge("transform_query", END) # TODO remove
 
     workflow.add_edge("retrieve_graphs", END)
@@ -243,7 +311,9 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_regi
     workflow.add_edge("answer_rag_no_docs", END)
     workflow.add_edge("answer_chitchat", "chitchat_categorize_intent")
     workflow.add_edge("retrieve_graphs_chitchat", END)
-    # workflow.add_edge("retrieve_local_data", "answer_search")
+
+    workflow.add_edge("retrieve_local_data", "answer_search")
+    workflow.add_edge("retrieve_documents", "answer_search")
 
     # Compile
     app = workflow.compile()
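Structurally, the POC graph now fans out from `transform_query` to both retrievers and joins on `answer_search`, instead of looping back through conditional edges until every question is handled. A minimal runnable sketch of that topology against langgraph 0.2.x (state and node bodies are stubs; the list reducer is an assumption about how the parallel branches should merge):

```python
import operator
from typing import Annotated, TypedDict
from langgraph.graph import StateGraph, END

class State(TypedDict):
    # Reducer merges updates from parallel branches instead of overwriting.
    documents: Annotated[list, operator.add]

g = StateGraph(State)
g.add_node("transform_query", lambda s: {})
g.add_node("retrieve_documents", lambda s: {"documents": ["IPx doc"]})
g.add_node("retrieve_local_data", lambda s: {"documents": ["POC doc"]})
g.add_node("answer_search", lambda s: {})

g.set_entry_point("transform_query")
g.add_edge("transform_query", "retrieve_documents")   # fan out to both retrievers...
g.add_edge("transform_query", "retrieve_local_data")
g.add_edge(["retrieve_documents", "retrieve_local_data"], "answer_search")  # ...join here
g.add_edge("answer_search", END)

# documents ends up containing both branches' docs, merged by the reducer.
print(g.compile().invoke({"documents": []}))
```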
climateqa/handle_stream_events.py CHANGED
@@ -22,7 +22,7 @@ def convert_to_docs_to_html(docs: list[dict]) -> str:
         docs_html.append(make_html_source(d, i))
     return "".join(docs_html)
 
-def handle_retrieved_documents(event: StreamEvent, history : list[ChatMessage], used_documents : list[str]) -> tuple[str, list[ChatMessage], list[str]]:
+def handle_retrieved_documents(event: StreamEvent, history : list[ChatMessage], used_documents : list[str],related_content:list[str]) -> tuple[str, list[ChatMessage], list[str]]:
     """
     Handles the retrieved documents and returns the HTML representation of the documents
 
@@ -35,7 +35,7 @@ def handle_retrieved_documents(event: StreamEvent, history : list[ChatMessage],
         tuple[str, list[ChatMessage], list[str]]: The updated HTML representation of the documents, the updated message history and the updated list of used documents
     """
     if "documents" not in event["data"]["output"] or event["data"]["output"]["documents"] == []:
-        return history, used_documents
+        return history, used_documents, related_content
 
     try:
         docs = event["data"]["output"]["documents"]
@@ -49,7 +49,7 @@ def handle_retrieved_documents(event: StreamEvent, history : list[ChatMessage],
     except Exception as e:
         print(f"Error getting documents: {e}")
         print(event)
-        return history, used_documents
+        return history, used_documents, related_content
 
 def stream_answer(history: list[ChatMessage], event : StreamEvent, start_streaming : bool, answer_message_content : str)-> tuple[list[ChatMessage], bool, str]:
     """
front/tabs/chat_interface.py CHANGED
@@ -44,7 +44,7 @@ def create_chat_interface():
         scale=12,
         lines=1,
         interactive=True,
-        elem_id="input-textbox"
+        elem_id=f"input-textbox"
     )
 
     config_button = gr.Button("", elem_id="config-button")
front/tabs/tab_examples.py CHANGED
@@ -3,7 +3,7 @@ from climateqa.sample_questions import QUESTIONS
 
 
 def create_examples_tab():
-    examples_hidden = gr.Textbox(visible=False)
+    examples_hidden = gr.Textbox(visible=False, elem_id=f"examples-hidden")
     first_key = list(QUESTIONS.keys())[0]
     dropdown_samples = gr.Dropdown(
         choices=QUESTIONS.keys(),
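Both front-end tweaks exist because the event wiring in app.py derives API names from `elem_id` (e.g. `api_name=f"chat_{textbox.elem_id}"`); a component created without an `elem_id` would yield names like `chat_None`. Illustration with a stand-in component class:

```python
class Component:
    # Stand-in for a Gradio component; only elem_id matters here.
    def __init__(self, elem_id=None):
        self.elem_id = elem_id

for box in (Component(), Component(elem_id="examples-hidden")):
    print(f"chat_{box.elem_id}")  # -> chat_None, then chat_examples-hidden
```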
requirements.txt CHANGED
@@ -4,7 +4,7 @@ azure-storage-blob
 python-dotenv==1.0.0
 langchain==0.2.1
 langchain_openai==0.1.7
-langgraph==0.0.55
+langgraph==0.2.70
 pinecone-client==4.1.0
 sentence-transformers==2.6.0
 huggingface-hub