WIP add regional sources
- app.py +6 -7
- climateqa/engine/chains/query_transformation.py +24 -11
- climateqa/engine/chains/retrieve_documents.py +100 -25
- climateqa/engine/graph.py +35 -12
- climateqa/engine/reranker.py +5 -0
app.py
CHANGED
@@ -103,11 +103,12 @@ CITATION_TEXT = r"""@misc{climateqa,
 embeddings_function = get_embeddings_function()
 vectorstore = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX"))
 vectorstore_graphs = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX_OWID"), text_key="description")
+vectorstore_region = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX_REGION"))
 
 llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
 reranker = get_reranker("nano")
 
-agent = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, reranker=reranker)
+agent = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, vectorstore_region = vectorstore_region, reranker=reranker)
 
 # Function to update modal visibility
 def update_config_modal_visibility(config_open):
@@ -149,7 +150,7 @@ async def chat(
     print(f">> NEW QUESTION ({date_now}) : {query}")
 
     audience_prompt = init_audience(audience)
-    sources = sources or ["IPCC", "IPBES"]
+    sources = sources or ["IPCC", "IPBES"]
     reports = reports or []
 
     # Prepare inputs for agent
@@ -606,9 +607,7 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
         outputs=[graphs_container]
     )
 
-
-
-    # Other tabs
+
     with gr.Tab("About", elem_classes="max-height other-tabs"):
         with gr.Row():
             with gr.Column(scale=1):
@@ -629,10 +628,10 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
                 show_copy_button=True,
                 lines=len(CITATION_TEXT.split('\n')),
             )
-
-    # Event handlers
+    # Configuration panel
    config_modal, dropdown_sources, dropdown_reports, dropdown_external_sources, search_only, dropdown_audience, after, output_query, output_language = create_config_modal(config_open)
 
+    # Event handlers
    config_button.click(
        fn=update_config_modal_visibility,
        inputs=[config_open],
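For context, here is the startup wiring after this change as one self-contained sketch. The import paths are assumptions (app.py's import block is outside this diff), and PINECONE_API_INDEX_REGION must name a Pinecone index already populated with the regional reports.

import os

# Assumed import paths; the real ones live in app.py's import block, not shown in this diff.
from climateqa.engine.embeddings import get_embeddings_function
from climateqa.engine.llm import get_llm
from climateqa.engine.reranker import get_reranker
from climateqa.engine.vectorstore import get_pinecone_vectorstore
from climateqa.engine.graph import make_graph_agent

embeddings_function = get_embeddings_function()

# One Pinecone index per content family; the regional index is the new one.
vectorstore = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX"))
vectorstore_graphs = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX_OWID"), text_key="description")
vectorstore_region = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX_REGION"))

llm = get_llm(provider="openai", max_tokens=1024, temperature=0.0)
reranker = get_reranker("nano")

# make_graph_agent now takes the regional store as an extra keyword argument.
agent = make_graph_agent(
    llm=llm,
    vectorstore_ipcc=vectorstore,
    vectorstore_graphs=vectorstore_graphs,
    vectorstore_region=vectorstore_region,
    reranker=reranker,
)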
climateqa/engine/chains/query_transformation.py
CHANGED
@@ -9,7 +9,7 @@ from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
 
 
 ROUTING_INDEX = {
-    "Vector":["IPCC","IPBES","IPOS"],
+    "Vector":["IPCC","IPBES","IPOS", "AcclimaTerra"],
     "OpenAlex":["OpenAlex"],
 }
 
@@ -69,13 +69,14 @@ class QueryAnalysis(BaseModel):
     # """
     # )
 
-    sources: List[Literal["IPCC", "IPBES", "IPOS"]] = Field( #,"OpenAlex"]] = Field(
+    sources: List[Literal["IPCC", "IPBES", "IPOS", "AcclimaTerra"]] = Field( #,"OpenAlex"]] = Field(
        ...,
        description="""
        Given a user question choose which documents would be most relevant for answering their question,
        - IPCC is for questions about climate change, energy, impacts, and everything we can find in the IPCC reports
        - IPBES is for questions about biodiversity and nature
        - IPOS is for questions about the ocean and deep sea mining
+       - AcclimaTerra is for questions about any specific place in, or close to, the French region "Nouvelle-Aquitaine"
        """,
        # - OpenAlex is for any other questions that are not in the previous categories but could be found in the scientific literature
    )
@@ -133,6 +134,23 @@ def make_query_rewriter_chain(llm):
 
 
 def make_query_transform_node(llm,k_final=15):
+    """
+    Creates a query transformation node that processes and transforms a given query state.
+    Args:
+        llm: The language model to be used for query decomposition and rewriting.
+        k_final (int, optional): The final number of questions to be generated. Defaults to 15.
+    Returns:
+        function: A function that takes a query state and returns a transformed state.
+    The returned function performs the following steps:
+    1. Checks if the query should be processed in auto mode based on the state.
+    2. Retrieves the input sources from the state or defaults to a predefined routing index.
+    3. Decomposes the query using the decomposition chain.
+    4. Analyzes each decomposed question using the rewriter chain.
+    5. Ensures that the sources returned by the language model are valid.
+    6. Explodes the questions into multiple questions with different sources based on the mode.
+    7. Constructs a new state with the transformed questions and their respective sources.
+    """
+
 
     decomposition_chain = make_query_decomposition_chain(llm)
     rewriter_chain = make_query_rewriter_chain(llm)
@@ -140,14 +158,9 @@
     def transform_query(state):
         print("---- Transform query ----")
 
-
-
-
-        else:
-            auto_mode = True
-
-        sources_input = state.get("sources_input")
-        if sources_input is None: sources_input = ROUTING_INDEX["Vector"]
+        auto_mode = state.get("sources_auto", False)
+        sources_input = state.get("sources_input", ROUTING_INDEX["Vector"])
+
 
         new_state = {}
 
@@ -159,7 +172,7 @@
         questions = []
         for question in new_state["questions"]:
             question_state = {"question":question}
-            analysis_output = rewriter_chain.invoke({"input":question})
+            analysis_output = rewriter_chain.invoke({"input":question})
 
             # TODO WARNING llm should always return something
             # The case when the llm does not return any sources
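To see what the updated routing table implies, here is a small self-contained sketch (a hypothetical helper, not part of the module) of how the sources chosen for one question bucket into retrieval indexes now that AcclimaTerra routes through "Vector":

from typing import Dict, List

# Copied from the diff above.
ROUTING_INDEX = {
    "Vector": ["IPCC", "IPBES", "IPOS", "AcclimaTerra"],
    "OpenAlex": ["OpenAlex"],
}

def group_sources_by_index(sources: List[str]) -> Dict[str, List[str]]:
    """Hypothetical helper: bucket one question's sources by retrieval index."""
    buckets = {}
    for index, known_sources in ROUTING_INDEX.items():
        matched = [s for s in sources if s in known_sources]
        if matched:
            buckets[index] = matched
    return buckets

# A question about the Nouvelle-Aquitaine coastline could now be routed like this:
print(group_sources_by_index(["IPCC", "AcclimaTerra"]))
# {'Vector': ['IPCC', 'AcclimaTerra']}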
climateqa/engine/chains/retrieve_documents.py
CHANGED
@@ -7,7 +7,7 @@ from langchain_core.runnables import chain
 from langchain_core.runnables import RunnableParallel, RunnablePassthrough
 from langchain_core.runnables import RunnableLambda
 
-from ..reranker import rerank_docs
+from ..reranker import rerank_docs, rerank_and_sort_docs
 # from ...knowledge.retriever import ClimateQARetriever
 from ...knowledge.openalex import OpenAlexRetriever
 from .keywords_extraction import make_keywords_extraction_chain
@@ -106,6 +106,48 @@ def _add_metadata_and_score(docs: List) -> Document:
         docs_with_metadata.append(doc)
     return docs_with_metadata
 
+async def get_POC_relevant_documents(
+    query: str,
+    vectorstore:VectorStore,
+    sources:list = ["Acclimaterra","PCAET","Plan Biodiversite"],
+    search_figures:bool = False,
+    search_only:bool = False,
+    k_documents:int = 10,
+    threshold:float = 0.6,
+    k_images: int = 5,
+    reports:list = [],
+) :
+    # Prepare base search kwargs
+    filters = {}
+
+    if len(reports) > 0:
+        filters["short_name"] = {"$in":reports}
+    else:
+        filters["source"] = { "$in": sources}
+
+    filters_text = {
+        **filters,
+        "chunk_type":"text",
+        # "report_type": {}, # TODO to be completed to choose the right documents / chapters according to the analysis of the question
+    }
+
+    docs_question = vectorstore.similarity_search_with_score(query=query,filter = filters_text,k = k_documents)
+    docs_question = [x for x in docs_question if x[1] > threshold]
+
+    if search_figures:
+        # Images
+        filters_image = {
+            **filters,
+            "chunk_type":"image"
+        }
+        docs_images = vectorstore.similarity_search_with_score(query=query,filter = filters_image,k = k_images)
+
+    return {
+        "docs_question" : docs_question,
+        "docs_images" : docs_images
+    }
+
+
 async def get_IPCC_relevant_documents(
     query: str,
     vectorstore:VectorStore,
@@ -191,12 +233,26 @@
     }
 
 
+
+def concatenate_documents(index, source_type, docs_question_dict, k_by_question, k_summary_by_question, k_images_by_question):
+    # Keep the right number of documents - The k_summary documents from SPM are placed in front
+    if source_type == "Vector" :
+        docs_question = docs_question_dict["docs_summaries"][:k_summary_by_question] + docs_question_dict["docs_full"][:k_by_question - k_summary_by_question]
+    elif source_type == "POC" :
+        docs_question = docs_question_dict["docs_question"][:k_by_question]
+    else :
+        docs_question = [doc for key in docs_question_dict.keys() for doc in docs_question_dict[key]]
+
+    images_question = docs_question_dict["docs_images"][:k_images_by_question]
+
+    return docs_question, images_question
+
 
 # The chain callback is not necessary, but it propagates the langchain callbacks to the astream_events logger to display intermediate results
 # @chain
-async def retrieve_documents(state,config, vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5, k_images=5):
+async def retrieve_documents(state,config, source_type, vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5, k_images=5):
     """
-
+    Unpack the first question of the remaining questions, and retrieve and rerank corresponding documents, based on the question and selected_sources
 
     Args:
         state (dict): The current state containing documents, related content, relevant content sources, remaining questions and n_questions.
@@ -212,7 +268,7 @@ async def retrieve_documents(state,config, vectorstore,reranker,llm,rerank_by_qu
     Returns:
         dict: The updated state containing the retrieved and reranked documents, related content, and remaining questions.
     """
-    print("---- Retrieve documents ----")
+    print(f"---- Retrieve documents from {source_type}----")
     docs = state.get("documents", [])
     related_content = state.get("related_content", [])
 
@@ -237,45 +293,51 @@
         await log_event({"question":question,"sources":sources,"index":index},"log_retriever",config)
 
 
-        if index == "Vector": # always true for now
+        # if index == "Vector": # always true for now #TODO rename to IPx
+        if source_type == "IPx": # always true for now #TODO rename to IPx
            docs_question_dict = await get_IPCC_relevant_documents(
                query = question,
                vectorstore=vectorstore,
                search_figures = search_figures,
                sources = sources,
                min_size = 200,
-                k_summary =
+                k_summary = k_before_reranking-1,
                k_total = k_before_reranking,
                k_images = k_images_by_question,
                threshold = 0.5,
                search_only = search_only,
                reports = reports,
            )
+
+        if source_type == "POC":
+            docs_question_dict = await get_POC_relevant_documents(
+                query = question,
+                vectorstore=vectorstore,
+                search_figures = search_figures,
+                sources = sources,
+                threshold = 0.5,
+                search_only = search_only,
+                reports = reports,
+            )
 
 
         # Rerank
-        if reranker is not None:
+        if reranker is not None and rerank_by_question:
            with suppress_output():
-                docs_question_summary_reranked = rerank_docs(reranker,docs_question_dict["docs_summaries"],question)
-                docs_question_fulltext_reranked = rerank_docs(reranker,docs_question_dict["docs_full"],question)
-                docs_question_images_reranked = rerank_docs(reranker,docs_question_dict["docs_images"],question)
-            if rerank_by_question:
-                docs_question_summary_reranked = sorted(docs_question_summary_reranked, key=lambda x: x.metadata["reranking_score"], reverse=True)
-                docs_question_fulltext_reranked = sorted(docs_question_fulltext_reranked, key=lambda x: x.metadata["reranking_score"], reverse=True)
-                docs_question_images_reranked = sorted(docs_question_images_reranked, key=lambda x: x.metadata["reranking_score"], reverse=True)
+                for key in docs_question_dict.keys():
+                    docs_question_dict[key] = rerank_and_sort_docs(reranker,docs_question_dict[key],question)
        else:
-            docs_question = docs_question_dict["docs_summaries"] + docs_question_dict["docs_full"]
            # Add a default reranking score
            for doc in docs_question:
                doc.metadata["reranking_score"] = doc.metadata["similarity_score"]
 
-
-        docs_question =
-
-
+        # Keep the right number of documents
+        docs_question, images_question = concatenate_documents(index, source_type, docs_question_dict, k_by_question, k_summary_by_question, k_images_by_question)
+
+        # Rerank the documents to put the most relevant in front
        if reranker is not None and rerank_by_question:
-            docs_question =
-
+            docs_question = rerank_and_sort_docs(reranker, docs_question, question)
+
        # Add sources used in the metadata
        docs_question = _add_sources_used_in_metadata(docs_question,sources,question,index)
        images_question = _add_sources_used_in_metadata(images_question,sources,question,index)
@@ -288,13 +350,26 @@ async def retrieve_documents(state,config, vectorstore,reranker,llm,rerank_by_qu
 
 
 
-def
+def make_IPx_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5):
+
+    @chain
+    async def retrieve_IPx_docs(state, config):
+        source_type = "IPx"
+        state = await retrieve_documents(state,config, source_type, vectorstore,reranker,llm,rerank_by_question, k_final, k_before_reranking, k_summary)
+        return state
+
+    return retrieve_IPx_docs
+
+
+def make_POC_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5):
+
     @chain
-    async def
-
+    async def retrieve_IPx_docs(state, config):
+        source_type = "POC"
+        state = await retrieve_documents(state,config, source_type, vectorstore,reranker,llm,rerank_by_question, k_final, k_before_reranking, k_summary)
        return state
 
-    return
+    return retrieve_IPx_docs
 
 
 
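The POC retriever differs from the IPx one mainly in the Pinecone metadata filter it builds. A minimal sketch of that filter logic, lifted from get_POC_relevant_documents above; the metadata fields short_name, source, and chunk_type are whatever the regional index was populated with, and the report name below is a hypothetical placeholder:

def build_poc_text_filter(sources, reports):
    """Sketch of the filter get_POC_relevant_documents builds for text chunks."""
    filters = {}
    if len(reports) > 0:
        # An explicit report selection takes precedence over source-level filtering.
        filters["short_name"] = {"$in": reports}
    else:
        filters["source"] = {"$in": sources}
    return {**filters, "chunk_type": "text"}

# Default behaviour: filter on the three regional source families.
print(build_poc_text_filter(["Acclimaterra", "PCAET", "Plan Biodiversite"], []))
# {'source': {'$in': ['Acclimaterra', 'PCAET', 'Plan Biodiversite']}, 'chunk_type': 'text'}

# With an explicit (hypothetical) report list, the source filter is dropped.
print(build_poc_text_filter(["Acclimaterra"], ["some_report"]))
# {'short_name': {'$in': ['some_report']}, 'chunk_type': 'text'}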
climateqa/engine/graph.py
CHANGED
@@ -16,7 +16,7 @@ from .chains.answer_ai_impact import make_ai_impact_node
 from .chains.query_transformation import make_query_transform_node
 from .chains.translation import make_translation_node
 from .chains.intent_categorization import make_intent_categorization_node
-from .chains.retrieve_documents import
+from .chains.retrieve_documents import make_IPx_retriever_node, make_POC_retriever_node
 from .chains.answer_rag import make_rag_node
 from .chains.graph_retriever import make_graph_retriever_node
 from .chains.chitchat_categorization import make_chitchat_intent_categorization_node
@@ -46,6 +46,9 @@ class GraphState(TypedDict):
     search_only : bool = False
     reports : List[str] = []
 
+def dummy(state):
+    return state
+
 def search(state): #TODO
     return state
 
@@ -60,7 +63,7 @@ def route_intent(state):
         # return "answer_ai_impact"
     else:
         # Search route
-        return "
+        return "answer_climate"
 
 def chitchat_route_intent(state):
     intent = state["search_graphs_chitchat"]
@@ -82,18 +85,29 @@ def route_based_on_relevant_docs(state,threshold_docs=0.2):
     else:
         return "answer_rag_no_docs"
 
-def
+def route_continue_retrieve_documents(state):
     if len(state["remaining_questions"]) == 0 and state["search_only"] :
         return END
     elif len(state["remaining_questions"]) > 0:
         return "retrieve_documents"
     else:
         return "answer_search"
+
+def route_retrieve_documents(state):
+    sources_to_retrieve = []
+
+    if "Graphs (OurWorldInData)" in state["relevant_content_sources_selection"] :
+        sources_to_retrieve.append("retrieve_graphs")
+    if "POC region" in state["relevant_content_sources_selection"] :
+        sources_to_retrieve.append("retrieve_local_data")
+    if sources_to_retrieve == []:
+        return END
+    return sources_to_retrieve
 
 def make_id_dict(values):
     return {k:k for k in values}
 
-def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, threshold_docs=0.2):
+def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_region, reranker, threshold_docs=0.2):
 
     workflow = StateGraph(GraphState)
 
@@ -103,8 +117,9 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     translate_query = make_translation_node(llm)
     answer_chitchat = make_chitchat_node(llm)
     answer_ai_impact = make_ai_impact_node(llm)
-    retrieve_documents =
+    retrieve_documents = make_IPx_retriever_node(vectorstore_ipcc, reranker, llm)
     retrieve_graphs = make_graph_retriever_node(vectorstore_graphs, reranker)
+    retrieve_local_data = make_POC_retriever_node(vectorstore_region, reranker, llm)
     answer_rag = make_rag_node(llm, with_docs=True)
     answer_rag_no_docs = make_rag_node(llm, with_docs=False)
     chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
@@ -112,13 +127,14 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     # Define the nodes
     # workflow.add_node("set_defaults", set_defaults)
     workflow.add_node("categorize_intent", categorize_intent)
-    workflow.add_node("
+    workflow.add_node("answer_climate", dummy)
     workflow.add_node("answer_search", answer_search)
     workflow.add_node("transform_query", transform_query)
     workflow.add_node("translate_query", translate_query)
     workflow.add_node("answer_chitchat", answer_chitchat)
     workflow.add_node("chitchat_categorize_intent", chitchat_categorize_intent)
     workflow.add_node("retrieve_graphs", retrieve_graphs)
+    workflow.add_node("retrieve_local_data", retrieve_local_data)
     workflow.add_node("retrieve_graphs_chitchat", retrieve_graphs)
     workflow.add_node("retrieve_documents", retrieve_documents)
     workflow.add_node("answer_rag", answer_rag)
@@ -131,7 +147,7 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     workflow.add_conditional_edges(
         "categorize_intent",
         route_intent,
-        make_id_dict(["answer_chitchat","
+        make_id_dict(["answer_chitchat","answer_climate"])
     )
 
     workflow.add_conditional_edges(
@@ -141,14 +157,14 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     )
 
     workflow.add_conditional_edges(
-        "
+        "answer_climate",
         route_translation,
         make_id_dict(["translate_query","transform_query"])
     )
     workflow.add_conditional_edges(
         "retrieve_documents",
         # lambda state : "retrieve_documents" if len(state["remaining_questions"]) > 0 else "answer_search",
-
+        route_continue_retrieve_documents,
         make_id_dict([END,"retrieve_documents","answer_search"])
     )
 
@@ -159,9 +175,16 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     )
     workflow.add_conditional_edges(
         "transform_query",
-
-        make_id_dict(["retrieve_graphs", END])
+        route_retrieve_documents,
+        make_id_dict(["retrieve_graphs","retrieve_local_data", END])
     )
+
+
+    # workflow.add_conditional_edges(
+    #     "transform_query",
+    #     lambda state : "retrieve_graphs" if "POC region" in state["relevant_content_sources_selection"] else END,
+    #     make_id_dict(["retrieve_local_data", END])
+    # )
 
     # Define the edges
     workflow.add_edge("translate_query", "transform_query")
@@ -172,7 +195,7 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     workflow.add_edge("answer_rag_no_docs", END)
     workflow.add_edge("answer_chitchat", "chitchat_categorize_intent")
     workflow.add_edge("retrieve_graphs_chitchat", END)
-
+    workflow.add_edge("retrieve_local_data", "answer_search")
 
     # Compile
     app = workflow.compile()
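The new router can return several node names at once, which is how the graph is expected to fan out from transform_query to both the OWID graphs retriever and the regional retriever before retrieve_local_data feeds answer_search via the new edge above. A standalone check of that routing behaviour; END is langgraph's sentinel, stubbed here as its plain-string value so the snippet runs without the package:

END = "__end__"  # stub for langgraph.graph.END

def route_retrieve_documents(state):
    # Same logic as the function added in the diff above.
    sources_to_retrieve = []
    if "Graphs (OurWorldInData)" in state["relevant_content_sources_selection"]:
        sources_to_retrieve.append("retrieve_graphs")
    if "POC region" in state["relevant_content_sources_selection"]:
        sources_to_retrieve.append("retrieve_local_data")
    if sources_to_retrieve == []:
        return END
    return sources_to_retrieve

print(route_retrieve_documents({"relevant_content_sources_selection": ["POC region"]}))
# ['retrieve_local_data']
print(route_retrieve_documents({"relevant_content_sources_selection": ["Graphs (OurWorldInData)", "POC region"]}))
# ['retrieve_graphs', 'retrieve_local_data']
print(route_retrieve_documents({"relevant_content_sources_selection": []}))
# __end__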
climateqa/engine/reranker.py
CHANGED
@@ -47,4 +47,9 @@ def rerank_docs(reranker,docs,query):
         doc.metadata["reranking_score"] = result.score
         doc.metadata["query_used_for_retrieval"] = query
         docs_reranked.append(doc)
+    return docs_reranked
+
+def rerank_and_sort_docs(reranker, docs, query):
+    docs_reranked = rerank_docs(reranker,docs,query)
+    docs_reranked = sorted(docs_reranked, key=lambda x: x.metadata["reranking_score"], reverse=True)
     return docs_reranked
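rerank_and_sort_docs simply composes rerank_docs with a descending sort on the reranking_score that rerank_docs writes into each document's metadata. A minimal sketch of that sort contract, using langchain_core documents with stubbed scores in place of a real reranker:

from langchain_core.documents import Document

# Documents as rerank_docs would leave them: each carries a reranking_score.
docs = [
    Document(page_content="low", metadata={"reranking_score": 0.12}),
    Document(page_content="high", metadata={"reranking_score": 0.91}),
    Document(page_content="mid", metadata={"reranking_score": 0.55}),
]

# The sorting step of rerank_and_sort_docs: highest score first.
docs_sorted = sorted(docs, key=lambda x: x.metadata["reranking_score"], reverse=True)
print([d.page_content for d in docs_sorted])
# ['high', 'mid', 'low']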