timeki commited on
Commit
6f5e8e0
·
1 Parent(s): 55db9a8

Take the last question from the chat history into account when interpreting the current question

Browse files
climateqa/chat.py CHANGED
@@ -101,6 +101,7 @@ async def chat_stream(
101
  audience_prompt = init_audience(audience)
102
  sources = sources or ["IPCC", "IPBES"]
103
  reports = reports or []
 
104
 
105
  # Prepare inputs for agent
106
  inputs = {
@@ -109,7 +110,8 @@ async def chat_stream(
109
  "sources_input": sources,
110
  "relevant_content_sources_selection": relevant_content_sources_selection,
111
  "search_only": search_only,
112
- "reports": reports
 
113
  }
114
 
115
  # Get streaming events from agent
 
101
  audience_prompt = init_audience(audience)
102
  sources = sources or ["IPCC", "IPBES"]
103
  reports = reports or []
104
+ relevant_history_discussion = history[-2:] if len(history) > 1 else []
105
 
106
  # Prepare inputs for agent
107
  inputs = {
 
110
  "sources_input": sources,
111
  "relevant_content_sources_selection": relevant_content_sources_selection,
112
  "search_only": search_only,
113
+ "reports": reports,
114
+ "chat_history": relevant_history_discussion,
115
  }
116
 
117
  # Get streaming events from agent
climateqa/engine/chains/answer_rag.py CHANGED
@@ -65,6 +65,7 @@ def make_rag_node(llm,with_docs = True):
65
  async def answer_rag(state,config):
66
  print("---- Answer RAG ----")
67
  start_time = time.time()
 
68
  print("Sources used : " + "\n".join([x.metadata["short_name"] + " - page " + str(x.metadata["page_number"]) for x in state["documents"]]))
69
 
70
  answer = await rag_chain.ainvoke(state,config)
@@ -73,9 +74,10 @@ def make_rag_node(llm,with_docs = True):
73
  elapsed_time = end_time - start_time
74
  print("RAG elapsed time: ", elapsed_time)
75
  print("Answer size : ", len(answer))
76
- # print(f"\n\nAnswer:\n{answer}")
77
 
78
- return {"answer":answer}
 
 
79
 
80
  return answer_rag
81
 
 
65
  async def answer_rag(state,config):
66
  print("---- Answer RAG ----")
67
  start_time = time.time()
68
+ chat_history = state.get("chat_history",[])
69
  print("Sources used : " + "\n".join([x.metadata["short_name"] + " - page " + str(x.metadata["page_number"]) for x in state["documents"]]))
70
 
71
  answer = await rag_chain.ainvoke(state,config)
 
74
  elapsed_time = end_time - start_time
75
  print("RAG elapsed time: ", elapsed_time)
76
  print("Answer size : ", len(answer))
 
77
 
78
+ chat_history.append({"question":state["query"],"answer":answer})
79
+
80
+ return {"answer":answer,"chat_history": chat_history}
81
 
82
  return answer_rag
83
 
climateqa/engine/chains/standalone_question.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain.prompts import ChatPromptTemplate

def make_standalone_question_chain(llm):
    """Create a runnable that rewrites a follow-up question as a standalone one.

    The returned chain expects two inputs, ``chat_history`` and ``question``,
    and asks the LLM to fold any context from the history into a
    self-contained question that can be understood on its own.
    """
    system_message = """You are a helpful assistant that transforms user questions into standalone questions
    by incorporating context from the chat history if needed. The output should be a self-contained
    question that can be understood without any additional context.

    Examples:
    Chat History: "Let's talk about renewable energy"
    User Input: "What about solar?"
    Output: "What are the key aspects of solar energy as a renewable energy source?"

    Chat History: "What causes global warming?"
    User Input: "And what are its effects?"
    Output: "What are the effects of global warming on the environment and society?"
    """

    user_message = """Chat History: {chat_history}
    User Question: {question}

    Transform this into a standalone question:"""

    prompt = ChatPromptTemplate.from_messages(
        [("system", system_message), ("user", user_message)]
    )
    return prompt | llm
26
+
27
def make_standalone_question_node(llm):
    """Build a graph node that rewrites ``user_input`` as a standalone question.

    Returns a state-transform function. When the state carries a non-empty
    ``chat_history``, the LLM rewrites ``user_input`` so it can be understood
    without that history; otherwise the state passes through untouched —
    this skips a pointless LLM call (the caller passes an empty history on
    the first turn) and avoids rephrasing a question that is already
    standalone.
    """
    standalone_chain = make_standalone_question_chain(llm)

    def transform_to_standalone(state):
        # chat.py populates this as a list of recent turns; it may be absent
        # or empty on the first question of a conversation.
        chat_history = state.get("chat_history", "")
        if not chat_history:
            # Nothing to disambiguate against — keep the question as-is.
            return state
        output = standalone_chain.invoke({
            "chat_history": chat_history,
            "question": state["user_input"],
        })
        state["user_input"] = output.content
        return state

    return transform_to_standalone
climateqa/engine/graph.py CHANGED
@@ -23,13 +23,14 @@ from .chains.retrieve_documents import make_IPx_retriever_node, make_POC_retriev
23
  from .chains.answer_rag import make_rag_node
24
  from .chains.graph_retriever import make_graph_retriever_node
25
  from .chains.chitchat_categorization import make_chitchat_intent_categorization_node
26
- # from .chains.set_defaults import set_defaults
27
 
28
  class GraphState(TypedDict):
29
  """
30
  Represents the state of our graph.
31
  """
32
  user_input : str
 
33
  language : str
34
  intent : str
35
  search_graphs_chitchat : bool
@@ -128,6 +129,7 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_regi
128
  workflow = StateGraph(GraphState)
129
 
130
  # Define the node functions
 
131
  categorize_intent = make_intent_categorization_node(llm)
132
  transform_query = make_query_transform_node(llm)
133
  translate_query = make_translation_node(llm)
@@ -142,6 +144,7 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_regi
142
 
143
  # Define the nodes
144
  # workflow.add_node("set_defaults", set_defaults)
 
145
  workflow.add_node("categorize_intent", categorize_intent)
146
  workflow.add_node("answer_climate", dummy)
147
  workflow.add_node("answer_search", answer_search)
@@ -157,7 +160,7 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_regi
157
  workflow.add_node("answer_rag_no_docs", answer_rag_no_docs)
158
 
159
  # Entry point
160
- workflow.set_entry_point("categorize_intent")
161
 
162
  # CONDITIONAL EDGES
163
  workflow.add_conditional_edges(
@@ -190,6 +193,7 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_regi
190
  )
191
 
192
  # Define the edges
 
193
  workflow.add_edge("translate_query", "transform_query")
194
  workflow.add_edge("transform_query", "retrieve_documents") #TODO put back
195
  # workflow.add_edge("transform_query", "retrieve_local_data")
@@ -228,6 +232,8 @@ def make_graph_agent_poc(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_
228
  workflow = StateGraph(GraphState)
229
 
230
  # Define the node functions
 
 
231
  categorize_intent = make_intent_categorization_node(llm)
232
  transform_query = make_query_transform_node(llm)
233
  translate_query = make_translation_node(llm)
@@ -243,6 +249,7 @@ def make_graph_agent_poc(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_
243
 
244
  # Define the nodes
245
  # workflow.add_node("set_defaults", set_defaults)
 
246
  workflow.add_node("categorize_intent", categorize_intent)
247
  workflow.add_node("answer_climate", dummy)
248
  workflow.add_node("answer_search", answer_search)
@@ -260,7 +267,7 @@ def make_graph_agent_poc(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_
260
  workflow.add_node("answer_rag_no_docs", answer_rag_no_docs)
261
 
262
  # Entry point
263
- workflow.set_entry_point("categorize_intent")
264
 
265
  # CONDITIONAL EDGES
266
  workflow.add_conditional_edges(
@@ -293,6 +300,7 @@ def make_graph_agent_poc(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_
293
  )
294
 
295
  # Define the edges
 
296
  workflow.add_edge("translate_query", "transform_query")
297
  workflow.add_edge("transform_query", "retrieve_documents") #TODO put back
298
  workflow.add_edge("transform_query", "retrieve_local_data")
 
23
  from .chains.answer_rag import make_rag_node
24
  from .chains.graph_retriever import make_graph_retriever_node
25
  from .chains.chitchat_categorization import make_chitchat_intent_categorization_node
26
+ from .chains.standalone_question import make_standalone_question_node
27
 
28
  class GraphState(TypedDict):
29
  """
30
  Represents the state of our graph.
31
  """
32
  user_input : str
33
+ chat_history : str
34
  language : str
35
  intent : str
36
  search_graphs_chitchat : bool
 
129
  workflow = StateGraph(GraphState)
130
 
131
  # Define the node functions
132
+ standalone_question_node = make_standalone_question_node(llm)
133
  categorize_intent = make_intent_categorization_node(llm)
134
  transform_query = make_query_transform_node(llm)
135
  translate_query = make_translation_node(llm)
 
144
 
145
  # Define the nodes
146
  # workflow.add_node("set_defaults", set_defaults)
147
+ workflow.add_node("standalone_question", standalone_question_node)
148
  workflow.add_node("categorize_intent", categorize_intent)
149
  workflow.add_node("answer_climate", dummy)
150
  workflow.add_node("answer_search", answer_search)
 
160
  workflow.add_node("answer_rag_no_docs", answer_rag_no_docs)
161
 
162
  # Entry point
163
+ workflow.set_entry_point("standalone_question")
164
 
165
  # CONDITIONAL EDGES
166
  workflow.add_conditional_edges(
 
193
  )
194
 
195
  # Define the edges
196
+ workflow.add_edge("standalone_question", "categorize_intent")
197
  workflow.add_edge("translate_query", "transform_query")
198
  workflow.add_edge("transform_query", "retrieve_documents") #TODO put back
199
  # workflow.add_edge("transform_query", "retrieve_local_data")
 
232
  workflow = StateGraph(GraphState)
233
 
234
  # Define the node functions
235
+ standalone_question_node = make_standalone_question_node(llm)
236
+
237
  categorize_intent = make_intent_categorization_node(llm)
238
  transform_query = make_query_transform_node(llm)
239
  translate_query = make_translation_node(llm)
 
249
 
250
  # Define the nodes
251
  # workflow.add_node("set_defaults", set_defaults)
252
+ workflow.add_node("standalone_question", standalone_question_node)
253
  workflow.add_node("categorize_intent", categorize_intent)
254
  workflow.add_node("answer_climate", dummy)
255
  workflow.add_node("answer_search", answer_search)
 
267
  workflow.add_node("answer_rag_no_docs", answer_rag_no_docs)
268
 
269
  # Entry point
270
+ workflow.set_entry_point("standalone_question")
271
 
272
  # CONDITIONAL EDGES
273
  workflow.add_conditional_edges(
 
300
  )
301
 
302
  # Define the edges
303
+ workflow.add_edge("standalone_question", "categorize_intent")
304
  workflow.add_edge("translate_query", "transform_query")
305
  workflow.add_edge("transform_query", "retrieve_documents") #TODO put back
306
  workflow.add_edge("transform_query", "retrieve_local_data")