Spaces:

Ekimetrics
/

climate-question-answering

Running

File size: 8,363 Bytes

import json
import os
from collections.abc import AsyncIterator
from datetime import datetime

import gradio as gr
from gradio import ChatMessage
from langgraph.graph.state import CompiledStateGraph

# from .agent import agent

from .handle_stream_events import (
    init_audience,
    handle_retrieved_documents,
    convert_to_docs_to_html,
    stream_answer,
    handle_retrieved_owid_graphs,
)
from .logging import (
    log_interaction
)
        
# Chat functions
def start_chat(query, history, search_only):
    """Append the user's query to the history and build the initial UI updates.

    Returns a 4-tuple: an update with ``interactive=False``, an update that
    selects index 2 when ``search_only`` is truthy (index 1 otherwise), the
    extended history, and an empty list.
    """
    user_message = ChatMessage(role="user", content=query)
    history = history + [user_message]
    selected_index = 2 if search_only else 1
    return (
        gr.update(interactive=False),
        gr.update(selected=selected_index),
        history,
        [],
    )

def finish_chat():
    """Return a Gradio update that sets ``interactive=True`` and an empty value."""
    reset_update = gr.update(value="", interactive=True)
    return reset_update

def handle_numerical_data(event):
    """Extract the DRIAS data and SQL query from a stream event.

    Returns ``(drias_data, drias_sql_query)`` taken from the event's
    ``data.output`` when the event is the ``on_chain_end`` of
    ``retrieve_drias_data``; otherwise returns ``(None, None)``.
    """
    is_drias_end = (
        event["name"] == "retrieve_drias_data"
        and event["event"] == "on_chain_end"
    )
    if not is_drias_end:
        return None, None

    output = event["data"]["output"]
    return output["drias_data"], output["drias_sql_query"]
    
# Helper: read the title of the last message without assuming its metadata shape
def _last_message_title(history):
    """Return the metadata title of the last message in *history*, or None.

    Handles an empty history, a message without a ``metadata`` attribute,
    dict-style metadata without a ``"title"`` key, and attribute-style
    metadata objects.
    """
    if not history:
        return None
    metadata = getattr(history[-1], "metadata", None)
    if isinstance(metadata, dict):
        return metadata.get("title")
    return getattr(metadata, "title", None)


# Main chat function
async def chat_stream(
    agent: CompiledStateGraph,
    query: str,
    history: list[ChatMessage],
    audience: str,
    sources: list[str],
    reports: list[str],
    relevant_content_sources_selection: list[str],
    search_only: bool,
    share_client,
    user_id: str,
) -> AsyncIterator[tuple[list, str, str, str, list, str, gr.Dataset]]:
    """Stream a chat query through the agent and yield UI state after each event.

    Args:
        agent: Compiled graph whose ``astream_events`` stream is consumed.
        query: The user's question.
        history: Chat message history; updated as events arrive.
        audience: Target audience type, converted with ``init_audience``.
        sources: Knowledge base sources to search; defaults to
            ``["IPCC", "IPBES"]`` when empty or None.
        reports: Specific reports to search within sources; defaults to ``[]``.
        relevant_content_sources_selection: Types of content to retrieve
            (figures, papers, etc).
        search_only: Whether to only search without generating an answer.
        share_client: Forwarded unchanged to ``log_interaction``.
        user_id: Forwarded unchanged to ``log_interaction``.

    Yields:
        tuple: A 7-tuple containing:
            - history: Updated chat history
            - docs_html: HTML of retrieved documents
            - output_query: Processed query (never reassigned here, so
              always an empty string)
            - output_language: Detected language
            - related_contents: Related content
            - graphs_html: HTML of relevant graphs
            - follow_up_examples: ``gr.Dataset`` of follow-up questions

    Raises:
        gr.Error: If any exception occurs while processing the event stream;
            the original exception is attached as the cause.
    """
    # Log incoming question
    date_now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f">> NEW QUESTION ({date_now}) : {query}")

    audience_prompt = init_audience(audience)
    sources = sources or ["IPCC", "IPBES"]
    reports = reports or []
    # Only the last two messages are passed to the agent as conversation context
    relevant_history_discussion = history[-2:] if len(history) > 1 else []

    # Prepare inputs for agent
    inputs = {
        "user_input": query,
        "audience": audience_prompt,
        "sources_input": sources,
        "relevant_content_sources_selection": relevant_content_sources_selection,
        "search_only": search_only,
        "reports": reports,
        "chat_history": relevant_history_discussion,
    }

    # Get streaming events from agent
    result = agent.astream_events(inputs, version="v1")

    # Initialize state variables
    docs = []
    related_contents = []
    docs_html = ""
    output_query = ""
    output_language = ""
    start_streaming = False
    graphs_html = ""
    used_documents = []
    retrieved_contents = []
    answer_message_content = ""
    follow_up_examples = gr.Dataset(samples=[])
    # NOTE(review): numerical-data handling (handle_numerical_data / vanna_data)
    # was commented out in the original and is not wired into this stream.

    # Node/chain names that get a progress message in the chat, mapped to
    # (title shown to the user, display-output flag). The flag is not used here.
    steps_display = {
        "categorize_intent": ("🔄️ Analyzing user message", True),
        "transform_query": ("🔄️ Thinking step by step to answer the question", True),
        "retrieve_documents": ("🔄️ Searching in the knowledge base", False),
        "retrieve_local_data": ("🔄️ Searching in the knowledge base", False),
    }
    retrieval_names = ("retrieve_documents", "retrieve_local_data")
    answer_nodes = ("answer_rag", "answer_rag_no_docs", "answer_search", "answer_chitchat")

    # Bound up-front so the except handler can always reference it, even when
    # the stream fails before producing its first event.
    event = None

    try:
        # Process streaming events
        async for event in result:

            if "langgraph_node" in event["metadata"]:
                node = event["metadata"]["langgraph_node"]
                event_type = event["event"]
                event_name = event["name"]

                # Handle document retrieval
                if (event_type == "on_chain_end"
                        and event_name in retrieval_names
                        and event["data"]["output"] is not None):
                    history, used_documents, retrieved_contents = handle_retrieved_documents(
                        event, history, used_documents, retrieved_contents
                    )

                # Handle search-only answer: expose the documents and related content
                if event_type == "on_chain_end" and event_name == "answer_search":
                    docs = event["data"]["input"]["documents"]
                    docs_html = convert_to_docs_to_html(docs)
                    related_contents = event["data"]["input"]["related_contents"]

                # Handle intent categorization
                elif (event_type == "on_chain_end"
                      and node == "categorize_intent"
                      and event_name == "_write"):
                    intent = event["data"]["output"]["intent"]
                    output_language = event["data"]["output"].get("language", "English")
                    history[-1].content = f"Language identified: {output_language}\nIntent identified: {intent}"

                # Handle processing steps display
                elif event_name in steps_display and event_type == "on_chain_start":
                    # Index by the same key the guard checked so the lookup cannot fail
                    event_description, _ = steps_display[event_name]
                    # Avoid stacking identical consecutive step messages
                    if _last_message_title(history) != event_description:
                        history.append(ChatMessage(
                            role="assistant",
                            content="",
                            metadata={'title': event_description}
                        ))

                # Handle answer streaming
                elif (event_name != "transform_query"
                      and event_type == "on_chat_model_stream"
                      and node in answer_nodes):
                    history, start_streaming, answer_message_content = stream_answer(
                        history, event, start_streaming, answer_message_content
                    )

                # Handle graph retrieval
                elif event_name in ("retrieve_graphs", "retrieve_graphs_ai") and event_type == "on_chain_end":
                    graphs_html = handle_retrieved_owid_graphs(event, graphs_html)

                # Handle query transformation
                if event_name == "transform_query" and event_type == "on_chain_end":
                    if hasattr(history[-1], "content"):
                        sub_questions = [
                            q["question"] + "-> relevant sources : " + str(q["sources"])
                            for q in event["data"]["output"]["questions_list"]
                        ]
                        history[-1].content += "Decompose question into sub-questions:\n\n - " + "\n - ".join(sub_questions)

                # Handle follow up questions
                if event_name == "generate_follow_up" and event_type == "on_chain_end":
                    follow_up_questions = event["data"]["output"].get("follow_up_questions", [])
                    follow_up_examples = gr.Dataset(
                        samples=[[question] for question in follow_up_questions]
                    )

            yield history, docs_html, output_query, output_language, related_contents, graphs_html, follow_up_examples

    except Exception as e:
        print(f"Event {event} has failed")
        # Surface the failure in the Gradio UI while keeping the original traceback
        raise gr.Error(str(e)) from e

    # Call the function to log interaction
    log_interaction(history, output_query, sources, docs, share_client, user_id)

    yield history, docs_html, output_query, output_language, related_contents, graphs_html, follow_up_examples