jattokatarratto committed (verified)
Commit 3724ac8 · Parent(s): cc757d0

Upload folder using huggingface_hub
.gitignore ADDED
@@ -0,0 +1,107 @@



.idea/

config.py


#
# Project specific excludes
#

*.csv
*.log
*.key
*.env

/.gradio/
/.gradio/*

med_news.txt

example_sergio.sh
example_sergio_summary.sh

screenlog.0

/prove/
/prove/*

*.json

/__pycache__/
/__pycache__/*

/vast_api_logs/
/vast_api_logs/*

*.tpl


./.settings/




*.Rhistory
*.Rproj
*.RData

tomcat

#
# Default excludes
#

# Binaries
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip
*.war
*.ear
*.sar
*.class

# Maven
target/

# IntelliJ project files
*.iml
*.iws
*.ipr
.idea/

# eclipse project file
.settings/
.classpath
.project

# NetBeans specific
nbproject/private/
build/
nbbuild/
dist/
nbdist/
nbactions.xml
nb-configuration.xml


# OS
.DS_Store

# Misc
*.swp
release.properties
pom.xml.releaseBackup
pom.xml.tag
__pycache__

.Rproj.user

/bin/
README.md CHANGED
@@ -1,12 +1,7 @@
  ---
- title: CrisesStorylinesRAG
- emoji: 🦀
- colorFrom: gray
- colorTo: green
+ title: crisesStorylinesRAG
+ app_file: storylines-app.py
  sdk: gradio
- sdk_version: 5.8.0
- app_file: app.py
- pinned: false
+ sdk_version: 5.0.1
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # crisesStorylinesRAG
 
client_v1/__init__.py ADDED
File without changes
client_v1/client.py ADDED
@@ -0,0 +1,99 @@
import httpx
from langchain_core.callbacks.manager import (
    AsyncCallbackManagerForRetrieverRun,
    CallbackManagerForRetrieverRun,
)
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from pydantic import Field, PrivateAttr, model_validator

from .settings import EmmRetrieversSettings


def as_lc_docs(dicts: list[dict]) -> list[Document]:
    return [
        Document(page_content=d["page_content"], metadata=d["metadata"]) for d in dicts
    ]


# The simple retriever is built with a fixed spec/filter/params/route config
# and can then be reused many times with different queries.
# Note: instances are cheap to construct.


class EmmRetrieverV1(BaseRetriever):
    settings: EmmRetrieversSettings
    spec: dict
    filter: dict | None = None
    params: dict = Field(default_factory=dict)
    route: str = "/r/rag-minimal/query"
    add_ref_key: bool = True

    _client: httpx.Client = PrivateAttr()
    _aclient: httpx.AsyncClient = PrivateAttr()

    # ------- interface impl:
    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> list[Document]:
        r = self._client.post(**self.search_post_kwargs(query))
        if r.status_code == 422:
            print("ERROR:\n", r.json())
        r.raise_for_status()
        resp = r.json()
        return self._as_lc_docs(resp["documents"])

    async def _aget_relevant_documents(
        self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
    ) -> list[Document]:
        r = await self._aclient.post(**self.search_post_kwargs(query))
        if r.status_code == 422:
            print("ERROR:\n", r.json())
        r.raise_for_status()
        resp = r.json()
        return self._as_lc_docs(resp["documents"])

    # ---------
    @model_validator(mode="after")
    def create_clients(self):
        _auth_headers = {
            "Authorization": f"Bearer {self.settings.API_KEY.get_secret_value()}"
        }

        kwargs = dict(
            base_url=self.settings.API_BASE,
            headers=_auth_headers,
            timeout=self.settings.DEFAULT_TIMEOUT,
        )

        self._client = httpx.Client(**kwargs)
        self._aclient = httpx.AsyncClient(**kwargs)
        return self

    @model_validator(mode="after")
    def apply_default_params(self):
        self.params = {
            **{
                "cluster_name": self.settings.DEFAULT_CLUSTER,
                "index": self.settings.DEFAULT_INDEX,
            },
            **(self.params or {}),
        }
        return self

    def _as_lc_docs(self, dicts: list[dict]) -> list[Document]:
        docs = as_lc_docs(dicts)
        if self.add_ref_key:
            for i, d in enumerate(docs):
                d.metadata["ref_key"] = i

        return docs

    def search_post_kwargs(self, query: str):
        return dict(
            url=self.route,
            params=self.params,
            json={"query": query, "spec": self.spec, "filter": self.filter},
        )
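
A hedged usage sketch (the index name is hypothetical, and it assumes the EMM_RETRIEVERS_* env vars are set): the two model validators above create the HTTP clients once at construction time, and apply_default_params fills cluster_name/index from the settings underneath any explicitly passed params:

retriever = EmmRetrieverV1(
    settings=EmmRetrieversSettings(),
    spec={"search_k": 5},
    params={"index": "my_test_index"},  # hypothetical; overrides DEFAULT_INDEX
)
# cluster_name falls back to settings.DEFAULT_CLUSTER ("rag-os" by default)
assert retriever.params["index"] == "my_test_index"
docs = retriever.invoke("example query")  # POSTs to `route`, returns Documents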
client_v1/example_00.py ADDED
@@ -0,0 +1,154 @@
# %%
from pprint import pprint
import os
import httpx

# from pydantic_settings import BaseSettings, SettingsConfigDict
# from pydantic import SecretStr
#
# model_config = SettingsConfigDict(env_prefix="EMM_RETRIEVERS_", env_file="/eos/jeodpp/home/users/consose/PycharmProjects/disasterStories-prj/.env")
#
# class RetrieverSettings(BaseSettings):
#     api_base: str
#     api_key: SecretStr
#
#     class Config:
#         config_dict = model_config
#
# settings = RetrieverSettings()
# print(settings.api_base)
# print(settings.api_key.get_secret_value())


from client_v1.formatting_utils import fixed_width_wrap, format_docs
from client_v1.settings import EmmRetrieversSettings


# %%
settings = EmmRetrieversSettings()

settings.API_BASE

# the test index configuration
TEST_INDEX = "mine_e_emb-rag_live_test_001"
INDEX_MIN = "2024-09-14"
INDEX_MAX = "2024-09-28"

# instantiate an httpx client once, with base URL and auth headers
client = httpx.Client(
    base_url=settings.API_BASE,
    headers={"Authorization": f"Bearer {settings.API_KEY.get_secret_value()}"},
)


# %%
# get your auth info
client.get("/_cat/token").json()

EXAMPLE_QUESTION = "What natural disasters are currently occurring?"

# %%
r = client.post(
    "/r/rag-minimal/query",
    params={"cluster_name": settings.DEFAULT_CLUSTER, "index": TEST_INDEX},
    json={
        "query": EXAMPLE_QUESTION,
        "spec": {"search_k": 20},
        "filter": {
            "max_chunk_no": 1,
            "min_chars": 200,
            "start_dt": "2024-09-19",
            "end_dt": "2024-09-20",
        },
    },
)

r.raise_for_status()

search_resp = r.json()

documents = search_resp["documents"]
print(len(documents))


titles = [d["metadata"]["title"] for d in documents]

print("\n".join([f"- {title}" for title in titles]))

# %%
# full chunk formatting:

print(format_docs(documents, fixed_width=True))

# %%
# Using the gpt@jrc language models

from client_v1.jrc_openai import JRCChatOpenAI

llm_model = JRCChatOpenAI(
    model="llama-3.1-70b-instruct",
    openai_api_key=settings.OPENAI_API_KEY.get_secret_value(),
    openai_api_base=settings.OPENAI_API_BASE_URL,
)

resp = llm_model.invoke("What is the JRC?")
print(resp.content)
pprint(resp.response_metadata)

# %%

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

rag_chain = prompt | llm_model

# Add the API key to the LLM model
# llm_model.api_key = settings.OPENAI_API_KEY.get_secret_value()

r = rag_chain.invoke({"input": EXAMPLE_QUESTION, "context": format_docs(documents)})

print(fixed_width_wrap(r.content))
print("-" * 42)
pprint(r.response_metadata)

# %% [markdown]

# notes:
# - custom retriever class
# - multiquery retrieval https://python.langchain.com/docs/how_to/MultiQueryRetriever/
# - self query https://python.langchain.com/docs/how_to/self_query/


# %%
# using prompt hubs

import langchain.hub

if hasattr(settings, "LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = settings.LANGCHAIN_API_KEY.get_secret_value()

rag_prompt = langchain.hub.pull("rlm/rag-prompt")
print(
    fixed_width_wrap(
        rag_prompt.format(**{k: "{" + k + "}" for k in rag_prompt.input_variables})
    )
)


# %%
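
The multiquery note above can be made concrete; this is an illustrative sketch (not part of the commit) that wraps the custom retriever from client_v1.client in LangChain's MultiQueryRetriever, reusing `settings`, `TEST_INDEX`, `llm_model`, and `EXAMPLE_QUESTION` from this script:

# %%
# a hedged sketch of the multiquery-retrieval note above
from client_v1.client import EmmRetrieverV1
from langchain.retrievers.multi_query import MultiQueryRetriever

base_retriever = EmmRetrieverV1(
    settings=settings, spec={"search_k": 20}, params={"index": TEST_INDEX}
)
# the LLM generates query variants; results are deduplicated across variants
mq_retriever = MultiQueryRetriever.from_llm(retriever=base_retriever, llm=llm_model)
mq_docs = mq_retriever.invoke(EXAMPLE_QUESTION)
print(len(mq_docs))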
client_v1/example_01.py ADDED
@@ -0,0 +1,106 @@
# %%
from pprint import pprint

import httpx

from client_v1.formatting_utils import fixed_width_wrap, format_docs
from client_v1.settings import EmmRetrieversSettings

# %%
settings = EmmRetrieversSettings()

settings.API_BASE

# the test index configuration
TEST_INDEX = "mine_e_emb-rag_live_test_001"
INDEX_MIN = "2024-09-14"
INDEX_MAX = "2024-09-28"

# %%

from client_v1.client import EmmRetrieverV1

# we can build a concrete retriever by specifying everything but the actual `query`;
# here, for example, we build a retriever for just a specific date
retriever = EmmRetrieverV1(
    settings=settings,
    params={"index": TEST_INDEX},
    route="/r/rag-minimal/query",
    spec={"search_k": 20},
    filter={
        "max_chunk_no": 1,
        "min_chars": 200,
        "start_dt": "2024-09-19",
        "end_dt": "2024-09-20",
    },
)

# %%

EXAMPLE_QUESTION = "What natural disasters are currently occurring?"

docs = retriever.invoke(EXAMPLE_QUESTION)

docs
# %%
# very similar, except `metadata` is an attribute
titles = [d.metadata["title"] for d in docs]

print("\n".join([f"- {title}" for title in titles]))

# %%

print(format_docs(docs))

# %%
# Using the gpt@jrc language models

from client_v1.jrc_openai import JRCChatOpenAI

llm_model = JRCChatOpenAI(
    model="llama-3.1-70b-instruct",
    openai_api_key=settings.OPENAI_API_KEY.get_secret_value(),
    openai_api_base=settings.OPENAI_API_BASE_URL,
)

# %%

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

rag_chain = (
    {"context": retriever | format_docs, "input": RunnablePassthrough()}
    | prompt
    | llm_model
)

# %%
r = rag_chain.invoke(EXAMPLE_QUESTION)

print(fixed_width_wrap(r.content))
print("-" * 42)
pprint(r.response_metadata)

# %%
r = rag_chain.invoke("Outline the ongoing Health emergencies in Europe")

print(fixed_width_wrap(r.content))
print("-" * 42)
pprint(r.response_metadata)

# %%
client_v1/formatting_utils.py ADDED
@@ -0,0 +1,52 @@
# %%

import textwrap

from benedict import benedict
from langchain_core.documents import Document


def _fixed_width_wrap(text, width: int = 70, join_str: str = "\n"):
    return join_str.join(textwrap.wrap(text, width=width))


def fixed_width_wrap(text, width: int = 70, join_str: str = "\n", split_str="\n"):
    return join_str.join(
        [
            _fixed_width_wrap(t, width=width, join_str=join_str)
            for t in text.split(split_str)
        ]
    )


def format_doc_minimal(d, fixed_width=False):
    if isinstance(d, Document):
        _cont = d.page_content
        _meta = benedict(d.metadata)
    else:
        _cont = d["page_content"]
        _meta = benedict(d["metadata"])

    if fixed_width:
        _cont = _fixed_width_wrap(_cont)

    return """\
Title:\t{title}
Published on:\t{pubdate}
Source:\t{source_name} ({source_country})
Chunk Content:

\t{cont}
""".format(
        title=_meta.get("title"),
        pubdate=_meta.get("pubdate"),
        source_name=_meta.get("source.host") or _meta.get("source.id"),
        source_country=_meta.get("source.country", "n/a"),
        cont=_cont,
    )


def format_docs(docs, doc_fn=format_doc_minimal, **kwargs):
    return "\n---\n".join([doc_fn(d, **kwargs) for d in docs])
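
For reference, a toy input (illustrative values only) showing the chunk shape format_docs accepts and how benedict's keypath lookups ("source.host", "source.country") resolve against the nested metadata:

sample = {
    "page_content": "A severe storm hit the coast overnight...",
    "metadata": {
        "title": "Storm update",
        "pubdate": "2024-09-19",
        "source": {"host": "news.example.org", "country": "IT"},
    },
}
print(format_docs([sample], fixed_width=True))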
client_v1/jrc_openai.py ADDED
@@ -0,0 +1,26 @@
from langchain_openai import ChatOpenAI
from langchain_core.language_models import LanguageModelInput
from typing import Any, List, Optional


# This will look for the regular OpenAI env vars
# (OPENAI_API_KEY and OPENAI_API_BASE), so override them externally with the gpt@jrc coords.
class JRCChatOpenAI(ChatOpenAI):

    def _get_request_payload(
        self,
        input_: LanguageModelInput,
        *,
        stop: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> dict:
        """
        NOTE: this works around bug_00_tool_message: changing the role of
        tool messages to "system" keeps gpt@jrc happy.
        """

        r = super()._get_request_payload(input_=input_, stop=stop, **kwargs)
        for m in r["messages"]:
            if m["role"] == "tool":
                m["role"] = "system"
        return r
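
A quick offline sketch of the role rewrite (dummy key, no network call, since _get_request_payload only builds the request body; assumes the langchain-openai 0.2.x private API pinned by this repo):

from langchain_core.messages import HumanMessage, ToolMessage

m = JRCChatOpenAI(model="llama-3.1-70b-instruct", openai_api_key="dummy")
payload = m._get_request_payload(
    [HumanMessage("hi"), ToolMessage("42", tool_call_id="t1")]
)
assert all(msg["role"] != "tool" for msg in payload["messages"])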
client_v1/settings.py ADDED
@@ -0,0 +1,22 @@
from pydantic_settings import BaseSettings, SettingsConfigDict
from pydantic import SecretStr


class EmmRetrieversSettings(BaseSettings):
    API_BASE: str
    API_KEY: SecretStr

    OPENAI_API_BASE_URL: str
    OPENAI_API_KEY: SecretStr

    LANGCHAIN_API_KEY: SecretStr

    DEFAULT_CLUSTER: str = "rag-os"
    DEFAULT_INDEX: str = "mine_e_emb-rag_live"

    DEFAULT_TIMEOUT: int = 120

    model_config = SettingsConfigDict(env_prefix="EMM_RETRIEVERS_", env_file="../.env")
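
With env_prefix="EMM_RETRIEVERS_", pydantic-settings maps each field onto a prefixed environment variable, so the expected ../.env looks like the following sketch (all values hypothetical placeholders):

# EMM_RETRIEVERS_API_BASE=https://example.invalid/emm
# EMM_RETRIEVERS_API_KEY=...
# EMM_RETRIEVERS_OPENAI_API_BASE_URL=https://example.invalid/v1
# EMM_RETRIEVERS_OPENAI_API_KEY=...
# EMM_RETRIEVERS_LANGCHAIN_API_KEY=...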
storylines-app.py ADDED
@@ -0,0 +1,350 @@
import pandas as pd
from datetime import date
import gradio as gr
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import networkx as nx
import ast

# # Environment settings
# os.environ["CUDA_VISIBLE_DEVICES"] = "2"
# os.environ["HF_HUB_CACHE"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
# os.environ["HUGGINGFACE_HUB_CACHE"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
# os.environ["HF_HOME"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"

# Load the CSV file
# df = pd.read_csv("emdat2.csv", sep=',', header=0, dtype=str, encoding='utf-8')
df = pd.read_csv("https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/ETOHA/storylines/emdat2.csv",
                 sep=',', header=0, dtype=str, encoding='utf-8')

# EM-DAT columns shown verbatim next to the generated storyline
ADDITIONAL_FIELDS = [
    "Country", "ISO", "Subregion", "Region", "Location", "Origin",
    "Disaster Group", "Disaster Subgroup", "Disaster Type", "Disaster Subtype", "External IDs",
    "Event Name", "Associated Types", "OFDA/BHA Response", "Appeal", "Declaration",
    "AID Contribution ('000 US$)", "Magnitude", "Magnitude Scale", "Latitude",
    "Longitude", "River Basin", "Total Deaths", "No. Injured",
    "No. Affected", "No. Homeless", "Total Affected",
    "Reconstruction Costs ('000 US$)", "Reconstruction Costs, Adjusted ('000 US$)",
    "Insured Damage ('000 US$)", "Insured Damage, Adjusted ('000 US$)",
    "Total Damage ('000 US$)", "Total Damage, Adjusted ('000 US$)", "CPI",
    "Admin Units",
    # "Entry Date", "Last Update"
]

EMPTY_RESULT = ('', '', '', '', '', '', '', None, '', '') + tuple([''] * len(ADDITIONAL_FIELDS))


def try_parse_date(y, m, d):
    try:
        if not y or not m or not d:
            return None
        return date(int(float(y)), int(float(m)), int(float(d)))
    except (ValueError, TypeError):
        return None


def row_date(row, which):
    """Parse a row's 'Start'/'End' date, defaulting a missing month or day to the 1st."""
    y, m, d = row[f'{which} Year'], row[f'{which} Month'], row[f'{which} Day']
    return try_parse_date(y, "01" if m == "" else m, "01" if d == "" else d)


def filter_events(data, country, year, month, day):
    """Apply the country and date filters shared by the row dropdown and the info panel."""
    filtered_df = data
    if country:
        filtered_df = filtered_df[filtered_df['Country'] == country]

    selected_date = try_parse_date(year, month, day)

    if selected_date:
        # keep events whose [start, end] interval contains the selected date
        filtered_df = filtered_df[filtered_df.apply(
            lambda row: (
                row_date(row, 'Start') is not None and
                row_date(row, 'End') is not None and
                row_date(row, 'Start') <= selected_date <= row_date(row, 'End')
            ), axis=1)]
    elif year:
        if month:
            sstart = try_parse_date(year, month, "01")
            # exclusive upper bound: first day of the following month
            # (January of the next year when December is selected)
            if month == "12":
                eend = try_parse_date(str(int(float(year)) + 1), "01", "01")
            else:
                eend = try_parse_date(year, str(int(float(month)) + 1), "01")
            if sstart and eend:
                filtered_df = filtered_df[filtered_df.apply(
                    lambda row: (
                        row_date(row, 'Start') is not None and
                        sstart <= row_date(row, 'Start') < eend
                    ), axis=1)]
            else:
                print("Invalid selected date.")
        else:
            sstart = try_parse_date(year, "01", "01")
            eend = try_parse_date(year, "12", "31")
            if sstart and eend:
                filtered_df = filtered_df[filtered_df.apply(
                    lambda row: (
                        row_date(row, 'Start') is not None and
                        sstart <= row_date(row, 'Start') <= eend
                    ), axis=1)]
            else:
                print("Invalid selected date.")
    else:
        print("Invalid selected date.")

    return filtered_df


def plot_cgraph(grp):
    if not grp:
        return None
    source, relations, target = list(zip(*grp))
    kg_df = pd.DataFrame({'source': source, 'target': target, 'edge': relations})
    G = nx.from_pandas_edgelist(kg_df, "source", "target", edge_attr='edge', create_using=nx.MultiDiGraph())
    edge_colors_dict = {"causes": "red", "prevents": "green"}
    edge_color_list = [edge_colors_dict.get(G[u][v][key]['edge'], 'black') for u, v, key in G.edges(keys=True)]

    plt.figure(figsize=(12, 12))
    pos = nx.spring_layout(G, k=1.5, iterations=100)
    nx.draw_networkx_nodes(G, pos, node_color='skyblue', node_size=800, alpha=0.8)
    nx.draw_networkx_edges(G, pos, edge_color=edge_color_list, arrows=True, width=2)
    nx.draw_networkx_labels(G, pos)
    legend_elements = [Line2D([0], [0], color=color, label=edge_type, lw=2)
                       for edge_type, color in edge_colors_dict.items()]
    plt.legend(handles=legend_elements, loc='best')
    plt.axis('off')
    plt.tight_layout()
    return plt.gcf()


def display_info(selected_row_str, country, year, month, day):
    if not selected_row_str:
        print("No row selected.")
        return EMPTY_RESULT

    print(f"Selected Country: {country}, Selected Row: {selected_row_str}, Date: {year}-{month}-{day}")

    filtered_df = filter_events(df, country, year, month, day)

    # Use the "DisNo." column for selecting the row
    row_data = filtered_df[filtered_df['DisNo.'] == selected_row_str].squeeze()

    if row_data.empty:
        print("No valid data found for the selection.")
        return EMPTY_RESULT

    print(f"Row data: {row_data}")
    key_information = row_data.get('key information', '')
    severity = row_data.get('severity', '')
    key_drivers = row_data.get('key drivers', '')
    impacts_exposure_vulnerability = row_data.get('main impacts, exposure, and vulnerability', '')
    likelihood_multi_hazard = row_data.get('likelihood of multi-hazard risks', '')
    best_practices = row_data.get('best practices for managing this risk', '')
    recommendations = row_data.get('recommendations and supportive measures for recovery', '')
    causal_graph_caption = row_data.get('causal graph', '')
    grp = ast.literal_eval(causal_graph_caption) if causal_graph_caption else []
    causal_graph_plot = plot_cgraph(grp)

    # Parse and format the start date, falling back to the raw Y-M-D fields
    start_date = try_parse_date(row_data['Start Year'], row_data['Start Month'], row_data['Start Day'])
    start_date_str = (start_date.strftime('%Y-%m-%d') if start_date
                      else f"{row_data['Start Year']}-{row_data['Start Month']}-{row_data['Start Day']}")

    # Parse and format the end date, with the same fallback
    end_date = try_parse_date(row_data['End Year'], row_data['End Month'], row_data['End Day'])
    end_date_str = (end_date.strftime('%Y-%m-%d') if end_date
                    else f"{row_data['End Year']}-{row_data['End Month']}-{row_data['End Day']}")

    # Collect the additional field data
    additional_data = [row_data.get(field, '') for field in ADDITIONAL_FIELDS]

    return (
        key_information,
        severity,
        key_drivers,
        impacts_exposure_vulnerability,
        likelihood_multi_hazard,
        best_practices,
        recommendations,
        causal_graph_plot,
        start_date_str,
        end_date_str,
    ) + tuple(additional_data)


def update_row_dropdown(country, year, month, day):
    filtered_df = filter_events(df, country, year, month, day)

    # Use the "DisNo." column for the dropdown choices
    choices = filtered_df['DisNo.'].tolist() if not filtered_df.empty else []
    print(f"Available rows for {country} on {year}-{month}-{day}: {choices}")
    return gr.update(choices=choices, value=choices[0] if choices else None)


def build_interface():
    with gr.Blocks() as interface:

        # Title and description
        gr.Markdown("## From Data to Narratives: AI-Enhanced Disaster and Health Threats Storylines")
        gr.Markdown(
            "This Gradio app complements Health Threats and Disaster event data through generative AI techniques, including the use of Retrieval Augmented Generation (RAG) with the [Europe Media Monitoring (EMM)](https://emm.newsbrief.eu/overview.html) service, "
            "and Large Language Models (LLMs) from the [GPT@JRC](https://gpt.jrc.ec.europa.eu/) portfolio. <br>"
            "The app leverages the EMM RAG service to retrieve relevant news chunks for each event, transforms the unstructured news chunks into structured narratives and causal knowledge graphs using LLMs and text-to-graph techniques, linking health threats and disaster events to their causes and impacts. "
            "Drawing data from sources like the [EM-DAT](https://www.emdat.be/) database, it augments each event with news-derived information in a storytelling fashion. <br>"
            "This tool enables decision-makers to better explore health threats and disaster dynamics, identify patterns, and simulate scenarios for improved response and readiness. <br><br>"
            "Select an event below. You can filter by country and date period. Below, you will see the AI-generated storyline and causal knowledge graph, while on the right you can see the related EM-DAT data record. <br><br>"
        )

        # Extract and prepare unique years from "Start Year" and "End Year"
        if not df.empty:
            start_years = df["Start Year"].dropna().unique()
            end_years = df["End Year"].dropna().unique()

            # Convert to integers and merge into a union set
            years = set(start_years.astype(int).tolist() + end_years.astype(int).tolist())
            year_choices = sorted(years)
        else:
            year_choices = []

        with gr.Row():
            with gr.Column():
                # Main controls and outputs
                country_dropdown = gr.Dropdown(choices=[''] + df['Country'].unique().tolist(), label="Select Country")
                year_dropdown = gr.Dropdown(choices=[""] + [str(year) for year in year_choices], label="Select Year")
                month_dropdown = gr.Dropdown(choices=[""] + [f"{i:02d}" for i in range(1, 13)], label="Select Month")
                day_dropdown = gr.Dropdown(choices=[""] + [f"{i:02d}" for i in range(1, 32)], label="Select Day")
                row_dropdown = gr.Dropdown(choices=[], label="Select Disaster Event #", interactive=True)

                outputs = [
                    gr.Textbox(label="Key Information", interactive=False),
                    gr.Textbox(label="Severity", interactive=False),
                    gr.Textbox(label="Key Drivers", interactive=False),
                    gr.Textbox(label="Main Impacts, Exposure, and Vulnerability", interactive=False),
                    gr.Textbox(label="Likelihood of Multi-Hazard Risks", interactive=False),
                    gr.Textbox(label="Best Practices for Managing This Risk", interactive=False),
                    gr.Textbox(label="Recommendations and Supportive Measures for Recovery", interactive=False),
                    gr.Plot(label="Causal Graph"),
                ]

            with gr.Column():
                # EM-DAT record fields on the right
                outputs.extend([
                    gr.Textbox(label="Start Date", interactive=False),
                    gr.Textbox(label="End Date", interactive=False),
                ])
                for field in ADDITIONAL_FIELDS:
                    outputs.append(gr.Textbox(label=field, interactive=False))

        # Update the selectable rows when any of the filters change
        for dropdown in (country_dropdown, year_dropdown, month_dropdown, day_dropdown):
            dropdown.change(
                fn=update_row_dropdown,
                inputs=[country_dropdown, year_dropdown, month_dropdown, day_dropdown],
                outputs=row_dropdown,
            )

        # Update the displayed information when a row is selected
        row_dropdown.change(
            fn=display_info,
            inputs=[row_dropdown, country_dropdown, year_dropdown, month_dropdown, day_dropdown],
            outputs=outputs,
        )

    return interface


app = build_interface()
app.launch(share=True)
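
For clarity, the "causal graph" column that display_info passes through ast.literal_eval holds a stringified list of (source, relation, target) triples; an illustrative value (made-up triples) that plot_cgraph can render:

sample_grp = [
    ("heavy rainfall", "causes", "flooding"),
    ("flooding", "causes", "crop damage"),
    ("early warning systems", "prevents", "casualties"),
]
fig = plot_cgraph(sample_grp)  # red edges = "causes", green edges = "prevents"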
storylines-app_plotly_version.py ADDED
@@ -0,0 +1,347 @@
import pandas as pd
from datetime import date
import gradio as gr
import networkx as nx
import ast
import plotly.graph_objects as go

# Load the CSV file
df = pd.read_csv("https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/ETOHA/storylines/emdat2.csv",
                 sep=',', header=0, dtype=str, encoding='utf-8')

# EM-DAT columns shown verbatim next to the generated storyline
ADDITIONAL_FIELDS = [
    "Country", "ISO", "Subregion", "Region", "Location", "Origin",
    "Disaster Group", "Disaster Subgroup", "Disaster Type", "Disaster Subtype", "External IDs",
    "Event Name", "Associated Types", "OFDA/BHA Response", "Appeal", "Declaration",
    "AID Contribution ('000 US$)", "Magnitude", "Magnitude Scale", "Latitude",
    "Longitude", "River Basin", "Total Deaths", "No. Injured",
    "No. Affected", "No. Homeless", "Total Affected",
    "Reconstruction Costs ('000 US$)", "Reconstruction Costs, Adjusted ('000 US$)",
    "Insured Damage ('000 US$)", "Insured Damage, Adjusted ('000 US$)",
    "Total Damage ('000 US$)", "Total Damage, Adjusted ('000 US$)", "CPI",
    "Admin Units",
]

EMPTY_RESULT = ('', '', '', '', '', '', '', None, '', '') + tuple([''] * len(ADDITIONAL_FIELDS))


def try_parse_date(y, m, d):
    try:
        if not y or not m or not d:
            return None
        return date(int(float(y)), int(float(m)), int(float(d)))
    except (ValueError, TypeError):
        return None


def row_date(row, which):
    """Parse a row's 'Start'/'End' date, defaulting a missing month or day to the 1st."""
    y, m, d = row[f'{which} Year'], row[f'{which} Month'], row[f'{which} Day']
    return try_parse_date(y, "01" if m == "" else m, "01" if d == "" else d)


def filter_events(data, country, year, month, day):
    """Apply the country and date filters shared by the row dropdown and the info panel."""
    filtered_df = data
    if country:
        filtered_df = filtered_df[filtered_df['Country'] == country]

    selected_date = try_parse_date(year, month, day)

    if selected_date:
        # keep events whose [start, end] interval contains the selected date
        filtered_df = filtered_df[filtered_df.apply(
            lambda row: (
                row_date(row, 'Start') is not None and
                row_date(row, 'End') is not None and
                row_date(row, 'Start') <= selected_date <= row_date(row, 'End')
            ), axis=1)]
    elif year:
        if month:
            sstart = try_parse_date(year, month, "01")
            # exclusive upper bound: first day of the following month
            # (January of the next year when December is selected)
            if month == "12":
                eend = try_parse_date(str(int(float(year)) + 1), "01", "01")
            else:
                eend = try_parse_date(year, str(int(float(month)) + 1), "01")
            if sstart and eend:
                filtered_df = filtered_df[filtered_df.apply(
                    lambda row: (
                        row_date(row, 'Start') is not None and
                        sstart <= row_date(row, 'Start') < eend
                    ), axis=1)]
        else:
            sstart = try_parse_date(year, "01", "01")
            eend = try_parse_date(year, "12", "31")
            if sstart and eend:
                filtered_df = filtered_df[filtered_df.apply(
                    lambda row: (
                        row_date(row, 'Start') is not None and
                        sstart <= row_date(row, 'Start') <= eend
                    ), axis=1)]

    return filtered_df


def plot_cgraph(grp):
    if not grp:
        return None
    source, relations, target = list(zip(*grp))
    kg_df = pd.DataFrame({'source': source, 'target': target, 'edge': relations})
    G = nx.from_pandas_edgelist(kg_df, "source", "target", edge_attr='edge', create_using=nx.MultiDiGraph())

    pos = nx.spring_layout(G, k=1.5, iterations=100)

    # Separate edges based on their color
    edge_colors_dict = {"causes": "red", "prevents": "green"}
    traces = []

    for color in edge_colors_dict.values():
        edge_x = []
        edge_y = []
        for u, v, key in G.edges(keys=True):
            current_color = edge_colors_dict.get(G[u][v][key]['edge'], 'black')
            if current_color == color:
                x0, y0 = pos[u]
                x1, y1 = pos[v]
                edge_x.extend([x0, x1, None])
                edge_y.extend([y0, y1, None])

        trace = go.Scatter(x=edge_x, y=edge_y, line=dict(width=2, color=color), hoverinfo='none', mode='lines')
        traces.append(trace)

    node_x = []
    node_y = []
    node_text = []
    for node in G.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        node_text.append(node)

    node_trace = go.Scatter(
        x=node_x, y=node_y, mode='markers+text', text=node_text,
        marker=dict(size=10, color='skyblue', line_width=2),
        textposition="top center", hoverinfo='text'
    )

    traces.append(node_trace)

    fig = go.Figure(data=traces,
                    layout=go.Layout(showlegend=False,
                                     hovermode='closest',
                                     margin=dict(b=20, l=5, r=5, t=40)))

    return fig


def display_info(selected_row_str, country, year, month, day):
    if not selected_row_str:
        return EMPTY_RESULT

    filtered_df = filter_events(df, country, year, month, day)

    row_data = filtered_df[filtered_df['DisNo.'] == selected_row_str].squeeze()

    if row_data.empty:
        return EMPTY_RESULT

    key_information = row_data.get('key information', '')
    severity = row_data.get('severity', '')
    key_drivers = row_data.get('key drivers', '')
    impacts_exposure_vulnerability = row_data.get('main impacts, exposure, and vulnerability', '')
    likelihood_multi_hazard = row_data.get('likelihood of multi-hazard risks', '')
    best_practices = row_data.get('best practices for managing this risk', '')
    recommendations = row_data.get('recommendations and supportive measures for recovery', '')
    causal_graph_caption = row_data.get('causal graph', '')
    grp = ast.literal_eval(causal_graph_caption) if causal_graph_caption else []
    causal_graph_plot = plot_cgraph(grp)

    start_date = try_parse_date(row_data['Start Year'], row_data['Start Month'], row_data['Start Day'])
    start_date_str = start_date.strftime('%Y-%m-%d') if start_date else 'N/A'

    end_date = try_parse_date(row_data['End Year'], row_data['End Month'], row_data['End Day'])
    end_date_str = end_date.strftime('%Y-%m-%d') if end_date else 'N/A'

    additional_data = [row_data.get(field, '') for field in ADDITIONAL_FIELDS]

    return (
        key_information,
        severity,
        key_drivers,
        impacts_exposure_vulnerability,
        likelihood_multi_hazard,
        best_practices,
        recommendations,
        causal_graph_plot,
        start_date_str,
        end_date_str,
    ) + tuple(additional_data)


def update_row_dropdown(country, year, month, day):
    filtered_df = filter_events(df, country, year, month, day)

    choices = filtered_df['DisNo.'].tolist() if not filtered_df.empty else []
    return gr.update(choices=choices, value=choices[0] if choices else None)


def build_interface():
    with gr.Blocks() as interface:
        gr.Markdown("## From Data to Narratives: AI-Enhanced Disaster and Health Threats Storylines")
        gr.Markdown(
            "This Gradio app complements Health Threats and Disaster event data... <br>"
            "Select an event data below..."
        )

        if not df.empty:
            start_years = df["Start Year"].dropna().unique()
            end_years = df["End Year"].dropna().unique()
            years = set(start_years.astype(int).tolist() + end_years.astype(int).tolist())
            year_choices = sorted(years)
        else:
            year_choices = []

        with gr.Row():
            with gr.Column():
                country_dropdown = gr.Dropdown(choices=[''] + df['Country'].unique().tolist(), label="Select Country")
                year_dropdown = gr.Dropdown(choices=[""] + [str(year) for year in year_choices], label="Select Year")
                month_dropdown = gr.Dropdown(choices=[""] + [f"{i:02d}" for i in range(1, 13)], label="Select Month")
                day_dropdown = gr.Dropdown(choices=[""] + [f"{i:02d}" for i in range(1, 32)], label="Select Day")
                row_dropdown = gr.Dropdown(choices=[], label="Select Disaster Event #", interactive=True)

                outputs = [
                    gr.Textbox(label="Key Information", interactive=False),
                    gr.Textbox(label="Severity", interactive=False),
                    gr.Textbox(label="Key Drivers", interactive=False),
                    gr.Textbox(label="Main Impacts, Exposure, and Vulnerability", interactive=False),
                    gr.Textbox(label="Likelihood of Multi-Hazard Risks", interactive=False),
                    gr.Textbox(label="Best Practices for Managing This Risk", interactive=False),
                    gr.Textbox(label="Recommendations and Supportive Measures for Recovery", interactive=False),
                    gr.Plot(label="Causal Graph"),
                ]

            with gr.Column():
                outputs.extend([
                    gr.Textbox(label="Start Date", interactive=False),
                    gr.Textbox(label="End Date", interactive=False),
                ])
                for field in ADDITIONAL_FIELDS:
                    outputs.append(gr.Textbox(label=field, interactive=False))

        for dropdown in (country_dropdown, year_dropdown, month_dropdown, day_dropdown):
            dropdown.change(
                fn=update_row_dropdown,
                inputs=[country_dropdown, year_dropdown, month_dropdown, day_dropdown],
                outputs=row_dropdown,
            )

        row_dropdown.change(
            fn=display_info,
            inputs=[row_dropdown, country_dropdown, year_dropdown, month_dropdown, day_dropdown],
            outputs=outputs,
        )

    return interface


app = build_interface()
app.launch()
storylines-env.yml ADDED
@@ -0,0 +1,289 @@
name: /scratch/consose-011/conda/storylines-env-accel-011
channels:
  - plotly
  - pytorch
  - nvidia
  - conda-forge
  - defaults
dependencies:
  - _libgcc_mutex=0.1=main
  - _openmp_mutex=5.1=1_gnu
  - absl-py=2.1.0=pyhd8ed1ab_0
  - archspec=0.2.1=pyhd3eb1b0_0
  - blas=1.0=mkl
  - blessed=1.19.1=pyhe4f9e05_2
  - boltons=23.0.0=py312h06a4308_0
  - bottleneck=1.3.7=py312ha883a20_0
  - brotli=1.0.9=h5eee18b_8
  - brotli-bin=1.0.9=h5eee18b_8
  - brotli-python=1.0.9=py312h6a678d5_7
  - bzip2=1.0.8=h7b6447c_0
  - c-ares=1.19.1=h5eee18b_0
  - ca-certificates=2024.8.30=hbcca054_0
  - certifi=2024.8.30=pyhd8ed1ab_0
  - cffi=1.16.0=py312h5eee18b_0
  - charset-normalizer=2.0.4=pyhd3eb1b0_0
  - click=8.1.7=unix_pyh707e725_0
  - colorama=0.4.6=pyhd8ed1ab_0
  - conda-content-trust=0.2.0=py312h06a4308_0
  - conda-package-handling=2.2.0=py312h06a4308_0
  - conda-package-streaming=0.9.0=py312h06a4308_0
  - contourpy=1.2.0=py312hdb19cb5_0
  - cryptography=41.0.7=py312hdda0065_0
  - cuda-cudart=12.1.105=0
  - cuda-cupti=12.1.105=0
  - cuda-libraries=12.1.0=0
  - cuda-nvrtc=12.1.105=0
  - cuda-nvtx=12.1.105=0
  - cuda-opencl=12.4.127=0
  - cuda-runtime=12.1.0=0
  - cuda-version=10.1=h0a949dd_3
  - cudatoolkit=10.1.243=h036e899_8
  - cycler=0.11.0=pyhd3eb1b0_0
  - cyrus-sasl=2.1.28=h52b45da_1
  - dbus=1.13.18=hb2f20db_0
  - distro=1.8.0=py312h06a4308_0
  - expat=2.5.0=h6a678d5_0
  - ffmpeg=4.3=hf484d3e_0
  - filelock=3.13.1=py312h06a4308_0
  - fmt=9.1.0=hdb19cb5_0
  - fontconfig=2.14.1=h4c34cd2_2
  - fonttools=4.51.0=py312h5eee18b_0
  - freetype=2.12.1=h4a9f257_0
  - glib=2.78.4=h6a678d5_0
  - glib-tools=2.78.4=h6a678d5_0
  - gmp=6.2.1=h295c915_3
  - gnutls=3.6.15=he1e5248_0
  - gpustat=1.1.1=pyhd8ed1ab_0
  - gst-plugins-base=1.14.1=h6a678d5_1
  - gstreamer=1.14.1=h5eee18b_1
  - icu=73.1=h6a678d5_0
  - idna=3.4=py312h06a4308_0
  - importlib-metadata=8.0.0=pyha770c72_0
  - intel-openmp=2023.1.0=hdb19cb5_46306
  - isodate=0.6.1=pyhd8ed1ab_0
  - jinja2=3.1.4=py312h06a4308_0
  - joblib=1.4.2=pyhd8ed1ab_0
  - jpeg=9e=h5eee18b_1
  - jsonpointer=2.1=pyhd3eb1b0_0
  - kiwisolver=1.4.4=py312h6a678d5_0
  - krb5=1.20.1=h143b758_1
  - lame=3.100=h7b6447c_0
  - lcms2=2.12=h3be6417_0
  - ld_impl_linux-64=2.38=h1181459_1
  - lerc=3.0=h295c915_0
  - libabseil=20240116.2=cxx17_h6a678d5_0
  - libarchive=3.6.2=h6ac8c49_2
  - libbrotlicommon=1.0.9=h5eee18b_8
  - libbrotlidec=1.0.9=h5eee18b_8
  - libbrotlienc=1.0.9=h5eee18b_8
  - libclang=14.0.6=default_hc6dbbc7_1
  - libclang13=14.0.6=default_he11475f_1
  - libcublas=12.1.0.26=0
  - libcufft=11.0.2.4=0
  - libcufile=1.9.1.3=0
  - libcups=2.4.2=h2d74bed_1
  - libcurand=10.3.5.147=0
  - libcurl=8.5.0=h251f7ec_0
  - libcusolver=11.4.4.55=0
  - libcusparse=12.0.2.55=0
  - libdeflate=1.17=h5eee18b_1
  - libedit=3.1.20230828=h5eee18b_0
  - libev=4.33=h7f8727e_1
  - libffi=3.4.4=h6a678d5_0
  - libgcc-ng=11.2.0=h1234567_1
  - libglib=2.78.4=hdc74915_0
  - libgomp=11.2.0=h1234567_1
  - libiconv=1.16=h5eee18b_3
  - libidn2=2.3.4=h5eee18b_0
  - libjpeg-turbo=2.0.0=h9bf148f_0
  - libllvm14=14.0.6=hdb19cb5_3
  - libmamba=1.5.3=haf1ee3a_0
  - libmambapy=1.5.3=py312h2dafd23_0
  - libnghttp2=1.57.0=h2d74bed_0
  - libnpp=12.0.2.50=0
  - libnvjitlink=12.1.105=0
  - libnvjpeg=12.1.1.14=0
  - libpng=1.6.39=h5eee18b_0
  - libpq=12.17=hdbd6064_0
  - libprotobuf=4.25.3=he621ea3_0
  - libsolv=0.7.24=he621ea3_0
  - libssh2=1.10.0=hdbd6064_2
  - libstdcxx-ng=11.2.0=h1234567_1
  - libtasn1=4.19.0=h5eee18b_0
  - libtiff=4.5.1=h6a678d5_0
  - libunistring=0.9.10=h27cfd23_0
  - libuuid=1.41.5=h5eee18b_0
  - libwebp-base=1.3.2=h5eee18b_0
  - libxcb=1.15=h7f8727e_0
  - libxkbcommon=1.0.1=h5eee18b_1
  - libxml2=2.10.4=hf1b16e4_1
  - llvm-openmp=14.0.6=h9e868ea_0
  - lz4-c=1.9.4=h6a678d5_0
  - markupsafe=2.1.3=py312h5eee18b_0
  - menuinst=2.0.2=py312h06a4308_0
  - mkl=2023.1.0=h213fc3f_46344
  - mkl-service=2.4.0=py312h5eee18b_1
  - mkl_fft=1.3.8=py312h5eee18b_0
  - mkl_random=1.2.4=py312hdb19cb5_0
  - mpmath=1.3.0=py312h06a4308_0
  - mysql=5.7.24=h721c034_2
  - ncurses=6.4=h6a678d5_0
  - nettle=3.7.3=hbbd107a_1
  - networkx=3.3=py312h06a4308_0
  - nltk=3.8.1=pyhd8ed1ab_0
  - numexpr=2.8.7=py312hf827012_0
  - numpy=1.26.4=py312hc5e2394_0
  - numpy-base=1.26.4=py312h0da6c21_0
  - openh264=2.1.1=h4ff587b_0
  - openjpeg=2.4.0=h9ca470c_2
  - openssl=3.0.14=h5eee18b_0
  - packaging=23.1=py312h06a4308_0
  - pandas=2.2.2=py312h526ad5a_0
  - pcre2=10.42=hebb0a14_0
  - pillow=10.4.0=py312h5eee18b_0
  - pip=23.3.1=py312h06a4308_0
  - platformdirs=3.10.0=py312h06a4308_0
  - plotly=5.24.1=py_0
  - pluggy=1.0.0=py312h06a4308_1
  - ply=3.11=py312h06a4308_1
  - protobuf=4.25.3=py312h12ddb61_0
  - pybind11-abi=4=hd3eb1b0_1
  - pycosat=0.6.6=py312h5eee18b_0
  - pycparser=2.21=pyhd3eb1b0_0
  - pyparsing=3.1.2=pyhd8ed1ab_0
  - pyqt=5.15.10=py312h6a678d5_0
  - pyqt5-sip=12.13.0=py312h5eee18b_0
  - pysocks=1.7.1=py312h06a4308_0
  - python=3.12.1=h996f2a0_0
  - python-benedict=0.34.0=pyhff2d567_0
  - python-dateutil=2.9.0post0=py312h06a4308_2
  - python-fsutil=0.14.1=pyhd8ed1ab_0
  - python-slugify=8.0.4=pyhd8ed1ab_0
  - python-tzdata=2024.1=pyhd8ed1ab_0
  - pytorch=2.3.1=py3.12_cuda12.1_cudnn8.9.2_0
  - pytorch-cuda=12.1=ha16c6d3_5
  - pytorch-mutex=1.0=cuda
  - pytz=2024.1=py312h06a4308_0
  - pyyaml=6.0.1=py312h5eee18b_0
  - qt-main=5.15.2=h53bd1ea_10
  - rdflib=7.0.0=pyhd8ed1ab_0
  - readline=8.2=h5eee18b_0
  - reproc=14.2.4=h295c915_1
  - reproc-cpp=14.2.4=h295c915_1
  - requests=2.32.3=pyhd8ed1ab_0
  - rouge-score=0.1.2=pyhd8ed1ab_0
  - ruamel.yaml=0.17.21=py312h5eee18b_0
  - setuptools=68.2.2=py312h06a4308_0
  - sip=6.7.12=py312h6a678d5_0
  - six=1.16.0=pyhd3eb1b0_1
  - sqlite=3.41.2=h5eee18b_0
  - sympy=1.13.0=pyh04b8f61_3
  - tbb=2021.8.0=hdb19cb5_0
  - text-unidecode=1.3=pyhd8ed1ab_1
  - tk=8.6.12=h1ccaba5_0
  - torchaudio=2.3.1=py312_cu121
  - torchvision=0.18.1=py312_cu121
  - tornado=6.4.1=py312h5eee18b_0
  - tqdm=4.66.4=pyhd8ed1ab_0
  - truststore=0.8.0=py312h06a4308_0
  - typing_extensions=4.11.0=py312h06a4308_0
  - tzdata=2024a=h0c530f3_0
  - unicodedata2=15.1.0=py312h5eee18b_0
  - urllib3=2.1.0=py312h06a4308_1
  - wcwidth=0.2.13=pyhd8ed1ab_0
  - wheel=0.41.2=py312h06a4308_0
  - xz=5.4.5=h5eee18b_0
  - yaml=0.2.5=h7b6447c_0
  - yaml-cpp=0.8.0=h6a678d5_0
  - zipp=3.19.2=pyhd8ed1ab_0
  - zlib=1.2.13=h5eee18b_0
  - zstandard=0.19.0=py312h5eee18b_0
  - zstd=1.5.5=hc292b87_0
  - pip:
      - accelerate==0.32.1
      - aiofiles==23.2.1
      - aiohttp==3.9.5
      - aiosignal==1.3.1
      - annotated-types==0.7.0
      - anyio==4.6.0
      - attrs==23.2.0
      - bert-score==0.3.13
      - bitsandbytes==0.43.3
      - coloredlogs==15.0.1
      - cuda-python==12.4.0
      - datasets==2.20.0
      - deepspeed==0.14.4
      - dill==0.3.8
      - evaluate==0.4.2
      - fastapi==0.115.0
      - ffmpy==0.4.0
      - flatbuffers==24.3.25
      - frozenlist==1.4.1
      - fsspec==2024.5.0
      - gliner==0.2.13
      - gradio==5.0.1
      - gradio-client==1.4.0
      - greenlet==3.1.1
      - h11==0.14.0
      - hjson==3.1.0
      - httpcore==1.0.6
      - httpx==0.27.2
      - huggingface-hub==0.25.2
      - humanfriendly==10.0
      - ipynb-py-convert==0.4.6
      - jiter==0.6.1
      - jsonpatch==1.33
      - langchain==0.3.3
      - langchain-core==0.3.10
      - langchain-openai==0.2.2
      - langchain-text-splitters==0.3.0
      - langsmith==0.1.135
      - markdown-it-py==3.0.0
      - matplotlib==3.9.1.post1
      - mdurl==0.1.2
      - multidict==6.0.5
      - multiprocess==0.70.16
      - ninja==1.11.1.1
      - nvidia-ml-py==12.555.43
      - onnxruntime==1.19.2
      - openai==1.51.2
      - orjson==3.10.7
      - peft==0.12.0
      - psutil==6.0.0
      - py-cpuinfo==9.0.0
      - pyarrow==17.0.0
      - pyarrow-hotfix==0.6
      - pydantic==2.8.2
      - pydantic-core==2.20.1
      - pydantic-settings==2.5.2
      - pydub==0.25.1
      - pygments==2.18.0
      - python-dotenv==1.0.1
      - python-multipart==0.0.12
      - regex==2024.5.15
      - requests-toolbelt==1.0.0
      - rich==13.9.2
      - ruff==0.6.9
      - safetensors==0.4.3
      - scikit-learn==1.5.1
      - scipy==1.14.0
      - semantic-version==2.10.0
      - sentencepiece==0.2.0
      - shellingham==1.5.4
      - sniffio==1.3.1
      - sqlalchemy==2.0.36
      - starlette==0.38.6
      - tenacity==8.5.0
      - threadpoolctl==3.5.0
      - tiktoken==0.8.0
      - tokenizers==0.19.1
      - tomlkit==0.12.0
      - torch-testing==0.0.2
      - transformers==4.42.4
      - typer==0.12.5
      - uvicorn==0.31.1
      - websockets==12.0
      - xxhash==3.4.1
      - yarl==1.9.4
prefix: /scratch/consose-011/conda/storylines-env-accel-011