jattokatarratto committed (verified)
Commit 3724ac8 · Parent(s): cc757d0

Upload folder using huggingface_hub
.gitignore ADDED
@@ -0,0 +1,107 @@



.idea/

config.py


#
# Project specific excludes
#

*.csv
*.log
*.key
*.env

/.gradio/
/.gradio/*

med_news.txt

example_sergio.sh
example_sergio_summary.sh

screenlog.0

/prove/
/prove/*

*.json

/__pycache__/
/__pycache__/*

/vast_api_logs/
/vast_api_logs/*

*.tpl


./.settings/




*.Rhistory
*.Rproj
*.RData

tomcat

#
# Default excludes
#

# Binaries
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip
*.war
*.ear
*.sar
*.class

# Maven
target/

# IntelliJ project files
*.iml
*.iws
*.ipr
.idea/

# eclipse project file
.settings/
.classpath
.project

# NetBeans specific
nbproject/private/
build/
nbbuild/
dist/
nbdist/
nbactions.xml
nb-configuration.xml


# OS
.DS_Store

# Misc
*.swp
release.properties
pom.xml.releaseBackup
pom.xml.tag
__pycache__

.Rproj.user

/bin/
README.md CHANGED
@@ -1,12 +1,7 @@
  ---
- title: CrisesStorylinesRAG
- emoji: 🦀
- colorFrom: gray
- colorTo: green
+ title: crisesStorylinesRAG
+ app_file: storylines-app.py
  sdk: gradio
- sdk_version: 5.8.0
- app_file: app.py
- pinned: false
+ sdk_version: 5.0.1
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # crisesStorylinesRAG
 
client_v1/__init__.py ADDED
File without changes
client_v1/client.py ADDED
@@ -0,0 +1,99 @@
import httpx
from langchain_core.callbacks.manager import (
    AsyncCallbackManagerForRetrieverRun,
    CallbackManagerForRetrieverRun,
)
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from pydantic import Field, PrivateAttr, model_validator

from .settings import EmmRetrieversSettings


def as_lc_docs(dicts: list[dict]) -> list[Document]:
    return [
        Document(page_content=d["page_content"], metadata=d["metadata"]) for d in dicts
    ]


# The simple retriever is built with a fixed spec/filter/params/route config
# and can then be reused many times with different queries.
# Note: instances are cheap to construct.


class EmmRetrieverV1(BaseRetriever):
    settings: EmmRetrieversSettings
    spec: dict
    filter: dict | None = None
    params: dict = Field(default_factory=dict)
    route: str = "/r/rag-minimal/query"
    add_ref_key: bool = True

    _client: httpx.Client = PrivateAttr()
    _aclient: httpx.AsyncClient = PrivateAttr()

    # ------- interface impl:
    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> list[Document]:
        r = self._client.post(**self.search_post_kwargs(query))
        if r.status_code == 422:
            print("ERROR:\n", r.json())
        r.raise_for_status()
        resp = r.json()
        return self._as_lc_docs(resp["documents"])

    async def _aget_relevant_documents(
        self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
    ) -> list[Document]:
        r = await self._aclient.post(**self.search_post_kwargs(query))
        if r.status_code == 422:
            print("ERROR:\n", r.json())
        r.raise_for_status()
        resp = r.json()
        return self._as_lc_docs(resp["documents"])

    # ---------
    @model_validator(mode="after")
    def create_clients(self):
        _auth_headers = {
            "Authorization": f"Bearer {self.settings.API_KEY.get_secret_value()}"
        }

        kwargs = dict(
            base_url=self.settings.API_BASE,
            headers=_auth_headers,
            timeout=self.settings.DEFAULT_TIMEOUT,
        )

        self._client = httpx.Client(**kwargs)
        self._aclient = httpx.AsyncClient(**kwargs)
        return self

    @model_validator(mode="after")
    def apply_default_params(self):
        self.params = {
            **{
                "cluster_name": self.settings.DEFAULT_CLUSTER,
                "index": self.settings.DEFAULT_INDEX,
            },
            **(self.params or {}),
        }
        return self

    def _as_lc_docs(self, dicts: list[dict]) -> list[Document]:
        docs = as_lc_docs(dicts)
        if self.add_ref_key:
            for i, d in enumerate(docs):
                d.metadata["ref_key"] = i

        return docs

    def search_post_kwargs(self, query: str):
        return dict(
            url=self.route,
            params=self.params,
            json={"query": query, "spec": self.spec, "filter": self.filter},
        )
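
A hedged usage sketch (the index name is hypothetical, and it assumes the EMM_RETRIEVERS_* env vars are set): the two model validators above create the HTTP clients once at construction time, and apply_default_params fills cluster_name/index from the settings underneath any explicitly passed params:

retriever = EmmRetrieverV1(
    settings=EmmRetrieversSettings(),
    spec={"search_k": 5},
    params={"index": "my_test_index"},  # hypothetical; overrides DEFAULT_INDEX
)
# cluster_name falls back to settings.DEFAULT_CLUSTER ("rag-os" by default)
assert retriever.params["index"] == "my_test_index"
docs = retriever.invoke("example query")  # POSTs to `route`, returns Documents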
client_v1/example_00.py ADDED
@@ -0,0 +1,154 @@
# %%
from pprint import pprint
import os
import httpx

# from pydantic_settings import BaseSettings, SettingsConfigDict
# from pydantic import SecretStr
#
# model_config = SettingsConfigDict(env_prefix="EMM_RETRIEVERS_", env_file="/eos/jeodpp/home/users/consose/PycharmProjects/disasterStories-prj/.env")
#
# class RetrieverSettings(BaseSettings):
#     api_base: str
#     api_key: SecretStr
#
#     class Config:
#         config_dict = model_config
#
# settings = RetrieverSettings()
# print(settings.api_base)
# print(settings.api_key.get_secret_value())


from client_v1.formatting_utils import fixed_width_wrap, format_docs
from client_v1.settings import EmmRetrieversSettings


# %%
settings = EmmRetrieversSettings()

settings.API_BASE

# the test index configuration
TEST_INDEX = "mine_e_emb-rag_live_test_001"
INDEX_MIN = "2024-09-14"
INDEX_MAX = "2024-09-28"

# instantiate an httpx client once, with base URL and auth headers
client = httpx.Client(
    base_url=settings.API_BASE,
    headers={"Authorization": f"Bearer {settings.API_KEY.get_secret_value()}"},
)


# %%
# get your auth info
client.get("/_cat/token").json()

EXAMPLE_QUESTION = "What natural disasters are currently occurring?"

# %%
r = client.post(
    "/r/rag-minimal/query",
    params={"cluster_name": settings.DEFAULT_CLUSTER, "index": TEST_INDEX},
    json={
        "query": EXAMPLE_QUESTION,
        "spec": {"search_k": 20},
        "filter": {
            "max_chunk_no": 1,
            "min_chars": 200,
            "start_dt": "2024-09-19",
            "end_dt": "2024-09-20",
        },
    },
)

r.raise_for_status()

search_resp = r.json()

documents = search_resp["documents"]
print(len(documents))


titles = [d["metadata"]["title"] for d in documents]

print("\n".join([f"- {title}" for title in titles]))

# %%
# full chunk formatting:

print(format_docs(documents, fixed_width=True))

# %%
# Using the gpt@jrc language models

from client_v1.jrc_openai import JRCChatOpenAI

llm_model = JRCChatOpenAI(
    model="llama-3.1-70b-instruct",
    openai_api_key=settings.OPENAI_API_KEY.get_secret_value(),
    openai_api_base=settings.OPENAI_API_BASE_URL,
)

resp = llm_model.invoke("What is the JRC?")
print(resp.content)
pprint(resp.response_metadata)

# %%

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

rag_chain = prompt | llm_model

# Add the API key to the LLM model
# llm_model.api_key = settings.OPENAI_API_KEY.get_secret_value()

r = rag_chain.invoke({"input": EXAMPLE_QUESTION, "context": format_docs(documents)})

print(fixed_width_wrap(r.content))
print("-" * 42)
pprint(r.response_metadata)

# %% [markdown]

# notes:
# - custom retriever class
# - multiquery retrieval https://python.langchain.com/docs/how_to/MultiQueryRetriever/
# - self query https://python.langchain.com/docs/how_to/self_query/


# %%
# using prompt hubs

import langchain.hub

if hasattr(settings, "LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = settings.LANGCHAIN_API_KEY.get_secret_value()

rag_prompt = langchain.hub.pull("rlm/rag-prompt")
print(
    fixed_width_wrap(
        rag_prompt.format(**{k: "{" + k + "}" for k in rag_prompt.input_variables})
    )
)


# %%
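
The multiquery note above can be made concrete; this is an illustrative sketch (not part of the commit) that wraps the custom retriever from client_v1.client in LangChain's MultiQueryRetriever, reusing `settings`, `TEST_INDEX`, `llm_model`, and `EXAMPLE_QUESTION` from this script:

# %%
# a hedged sketch of the multiquery-retrieval note above
from client_v1.client import EmmRetrieverV1
from langchain.retrievers.multi_query import MultiQueryRetriever

base_retriever = EmmRetrieverV1(
    settings=settings, spec={"search_k": 20}, params={"index": TEST_INDEX}
)
# the LLM generates query variants; results are deduplicated across variants
mq_retriever = MultiQueryRetriever.from_llm(retriever=base_retriever, llm=llm_model)
mq_docs = mq_retriever.invoke(EXAMPLE_QUESTION)
print(len(mq_docs))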
client_v1/example_01.py ADDED
@@ -0,0 +1,106 @@
# %%
from pprint import pprint

import httpx

from client_v1.formatting_utils import fixed_width_wrap, format_docs
from client_v1.settings import EmmRetrieversSettings

# %%
settings = EmmRetrieversSettings()

settings.API_BASE

# the test index configuration
TEST_INDEX = "mine_e_emb-rag_live_test_001"
INDEX_MIN = "2024-09-14"
INDEX_MAX = "2024-09-28"

# %%

from client_v1.client import EmmRetrieverV1

# we can build a concrete retriever by specifying everything but the actual `query`;
# here, for example, we build a retriever for just a specific date
retriever = EmmRetrieverV1(
    settings=settings,
    params={"index": TEST_INDEX},
    route="/r/rag-minimal/query",
    spec={"search_k": 20},
    filter={
        "max_chunk_no": 1,
        "min_chars": 200,
        "start_dt": "2024-09-19",
        "end_dt": "2024-09-20",
    },
)

# %%

EXAMPLE_QUESTION = "What natural disasters are currently occurring?"

docs = retriever.invoke(EXAMPLE_QUESTION)

docs
# %%
# very similar, except `metadata` is an attribute
titles = [d.metadata["title"] for d in docs]

print("\n".join([f"- {title}" for title in titles]))

# %%

print(format_docs(docs))

# %%
# Using the gpt@jrc language models

from client_v1.jrc_openai import JRCChatOpenAI

llm_model = JRCChatOpenAI(
    model="llama-3.1-70b-instruct",
    openai_api_key=settings.OPENAI_API_KEY.get_secret_value(),
    openai_api_base=settings.OPENAI_API_BASE_URL,
)

# %%

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

rag_chain = (
    {"context": retriever | format_docs, "input": RunnablePassthrough()}
    | prompt
    | llm_model
)

# %%
r = rag_chain.invoke(EXAMPLE_QUESTION)

print(fixed_width_wrap(r.content))
print("-" * 42)
pprint(r.response_metadata)

# %%
r = rag_chain.invoke("Outline the ongoing Health emergencies in Europe")

print(fixed_width_wrap(r.content))
print("-" * 42)
pprint(r.response_metadata)

# %%
client_v1/formatting_utils.py ADDED
@@ -0,0 +1,52 @@
# %%

import textwrap

from benedict import benedict
from langchain_core.documents import Document


def _fixed_width_wrap(text, width: int = 70, join_str: str = "\n"):
    return join_str.join(textwrap.wrap(text, width=width))


def fixed_width_wrap(text, width: int = 70, join_str: str = "\n", split_str="\n"):
    return join_str.join(
        [
            _fixed_width_wrap(t, width=width, join_str=join_str)
            for t in text.split(split_str)
        ]
    )


def format_doc_minimal(d, fixed_width=False):
    if isinstance(d, Document):
        _cont = d.page_content
        _meta = benedict(d.metadata)
    else:
        _cont = d["page_content"]
        _meta = benedict(d["metadata"])

    if fixed_width:
        _cont = _fixed_width_wrap(_cont)

    return """\
Title:\t{title}
Published on:\t{pubdate}
Source:\t{source_name} ({source_country})
Chunk Content:

\t{cont}
""".format(
        title=_meta.get("title"),
        pubdate=_meta.get("pubdate"),
        source_name=_meta.get("source.host") or _meta.get("source.id"),
        source_country=_meta.get("source.country", "n/a"),
        cont=_cont,
    )


def format_docs(docs, doc_fn=format_doc_minimal, **kwargs):
    return "\n---\n".join([doc_fn(d, **kwargs) for d in docs])
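
For reference, a toy input (illustrative values only) showing the chunk shape format_docs accepts and how benedict's keypath lookups ("source.host", "source.country") resolve against the nested metadata:

sample = {
    "page_content": "A severe storm hit the coast overnight...",
    "metadata": {
        "title": "Storm update",
        "pubdate": "2024-09-19",
        "source": {"host": "news.example.org", "country": "IT"},
    },
}
print(format_docs([sample], fixed_width=True))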
client_v1/jrc_openai.py ADDED
@@ -0,0 +1,26 @@
from langchain_openai import ChatOpenAI
from langchain_core.language_models import LanguageModelInput
from typing import Any, List, Optional


# This will look for the regular OpenAI env vars
# (OPENAI_API_KEY and OPENAI_API_BASE), so override them externally with the gpt@jrc coords.
class JRCChatOpenAI(ChatOpenAI):

    def _get_request_payload(
        self,
        input_: LanguageModelInput,
        *,
        stop: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> dict:
        """
        NOTE: this works around bug_00_tool_message: changing the role of
        tool messages to "system" keeps gpt@jrc happy.
        """

        r = super()._get_request_payload(input_=input_, stop=stop, **kwargs)
        for m in r["messages"]:
            if m["role"] == "tool":
                m["role"] = "system"
        return r
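
A quick offline sketch of the role rewrite (dummy key, no network call, since _get_request_payload only builds the request body; assumes the langchain-openai 0.2.x private API pinned by this repo):

from langchain_core.messages import HumanMessage, ToolMessage

m = JRCChatOpenAI(model="llama-3.1-70b-instruct", openai_api_key="dummy")
payload = m._get_request_payload(
    [HumanMessage("hi"), ToolMessage("42", tool_call_id="t1")]
)
assert all(msg["role"] != "tool" for msg in payload["messages"])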
client_v1/settings.py ADDED
@@ -0,0 +1,22 @@
from pydantic_settings import BaseSettings, SettingsConfigDict
from pydantic import SecretStr


class EmmRetrieversSettings(BaseSettings):
    API_BASE: str
    API_KEY: SecretStr

    OPENAI_API_BASE_URL: str
    OPENAI_API_KEY: SecretStr

    LANGCHAIN_API_KEY: SecretStr

    DEFAULT_CLUSTER: str = "rag-os"
    DEFAULT_INDEX: str = "mine_e_emb-rag_live"

    DEFAULT_TIMEOUT: int = 120

    model_config = SettingsConfigDict(env_prefix="EMM_RETRIEVERS_", env_file="../.env")
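
With env_prefix="EMM_RETRIEVERS_", pydantic-settings maps each field onto a prefixed environment variable, so the expected ../.env looks like the following sketch (all values hypothetical placeholders):

# EMM_RETRIEVERS_API_BASE=https://example.invalid/emm
# EMM_RETRIEVERS_API_KEY=...
# EMM_RETRIEVERS_OPENAI_API_BASE_URL=https://example.invalid/v1
# EMM_RETRIEVERS_OPENAI_API_KEY=...
# EMM_RETRIEVERS_LANGCHAIN_API_KEY=...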
storylines-app.py ADDED
@@ -0,0 +1,350 @@
import pandas as pd
from datetime import date
import gradio as gr
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import networkx as nx
import ast

# # Environment settings
# os.environ["CUDA_VISIBLE_DEVICES"] = "2"
# os.environ["HF_HUB_CACHE"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
# os.environ["HUGGINGFACE_HUB_CACHE"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
# os.environ["HF_HOME"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"

# Load the CSV file
# df = pd.read_csv("emdat2.csv", sep=',', header=0, dtype=str, encoding='utf-8')
df = pd.read_csv("https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/ETOHA/storylines/emdat2.csv",
                 sep=',', header=0, dtype=str, encoding='utf-8')

# EM-DAT columns shown verbatim next to the generated storyline
ADDITIONAL_FIELDS = [
    "Country", "ISO", "Subregion", "Region", "Location", "Origin",
    "Disaster Group", "Disaster Subgroup", "Disaster Type", "Disaster Subtype", "External IDs",
    "Event Name", "Associated Types", "OFDA/BHA Response", "Appeal", "Declaration",
    "AID Contribution ('000 US$)", "Magnitude", "Magnitude Scale", "Latitude",
    "Longitude", "River Basin", "Total Deaths", "No. Injured",
    "No. Affected", "No. Homeless", "Total Affected",
    "Reconstruction Costs ('000 US$)", "Reconstruction Costs, Adjusted ('000 US$)",
    "Insured Damage ('000 US$)", "Insured Damage, Adjusted ('000 US$)",
    "Total Damage ('000 US$)", "Total Damage, Adjusted ('000 US$)", "CPI",
    "Admin Units",
    # "Entry Date", "Last Update"
]

EMPTY_RESULT = ('', '', '', '', '', '', '', None, '', '') + tuple([''] * len(ADDITIONAL_FIELDS))


def try_parse_date(y, m, d):
    try:
        if not y or not m or not d:
            return None
        return date(int(float(y)), int(float(m)), int(float(d)))
    except (ValueError, TypeError):
        return None


def row_date(row, which):
    """Parse a row's 'Start'/'End' date, defaulting a missing month or day to the 1st."""
    y, m, d = row[f'{which} Year'], row[f'{which} Month'], row[f'{which} Day']
    return try_parse_date(y, "01" if m == "" else m, "01" if d == "" else d)


def filter_events(data, country, year, month, day):
    """Apply the country and date filters shared by the row dropdown and the info panel."""
    filtered_df = data
    if country:
        filtered_df = filtered_df[filtered_df['Country'] == country]

    selected_date = try_parse_date(year, month, day)

    if selected_date:
        # keep events whose [start, end] interval contains the selected date
        filtered_df = filtered_df[filtered_df.apply(
            lambda row: (
                row_date(row, 'Start') is not None and
                row_date(row, 'End') is not None and
                row_date(row, 'Start') <= selected_date <= row_date(row, 'End')
            ), axis=1)]
    elif year:
        if month:
            sstart = try_parse_date(year, month, "01")
            # exclusive upper bound: first day of the following month
            # (January of the next year when December is selected)
            if month == "12":
                eend = try_parse_date(str(int(float(year)) + 1), "01", "01")
            else:
                eend = try_parse_date(year, str(int(float(month)) + 1), "01")
            if sstart and eend:
                filtered_df = filtered_df[filtered_df.apply(
                    lambda row: (
                        row_date(row, 'Start') is not None and
                        sstart <= row_date(row, 'Start') < eend
                    ), axis=1)]
            else:
                print("Invalid selected date.")
        else:
            sstart = try_parse_date(year, "01", "01")
            eend = try_parse_date(year, "12", "31")
            if sstart and eend:
                filtered_df = filtered_df[filtered_df.apply(
                    lambda row: (
                        row_date(row, 'Start') is not None and
                        sstart <= row_date(row, 'Start') <= eend
                    ), axis=1)]
            else:
                print("Invalid selected date.")
    else:
        print("Invalid selected date.")

    return filtered_df


def plot_cgraph(grp):
    if not grp:
        return None
    source, relations, target = list(zip(*grp))
    kg_df = pd.DataFrame({'source': source, 'target': target, 'edge': relations})
    G = nx.from_pandas_edgelist(kg_df, "source", "target", edge_attr='edge', create_using=nx.MultiDiGraph())
    edge_colors_dict = {"causes": "red", "prevents": "green"}
    edge_color_list = [edge_colors_dict.get(G[u][v][key]['edge'], 'black') for u, v, key in G.edges(keys=True)]

    plt.figure(figsize=(12, 12))
    pos = nx.spring_layout(G, k=1.5, iterations=100)
    nx.draw_networkx_nodes(G, pos, node_color='skyblue', node_size=800, alpha=0.8)
    nx.draw_networkx_edges(G, pos, edge_color=edge_color_list, arrows=True, width=2)
    nx.draw_networkx_labels(G, pos)
    legend_elements = [Line2D([0], [0], color=color, label=edge_type, lw=2)
                       for edge_type, color in edge_colors_dict.items()]
    plt.legend(handles=legend_elements, loc='best')
    plt.axis('off')
    plt.tight_layout()
    return plt.gcf()


def display_info(selected_row_str, country, year, month, day):
    if not selected_row_str:
        print("No row selected.")
        return EMPTY_RESULT

    print(f"Selected Country: {country}, Selected Row: {selected_row_str}, Date: {year}-{month}-{day}")

    filtered_df = filter_events(df, country, year, month, day)

    # Use the "DisNo." column for selecting the row
    row_data = filtered_df[filtered_df['DisNo.'] == selected_row_str].squeeze()

    if row_data.empty:
        print("No valid data found for the selection.")
        return EMPTY_RESULT

    print(f"Row data: {row_data}")
    key_information = row_data.get('key information', '')
    severity = row_data.get('severity', '')
    key_drivers = row_data.get('key drivers', '')
    impacts_exposure_vulnerability = row_data.get('main impacts, exposure, and vulnerability', '')
    likelihood_multi_hazard = row_data.get('likelihood of multi-hazard risks', '')
    best_practices = row_data.get('best practices for managing this risk', '')
    recommendations = row_data.get('recommendations and supportive measures for recovery', '')
    causal_graph_caption = row_data.get('causal graph', '')
    grp = ast.literal_eval(causal_graph_caption) if causal_graph_caption else []
    causal_graph_plot = plot_cgraph(grp)

    # Parse and format the start date, falling back to the raw Y-M-D fields
    start_date = try_parse_date(row_data['Start Year'], row_data['Start Month'], row_data['Start Day'])
    start_date_str = (start_date.strftime('%Y-%m-%d') if start_date
                      else f"{row_data['Start Year']}-{row_data['Start Month']}-{row_data['Start Day']}")

    # Parse and format the end date, with the same fallback
    end_date = try_parse_date(row_data['End Year'], row_data['End Month'], row_data['End Day'])
    end_date_str = (end_date.strftime('%Y-%m-%d') if end_date
                    else f"{row_data['End Year']}-{row_data['End Month']}-{row_data['End Day']}")

    # Collect the additional field data
    additional_data = [row_data.get(field, '') for field in ADDITIONAL_FIELDS]

    return (
        key_information,
        severity,
        key_drivers,
        impacts_exposure_vulnerability,
        likelihood_multi_hazard,
        best_practices,
        recommendations,
        causal_graph_plot,
        start_date_str,
        end_date_str,
    ) + tuple(additional_data)


def update_row_dropdown(country, year, month, day):
    filtered_df = filter_events(df, country, year, month, day)

    # Use the "DisNo." column for the dropdown choices
    choices = filtered_df['DisNo.'].tolist() if not filtered_df.empty else []
    print(f"Available rows for {country} on {year}-{month}-{day}: {choices}")
    return gr.update(choices=choices, value=choices[0] if choices else None)


def build_interface():
    with gr.Blocks() as interface:

        # Title and description
        gr.Markdown("## From Data to Narratives: AI-Enhanced Disaster and Health Threats Storylines")
        gr.Markdown(
            "This Gradio app complements Health Threats and Disaster event data through generative AI techniques, including the use of Retrieval Augmented Generation (RAG) with the [Europe Media Monitoring (EMM)](https://emm.newsbrief.eu/overview.html) service, "
            "and Large Language Models (LLMs) from the [GPT@JRC](https://gpt.jrc.ec.europa.eu/) portfolio. <br>"
            "The app leverages the EMM RAG service to retrieve relevant news chunks for each event, transforms the unstructured news chunks into structured narratives and causal knowledge graphs using LLMs and text-to-graph techniques, linking health threats and disaster events to their causes and impacts. "
            "Drawing data from sources like the [EM-DAT](https://www.emdat.be/) database, it augments each event with news-derived information in a storytelling fashion. <br>"
            "This tool enables decision-makers to better explore health threats and disaster dynamics, identify patterns, and simulate scenarios for improved response and readiness. <br><br>"
            "Select an event below. You can filter by country and date period. Below, you will see the AI-generated storyline and causal knowledge graph, while on the right you can see the related EM-DAT data record. <br><br>"
        )

        # Extract and prepare unique years from "Start Year" and "End Year"
        if not df.empty:
            start_years = df["Start Year"].dropna().unique()
            end_years = df["End Year"].dropna().unique()

            # Convert to integers and merge into a union set
            years = set(start_years.astype(int).tolist() + end_years.astype(int).tolist())
            year_choices = sorted(years)
        else:
            year_choices = []

        with gr.Row():
            with gr.Column():
                # Main controls and outputs
                country_dropdown = gr.Dropdown(choices=[''] + df['Country'].unique().tolist(), label="Select Country")
                year_dropdown = gr.Dropdown(choices=[""] + [str(year) for year in year_choices], label="Select Year")
                month_dropdown = gr.Dropdown(choices=[""] + [f"{i:02d}" for i in range(1, 13)], label="Select Month")
                day_dropdown = gr.Dropdown(choices=[""] + [f"{i:02d}" for i in range(1, 32)], label="Select Day")
                row_dropdown = gr.Dropdown(choices=[], label="Select Disaster Event #", interactive=True)

                outputs = [
                    gr.Textbox(label="Key Information", interactive=False),
                    gr.Textbox(label="Severity", interactive=False),
                    gr.Textbox(label="Key Drivers", interactive=False),
                    gr.Textbox(label="Main Impacts, Exposure, and Vulnerability", interactive=False),
                    gr.Textbox(label="Likelihood of Multi-Hazard Risks", interactive=False),
                    gr.Textbox(label="Best Practices for Managing This Risk", interactive=False),
                    gr.Textbox(label="Recommendations and Supportive Measures for Recovery", interactive=False),
                    gr.Plot(label="Causal Graph"),
                ]

            with gr.Column():
                # EM-DAT record fields on the right
                outputs.extend([
                    gr.Textbox(label="Start Date", interactive=False),
                    gr.Textbox(label="End Date", interactive=False),
                ])
                for field in ADDITIONAL_FIELDS:
                    outputs.append(gr.Textbox(label=field, interactive=False))

        # Update the selectable rows when any of the filters change
        for dropdown in (country_dropdown, year_dropdown, month_dropdown, day_dropdown):
            dropdown.change(
                fn=update_row_dropdown,
                inputs=[country_dropdown, year_dropdown, month_dropdown, day_dropdown],
                outputs=row_dropdown,
            )

        # Update the displayed information when a row is selected
        row_dropdown.change(
            fn=display_info,
            inputs=[row_dropdown, country_dropdown, year_dropdown, month_dropdown, day_dropdown],
            outputs=outputs,
        )

    return interface


app = build_interface()
app.launch(share=True)
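
For clarity, the "causal graph" column that display_info passes through ast.literal_eval holds a stringified list of (source, relation, target) triples; an illustrative value (made-up triples) that plot_cgraph can render:

sample_grp = [
    ("heavy rainfall", "causes", "flooding"),
    ("flooding", "causes", "crop damage"),
    ("early warning systems", "prevents", "casualties"),
]
fig = plot_cgraph(sample_grp)  # red edges = "causes", green edges = "prevents"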
storylines-app_plotly_version.py ADDED
@@ -0,0 +1,347 @@
import pandas as pd
from datetime import date
import gradio as gr
import networkx as nx
import ast
import plotly.graph_objects as go

# Load the CSV file
df = pd.read_csv("https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/ETOHA/storylines/emdat2.csv",
                 sep=',', header=0, dtype=str, encoding='utf-8')

# EM-DAT columns shown verbatim next to the generated storyline
ADDITIONAL_FIELDS = [
    "Country", "ISO", "Subregion", "Region", "Location", "Origin",
    "Disaster Group", "Disaster Subgroup", "Disaster Type", "Disaster Subtype", "External IDs",
    "Event Name", "Associated Types", "OFDA/BHA Response", "Appeal", "Declaration",
    "AID Contribution ('000 US$)", "Magnitude", "Magnitude Scale", "Latitude",
    "Longitude", "River Basin", "Total Deaths", "No. Injured",
    "No. Affected", "No. Homeless", "Total Affected",
    "Reconstruction Costs ('000 US$)", "Reconstruction Costs, Adjusted ('000 US$)",
    "Insured Damage ('000 US$)", "Insured Damage, Adjusted ('000 US$)",
    "Total Damage ('000 US$)", "Total Damage, Adjusted ('000 US$)", "CPI",
    "Admin Units",
]

EMPTY_RESULT = ('', '', '', '', '', '', '', None, '', '') + tuple([''] * len(ADDITIONAL_FIELDS))


def try_parse_date(y, m, d):
    try:
        if not y or not m or not d:
            return None
        return date(int(float(y)), int(float(m)), int(float(d)))
    except (ValueError, TypeError):
        return None


def row_date(row, which):
    """Parse a row's 'Start'/'End' date, defaulting a missing month or day to the 1st."""
    y, m, d = row[f'{which} Year'], row[f'{which} Month'], row[f'{which} Day']
    return try_parse_date(y, "01" if m == "" else m, "01" if d == "" else d)


def filter_events(data, country, year, month, day):
    """Apply the country and date filters shared by the row dropdown and the info panel."""
    filtered_df = data
    if country:
        filtered_df = filtered_df[filtered_df['Country'] == country]

    selected_date = try_parse_date(year, month, day)

    if selected_date:
        # keep events whose [start, end] interval contains the selected date
        filtered_df = filtered_df[filtered_df.apply(
            lambda row: (
                row_date(row, 'Start') is not None and
                row_date(row, 'End') is not None and
                row_date(row, 'Start') <= selected_date <= row_date(row, 'End')
            ), axis=1)]
    elif year:
        if month:
            sstart = try_parse_date(year, month, "01")
            # exclusive upper bound: first day of the following month
            # (January of the next year when December is selected)
            if month == "12":
                eend = try_parse_date(str(int(float(year)) + 1), "01", "01")
            else:
                eend = try_parse_date(year, str(int(float(month)) + 1), "01")
            if sstart and eend:
                filtered_df = filtered_df[filtered_df.apply(
                    lambda row: (
                        row_date(row, 'Start') is not None and
                        sstart <= row_date(row, 'Start') < eend
                    ), axis=1)]
        else:
            sstart = try_parse_date(year, "01", "01")
            eend = try_parse_date(year, "12", "31")
            if sstart and eend:
                filtered_df = filtered_df[filtered_df.apply(
                    lambda row: (
                        row_date(row, 'Start') is not None and
                        sstart <= row_date(row, 'Start') <= eend
                    ), axis=1)]

    return filtered_df


def plot_cgraph(grp):
    if not grp:
        return None
    source, relations, target = list(zip(*grp))
    kg_df = pd.DataFrame({'source': source, 'target': target, 'edge': relations})
    G = nx.from_pandas_edgelist(kg_df, "source", "target", edge_attr='edge', create_using=nx.MultiDiGraph())

    pos = nx.spring_layout(G, k=1.5, iterations=100)

    # Separate edges based on their color
    edge_colors_dict = {"causes": "red", "prevents": "green"}
    traces = []

    for color in edge_colors_dict.values():
        edge_x = []
        edge_y = []
        for u, v, key in G.edges(keys=True):
            current_color = edge_colors_dict.get(G[u][v][key]['edge'], 'black')
            if current_color == color:
                x0, y0 = pos[u]
                x1, y1 = pos[v]
                edge_x.extend([x0, x1, None])
                edge_y.extend([y0, y1, None])

        trace = go.Scatter(x=edge_x, y=edge_y, line=dict(width=2, color=color), hoverinfo='none', mode='lines')
        traces.append(trace)

    node_x = []
    node_y = []
    node_text = []
    for node in G.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        node_text.append(node)

    node_trace = go.Scatter(
        x=node_x, y=node_y, mode='markers+text', text=node_text,
        marker=dict(size=10, color='skyblue', line_width=2),
        textposition="top center", hoverinfo='text'
    )

    traces.append(node_trace)

    fig = go.Figure(data=traces,
                    layout=go.Layout(showlegend=False,
                                     hovermode='closest',
                                     margin=dict(b=20, l=5, r=5, t=40)))

    return fig


def display_info(selected_row_str, country, year, month, day):
    if not selected_row_str:
        return EMPTY_RESULT

    filtered_df = filter_events(df, country, year, month, day)

    row_data = filtered_df[filtered_df['DisNo.'] == selected_row_str].squeeze()

    if row_data.empty:
        return EMPTY_RESULT

    key_information = row_data.get('key information', '')
    severity = row_data.get('severity', '')
    key_drivers = row_data.get('key drivers', '')
    impacts_exposure_vulnerability = row_data.get('main impacts, exposure, and vulnerability', '')
    likelihood_multi_hazard = row_data.get('likelihood of multi-hazard risks', '')
    best_practices = row_data.get('best practices for managing this risk', '')
    recommendations = row_data.get('recommendations and supportive measures for recovery', '')
    causal_graph_caption = row_data.get('causal graph', '')
    grp = ast.literal_eval(causal_graph_caption) if causal_graph_caption else []
    causal_graph_plot = plot_cgraph(grp)

    start_date = try_parse_date(row_data['Start Year'], row_data['Start Month'], row_data['Start Day'])
    start_date_str = start_date.strftime('%Y-%m-%d') if start_date else 'N/A'

    end_date = try_parse_date(row_data['End Year'], row_data['End Month'], row_data['End Day'])
    end_date_str = end_date.strftime('%Y-%m-%d') if end_date else 'N/A'

    additional_data = [row_data.get(field, '') for field in ADDITIONAL_FIELDS]

    return (
        key_information,
        severity,
        key_drivers,
        impacts_exposure_vulnerability,
        likelihood_multi_hazard,
        best_practices,
        recommendations,
        causal_graph_plot,
        start_date_str,
        end_date_str,
    ) + tuple(additional_data)


def update_row_dropdown(country, year, month, day):
    filtered_df = filter_events(df, country, year, month, day)

    choices = filtered_df['DisNo.'].tolist() if not filtered_df.empty else []
    return gr.update(choices=choices, value=choices[0] if choices else None)


def build_interface():
    with gr.Blocks() as interface:
        gr.Markdown("## From Data to Narratives: AI-Enhanced Disaster and Health Threats Storylines")
        gr.Markdown(
            "This Gradio app complements Health Threats and Disaster event data... <br>"
            "Select an event data below..."
        )

        if not df.empty:
            start_years = df["Start Year"].dropna().unique()
            end_years = df["End Year"].dropna().unique()
            years = set(start_years.astype(int).tolist() + end_years.astype(int).tolist())
            year_choices = sorted(years)
        else:
            year_choices = []

        with gr.Row():
            with gr.Column():
                country_dropdown = gr.Dropdown(choices=[''] + df['Country'].unique().tolist(), label="Select Country")
                year_dropdown = gr.Dropdown(choices=[""] + [str(year) for year in year_choices], label="Select Year")
                month_dropdown = gr.Dropdown(choices=[""] + [f"{i:02d}" for i in range(1, 13)], label="Select Month")
                day_dropdown = gr.Dropdown(choices=[""] + [f"{i:02d}" for i in range(1, 32)], label="Select Day")
                row_dropdown = gr.Dropdown(choices=[], label="Select Disaster Event #", interactive=True)

                outputs = [
                    gr.Textbox(label="Key Information", interactive=False),
                    gr.Textbox(label="Severity", interactive=False),
                    gr.Textbox(label="Key Drivers", interactive=False),
                    gr.Textbox(label="Main Impacts, Exposure, and Vulnerability", interactive=False),
                    gr.Textbox(label="Likelihood of Multi-Hazard Risks", interactive=False),
                    gr.Textbox(label="Best Practices for Managing This Risk", interactive=False),
                    gr.Textbox(label="Recommendations and Supportive Measures for Recovery", interactive=False),
                    gr.Plot(label="Causal Graph"),
                ]

            with gr.Column():
                outputs.extend([
                    gr.Textbox(label="Start Date", interactive=False),
                    gr.Textbox(label="End Date", interactive=False),
                ])
                for field in ADDITIONAL_FIELDS:
                    outputs.append(gr.Textbox(label=field, interactive=False))

        for dropdown in (country_dropdown, year_dropdown, month_dropdown, day_dropdown):
            dropdown.change(
                fn=update_row_dropdown,
                inputs=[country_dropdown, year_dropdown, month_dropdown, day_dropdown],
                outputs=row_dropdown,
            )

        row_dropdown.change(
            fn=display_info,
            inputs=[row_dropdown, country_dropdown, year_dropdown, month_dropdown, day_dropdown],
            outputs=outputs,
        )

    return interface


app = build_interface()
app.launch()
storylines-env.yml ADDED
@@ -0,0 +1,289 @@
name: /scratch/consose-011/conda/storylines-env-accel-011
channels:
  - plotly
  - pytorch
  - nvidia
  - conda-forge
  - defaults
dependencies:
  - _libgcc_mutex=0.1=main
  - _openmp_mutex=5.1=1_gnu
  - absl-py=2.1.0=pyhd8ed1ab_0
  - archspec=0.2.1=pyhd3eb1b0_0
  - blas=1.0=mkl
  - blessed=1.19.1=pyhe4f9e05_2
  - boltons=23.0.0=py312h06a4308_0
  - bottleneck=1.3.7=py312ha883a20_0
  - brotli=1.0.9=h5eee18b_8
  - brotli-bin=1.0.9=h5eee18b_8
  - brotli-python=1.0.9=py312h6a678d5_7
  - bzip2=1.0.8=h7b6447c_0
  - c-ares=1.19.1=h5eee18b_0
  - ca-certificates=2024.8.30=hbcca054_0
  - certifi=2024.8.30=pyhd8ed1ab_0
  - cffi=1.16.0=py312h5eee18b_0
  - charset-normalizer=2.0.4=pyhd3eb1b0_0
  - click=8.1.7=unix_pyh707e725_0
  - colorama=0.4.6=pyhd8ed1ab_0
  - conda-content-trust=0.2.0=py312h06a4308_0
  - conda-package-handling=2.2.0=py312h06a4308_0
  - conda-package-streaming=0.9.0=py312h06a4308_0
  - contourpy=1.2.0=py312hdb19cb5_0
  - cryptography=41.0.7=py312hdda0065_0
  - cuda-cudart=12.1.105=0
  - cuda-cupti=12.1.105=0
  - cuda-libraries=12.1.0=0
  - cuda-nvrtc=12.1.105=0
  - cuda-nvtx=12.1.105=0
  - cuda-opencl=12.4.127=0
  - cuda-runtime=12.1.0=0
  - cuda-version=10.1=h0a949dd_3
  - cudatoolkit=10.1.243=h036e899_8
  - cycler=0.11.0=pyhd3eb1b0_0
  - cyrus-sasl=2.1.28=h52b45da_1
  - dbus=1.13.18=hb2f20db_0
  - distro=1.8.0=py312h06a4308_0
  - expat=2.5.0=h6a678d5_0
  - ffmpeg=4.3=hf484d3e_0
  - filelock=3.13.1=py312h06a4308_0
  - fmt=9.1.0=hdb19cb5_0
  - fontconfig=2.14.1=h4c34cd2_2
  - fonttools=4.51.0=py312h5eee18b_0
  - freetype=2.12.1=h4a9f257_0
  - glib=2.78.4=h6a678d5_0
  - glib-tools=2.78.4=h6a678d5_0
  - gmp=6.2.1=h295c915_3
  - gnutls=3.6.15=he1e5248_0
  - gpustat=1.1.1=pyhd8ed1ab_0
  - gst-plugins-base=1.14.1=h6a678d5_1
  - gstreamer=1.14.1=h5eee18b_1
  - icu=73.1=h6a678d5_0
  - idna=3.4=py312h06a4308_0
  - importlib-metadata=8.0.0=pyha770c72_0
  - intel-openmp=2023.1.0=hdb19cb5_46306
  - isodate=0.6.1=pyhd8ed1ab_0
  - jinja2=3.1.4=py312h06a4308_0
  - joblib=1.4.2=pyhd8ed1ab_0
  - jpeg=9e=h5eee18b_1
  - jsonpointer=2.1=pyhd3eb1b0_0
  - kiwisolver=1.4.4=py312h6a678d5_0
  - krb5=1.20.1=h143b758_1
  - lame=3.100=h7b6447c_0
  - lcms2=2.12=h3be6417_0
  - ld_impl_linux-64=2.38=h1181459_1
  - lerc=3.0=h295c915_0
  - libabseil=20240116.2=cxx17_h6a678d5_0
  - libarchive=3.6.2=h6ac8c49_2
  - libbrotlicommon=1.0.9=h5eee18b_8
  - libbrotlidec=1.0.9=h5eee18b_8
  - libbrotlienc=1.0.9=h5eee18b_8
  - libclang=14.0.6=default_hc6dbbc7_1
  - libclang13=14.0.6=default_he11475f_1
  - libcublas=12.1.0.26=0
  - libcufft=11.0.2.4=0
  - libcufile=1.9.1.3=0
  - libcups=2.4.2=h2d74bed_1
  - libcurand=10.3.5.147=0
  - libcurl=8.5.0=h251f7ec_0
  - libcusolver=11.4.4.55=0
  - libcusparse=12.0.2.55=0
  - libdeflate=1.17=h5eee18b_1
  - libedit=3.1.20230828=h5eee18b_0
  - libev=4.33=h7f8727e_1
  - libffi=3.4.4=h6a678d5_0
  - libgcc-ng=11.2.0=h1234567_1
  - libglib=2.78.4=hdc74915_0
  - libgomp=11.2.0=h1234567_1
  - libiconv=1.16=h5eee18b_3
  - libidn2=2.3.4=h5eee18b_0
  - libjpeg-turbo=2.0.0=h9bf148f_0
  - libllvm14=14.0.6=hdb19cb5_3
  - libmamba=1.5.3=haf1ee3a_0
  - libmambapy=1.5.3=py312h2dafd23_0
  - libnghttp2=1.57.0=h2d74bed_0
  - libnpp=12.0.2.50=0
  - libnvjitlink=12.1.105=0
  - libnvjpeg=12.1.1.14=0
  - libpng=1.6.39=h5eee18b_0
  - libpq=12.17=hdbd6064_0
  - libprotobuf=4.25.3=he621ea3_0
  - libsolv=0.7.24=he621ea3_0
  - libssh2=1.10.0=hdbd6064_2
  - libstdcxx-ng=11.2.0=h1234567_1
  - libtasn1=4.19.0=h5eee18b_0
  - libtiff=4.5.1=h6a678d5_0
  - libunistring=0.9.10=h27cfd23_0
  - libuuid=1.41.5=h5eee18b_0
  - libwebp-base=1.3.2=h5eee18b_0
  - libxcb=1.15=h7f8727e_0
  - libxkbcommon=1.0.1=h5eee18b_1
  - libxml2=2.10.4=hf1b16e4_1
  - llvm-openmp=14.0.6=h9e868ea_0
  - lz4-c=1.9.4=h6a678d5_0
  - markupsafe=2.1.3=py312h5eee18b_0
  - menuinst=2.0.2=py312h06a4308_0
  - mkl=2023.1.0=h213fc3f_46344
  - mkl-service=2.4.0=py312h5eee18b_1
  - mkl_fft=1.3.8=py312h5eee18b_0
  - mkl_random=1.2.4=py312hdb19cb5_0
  - mpmath=1.3.0=py312h06a4308_0
  - mysql=5.7.24=h721c034_2
  - ncurses=6.4=h6a678d5_0
  - nettle=3.7.3=hbbd107a_1
  - networkx=3.3=py312h06a4308_0
  - nltk=3.8.1=pyhd8ed1ab_0
  - numexpr=2.8.7=py312hf827012_0
  - numpy=1.26.4=py312hc5e2394_0
  - numpy-base=1.26.4=py312h0da6c21_0
  - openh264=2.1.1=h4ff587b_0
  - openjpeg=2.4.0=h9ca470c_2
  - openssl=3.0.14=h5eee18b_0
  - packaging=23.1=py312h06a4308_0
  - pandas=2.2.2=py312h526ad5a_0
  - pcre2=10.42=hebb0a14_0
  - pillow=10.4.0=py312h5eee18b_0
  - pip=23.3.1=py312h06a4308_0
  - platformdirs=3.10.0=py312h06a4308_0
  - plotly=5.24.1=py_0
  - pluggy=1.0.0=py312h06a4308_1
  - ply=3.11=py312h06a4308_1
  - protobuf=4.25.3=py312h12ddb61_0
  - pybind11-abi=4=hd3eb1b0_1
  - pycosat=0.6.6=py312h5eee18b_0
  - pycparser=2.21=pyhd3eb1b0_0
  - pyparsing=3.1.2=pyhd8ed1ab_0
  - pyqt=5.15.10=py312h6a678d5_0
  - pyqt5-sip=12.13.0=py312h5eee18b_0
  - pysocks=1.7.1=py312h06a4308_0
  - python=3.12.1=h996f2a0_0
  - python-benedict=0.34.0=pyhff2d567_0
  - python-dateutil=2.9.0post0=py312h06a4308_2
  - python-fsutil=0.14.1=pyhd8ed1ab_0
  - python-slugify=8.0.4=pyhd8ed1ab_0
  - python-tzdata=2024.1=pyhd8ed1ab_0
  - pytorch=2.3.1=py3.12_cuda12.1_cudnn8.9.2_0
  - pytorch-cuda=12.1=ha16c6d3_5
  - pytorch-mutex=1.0=cuda
  - pytz=2024.1=py312h06a4308_0
  - pyyaml=6.0.1=py312h5eee18b_0
  - qt-main=5.15.2=h53bd1ea_10
  - rdflib=7.0.0=pyhd8ed1ab_0
  - readline=8.2=h5eee18b_0
  - reproc=14.2.4=h295c915_1
  - reproc-cpp=14.2.4=h295c915_1
  - requests=2.32.3=pyhd8ed1ab_0
  - rouge-score=0.1.2=pyhd8ed1ab_0
  - ruamel.yaml=0.17.21=py312h5eee18b_0
  - setuptools=68.2.2=py312h06a4308_0
  - sip=6.7.12=py312h6a678d5_0
  - six=1.16.0=pyhd3eb1b0_1
  - sqlite=3.41.2=h5eee18b_0
  - sympy=1.13.0=pyh04b8f61_3
  - tbb=2021.8.0=hdb19cb5_0
  - text-unidecode=1.3=pyhd8ed1ab_1
  - tk=8.6.12=h1ccaba5_0
  - torchaudio=2.3.1=py312_cu121
  - torchvision=0.18.1=py312_cu121
  - tornado=6.4.1=py312h5eee18b_0
  - tqdm=4.66.4=pyhd8ed1ab_0
  - truststore=0.8.0=py312h06a4308_0
  - typing_extensions=4.11.0=py312h06a4308_0
  - tzdata=2024a=h0c530f3_0
  - unicodedata2=15.1.0=py312h5eee18b_0
  - urllib3=2.1.0=py312h06a4308_1
  - wcwidth=0.2.13=pyhd8ed1ab_0
  - wheel=0.41.2=py312h06a4308_0
  - xz=5.4.5=h5eee18b_0
  - yaml=0.2.5=h7b6447c_0
  - yaml-cpp=0.8.0=h6a678d5_0
  - zipp=3.19.2=pyhd8ed1ab_0
  - zlib=1.2.13=h5eee18b_0
  - zstandard=0.19.0=py312h5eee18b_0
  - zstd=1.5.5=hc292b87_0
  - pip:
      - accelerate==0.32.1
      - aiofiles==23.2.1
      - aiohttp==3.9.5
      - aiosignal==1.3.1
      - annotated-types==0.7.0
      - anyio==4.6.0
      - attrs==23.2.0
      - bert-score==0.3.13
      - bitsandbytes==0.43.3
      - coloredlogs==15.0.1
      - cuda-python==12.4.0
      - datasets==2.20.0
      - deepspeed==0.14.4
      - dill==0.3.8
      - evaluate==0.4.2
      - fastapi==0.115.0
      - ffmpy==0.4.0
      - flatbuffers==24.3.25
      - frozenlist==1.4.1
      - fsspec==2024.5.0
      - gliner==0.2.13
      - gradio==5.0.1
      - gradio-client==1.4.0
      - greenlet==3.1.1
      - h11==0.14.0
      - hjson==3.1.0
      - httpcore==1.0.6
      - httpx==0.27.2
      - huggingface-hub==0.25.2
      - humanfriendly==10.0
      - ipynb-py-convert==0.4.6
      - jiter==0.6.1
      - jsonpatch==1.33
      - langchain==0.3.3
      - langchain-core==0.3.10
      - langchain-openai==0.2.2
      - langchain-text-splitters==0.3.0
      - langsmith==0.1.135
      - markdown-it-py==3.0.0
      - matplotlib==3.9.1.post1
      - mdurl==0.1.2
      - multidict==6.0.5
      - multiprocess==0.70.16
      - ninja==1.11.1.1
      - nvidia-ml-py==12.555.43
      - onnxruntime==1.19.2
      - openai==1.51.2
      - orjson==3.10.7
      - peft==0.12.0
      - psutil==6.0.0
      - py-cpuinfo==9.0.0
      - pyarrow==17.0.0
      - pyarrow-hotfix==0.6
      - pydantic==2.8.2
      - pydantic-core==2.20.1
      - pydantic-settings==2.5.2
      - pydub==0.25.1
      - pygments==2.18.0
      - python-dotenv==1.0.1
      - python-multipart==0.0.12
      - regex==2024.5.15
      - requests-toolbelt==1.0.0
      - rich==13.9.2
      - ruff==0.6.9
      - safetensors==0.4.3
      - scikit-learn==1.5.1
      - scipy==1.14.0
      - semantic-version==2.10.0
      - sentencepiece==0.2.0
      - shellingham==1.5.4
      - sniffio==1.3.1
      - sqlalchemy==2.0.36
      - starlette==0.38.6
      - tenacity==8.5.0
      - threadpoolctl==3.5.0
      - tiktoken==0.8.0
      - tokenizers==0.19.1
      - tomlkit==0.12.0
      - torch-testing==0.0.2
      - transformers==4.42.4
      - typer==0.12.5
      - uvicorn==0.31.1
      - websockets==12.0
      - xxhash==3.4.1
      - yarl==1.9.4
prefix: /scratch/consose-011/conda/storylines-env-accel-011