Update app.py
app.py CHANGED
@@ -2,12 +2,13 @@ import os
 import streamlit as st
 import nest_asyncio
 
-# ─── PATCH STREAMLIT’S LOOP
-nest_asyncio.apply()
+# ─── 1) PATCH STREAMLIT’S EVENT LOOP ─────────────────────────────
+nest_asyncio.apply()  # allow nested awaits on Tornado’s loop
 import asyncio
-
+# No new_event_loop / set_event_loop here!
+# We’ll grab the existing loop when we need it.
 
-# ─── LlamaIndex & Parser Imports
+# ─── 2) LlamaIndex & Parser Imports ──────────────────────────────
 from llama_index.core import StorageContext, load_index_from_storage
 from llama_index.llms.openai import OpenAI
 from llama_parse import LlamaParse
@@ -16,7 +17,7 @@ from llama_index.embeddings.openai import OpenAIEmbedding
 from llama_index.core.workflow import Event, StartEvent, StopEvent, Workflow, step, Context
 from llama_index.core.memory import ChatMemoryBuffer
 
-# ─── Constants
+# ─── 3) Constants ─────────────────────────────────────────────────
 PDF_PATH = "./data/bank-of-america.pdf"
 INDEX_DIR = "./index_data"
 SYSTEM_PROMPT = (
@@ -24,7 +25,7 @@ SYSTEM_PROMPT = (
     "Answer questions ONLY from the indexed document."
 )
 
-# ─── Workflow Definition
+# ─── 4) Workflow Definition ────────────────────────────────────────
 class ChatResponseEvent(Event):
     response: str
     memory: ChatMemoryBuffer
@@ -40,8 +41,7 @@ class ChatWorkflow(Workflow):
             system_prompt=ev.system_prompt,
             llm=ev.llm
         )
-        #
-        # you could also `await chat_engine.achat(...)` if available
+        # Still using sync .chat(), but you could switch to an async method if available
        resp = chat_engine.chat(ev.query)
         return ChatResponseEvent(response=resp.response, memory=ev.memory)
 
@@ -49,24 +49,31 @@ class ChatWorkflow(Workflow):
     async def finalize(self, ev: ChatResponseEvent) -> StopEvent:
         return StopEvent(result=ev.response)
 
-# ─── Streamlit UI & Session State
+# ─── 5) Streamlit UI & Session State ───────────────────────────────
 st.set_page_config(page_title="PDF Chatbot", layout="wide")
 st.title("📄 Chat with Your PDF")
 
-#
+# Build or load the index
 if "index_ready" not in st.session_state:
     os.makedirs(INDEX_DIR, exist_ok=True)
     index_meta = os.path.join(INDEX_DIR, "index_store.json")
     if os.path.isfile(index_meta):
         st.session_state.index_ready = True
-        st.success("🎉 Loaded existing index!")
+        st.success("🎉 Loaded existing index!")  # reuse existing index
     else:
         docs = LlamaParse(
             result_type="markdown",
-            content_guideline_instruction=
-
-
-
+            content_guideline_instruction="You are processing a company’s quarterly earnings-call slide deck. "
+                "For each slide, produce a clearly sectioned Markdown fragment that includes:\n\n"
+                "1. **Slide metadata**: slide number, title, and any subtitle or date\n"
+                "2. **Key bullet points**: preserve existing bullets, but rewrite for clarity\n"
+                "3. **Tables**: convert any tables into Markdown tables, capturing headers and all rows\n"
+                "4. **Charts & graphs**: summarize each chart/graph in prose, highlighting axes labels, trends, and top 3 data points or percentage changes\n"
+                "5. **Figures & images**: if there’s a figure caption, include it verbatim; otherwise, describe the visual in one sentence\n"
+                "6. **Numeric callouts**: pull out any KPIs (revenue, EPS, growth rates) into a “Metrics” subsection\n"
+                "7. **Overall slide summary**: a 1–2-sentence plain-English takeaway for the slide’s purpose or conclusion\n\n"
+                "Keep the output strictly in Markdown, using headings (`##`, `###`), lists (`-`), and tables syntax. "
+                "Do not include any LLM-specific commentary or markdown outside these rules."
         ).load_data(PDF_PATH)
         idx = VectorStoreIndex.from_documents(
             docs,
@@ -76,7 +83,7 @@ if "index_ready" not in st.session_state:
         st.session_state.index_ready = True
         st.success("🎉 Indexed your document and created index_store.json!")
 
-#
+# Initialize memory & workflow
 if "memory" not in st.session_state:
     st.session_state.memory = ChatMemoryBuffer.from_defaults(
         llm=OpenAI(model="gpt-4o"), token_limit=1500
@@ -84,10 +91,13 @@ if "memory" not in st.session_state:
 if "workflow" not in st.session_state:
     st.session_state.workflow = ChatWorkflow(timeout=None, verbose=False)
 
-#
+# User input & async scheduling
 user_input = st.text_input("Ask a question about the document:")
 if user_input:
-    #
+    # 1) Grab the running loop (patched by nest_asyncio)
+    loop = asyncio.get_event_loop()  # returns Tornado’s loop
+
+    # 2) Schedule the workflow.run coroutine on that loop
     future = asyncio.run_coroutine_threadsafe(
         st.session_state.workflow.run(
             index_dir=INDEX_DIR,
@@ -98,14 +108,15 @@ if user_input:
         ),
         loop
     )
-    # Wait for it to finish (non-blocking at the loop level)
-    stop_evt: StopEvent = future.result()
 
-    #
+    # 3) Wait for the result (non-blocking at the loop level)
+    stop_evt: StopEvent = future.result()  # avoids run_until_complete errors
+
+    # 4) Update state & display
     st.session_state.memory = stop_evt.memory
     st.markdown(f"**Bot:** {stop_evt.result}")
 
-#
+# End Chat button
 if st.button("End Chat"):
     st.write("Chat ended. Refresh to start over.")
     st.stop()
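Note: the change above leans on nest_asyncio (so Streamlit's already-running Tornado loop tolerates nested awaits) plus asyncio.run_coroutine_threadsafe to schedule the workflow on an existing loop. Below is a minimal, self-contained sketch of that scheduling primitive; the names are illustrative, not from app.py, and outside Streamlit the target loop must actually be running, e.g. on a worker thread:

    import asyncio
    import threading

    async def answer(query: str) -> str:
        # hypothetical stand-in for st.session_state.workflow.run(...)
        await asyncio.sleep(0.1)
        return f"echo: {query}"

    # run_coroutine_threadsafe needs a loop that is already running,
    # so start one on a daemon thread
    loop = asyncio.new_event_loop()
    threading.Thread(target=loop.run_forever, daemon=True).start()

    # schedule the coroutine on that loop; only the calling thread blocks
    future = asyncio.run_coroutine_threadsafe(answer("hello"), loop)
    print(future.result())

    loop.call_soon_threadsafe(loop.stop)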
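On the step's "switch to an async method" comment: recent llama_index releases expose an async achat() on chat engines alongside the sync chat(), so the blocking call inside the step could become an await. A hedged sketch under that assumption (it needs a persisted index and OPENAI_API_KEY to run; the helper name is hypothetical):

    from llama_index.core import StorageContext, load_index_from_storage
    from llama_index.llms.openai import OpenAI

    async def answer_from_index(index_dir: str, query: str) -> str:
        # load the persisted index, build a chat engine, and await the reply
        storage = StorageContext.from_defaults(persist_dir=index_dir)
        index = load_index_from_storage(storage)
        engine = index.as_chat_engine(llm=OpenAI(model="gpt-4o"))
        resp = await engine.achat(query)  # async counterpart of .chat()
        return resp.response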