getGO007 committed on
Commit
1bfc642
Β·
verified Β·
1 Parent(s): 594bcbb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -22
app.py CHANGED
@@ -2,12 +2,13 @@ import os
2
  import streamlit as st
3
  import nest_asyncio
4
 
5
- # ─── PATCH STREAMLIT’S LOOP ──────────────────────────────────────
6
- nest_asyncio.apply() # allow nested awaits on Tornado’s loop
7
  import asyncio
8
- loop = asyncio.get_event_loop() # grab the running Streamlit/Tornado loop
 
9
 
10
- # ─── LlamaIndex & Parser Imports ────────────────────────────────
11
  from llama_index.core import StorageContext, load_index_from_storage
12
  from llama_index.llms.openai import OpenAI
13
  from llama_parse import LlamaParse
@@ -16,7 +17,7 @@ from llama_index.embeddings.openai import OpenAIEmbedding
16
  from llama_index.core.workflow import Event, StartEvent, StopEvent, Workflow, step, Context
17
  from llama_index.core.memory import ChatMemoryBuffer
18
 
19
- # ─── Constants ───────────────────────────────────────────────────
20
  PDF_PATH = "./data/bank-of-america.pdf"
21
  INDEX_DIR = "./index_data"
22
  SYSTEM_PROMPT = (
@@ -24,7 +25,7 @@ SYSTEM_PROMPT = (
24
  "Answer questions ONLY from the indexed document."
25
  )
26
 
27
- # ─── Workflow Definition ─────────────────────────────────────────
28
  class ChatResponseEvent(Event):
29
  response: str
30
  memory: ChatMemoryBuffer
@@ -40,8 +41,7 @@ class ChatWorkflow(Workflow):
40
  system_prompt=ev.system_prompt,
41
  llm=ev.llm
42
  )
43
- # Use sync call inside async stepβ€”but it's fine since it's small;
44
- # you could also `await chat_engine.achat(...)` if available
45
  resp = chat_engine.chat(ev.query)
46
  return ChatResponseEvent(response=resp.response, memory=ev.memory)
47
 
@@ -49,24 +49,31 @@ class ChatWorkflow(Workflow):
49
  async def finalize(self, ev: ChatResponseEvent) -> StopEvent:
50
  return StopEvent(result=ev.response)
51
 
52
- # ─── Streamlit UI & Session State ────────────────────────────────
53
  st.set_page_config(page_title="PDF Chatbot", layout="wide")
54
  st.title("πŸ“„ Chat with Your PDF")
55
 
56
- # 1) Build or load the index once
57
  if "index_ready" not in st.session_state:
58
  os.makedirs(INDEX_DIR, exist_ok=True)
59
  index_meta = os.path.join(INDEX_DIR, "index_store.json")
60
  if os.path.isfile(index_meta):
61
  st.session_state.index_ready = True
62
- st.success("πŸ“š Loaded existing index from index_store.json!")
63
  else:
64
  docs = LlamaParse(
65
  result_type="markdown",
66
- content_guideline_instruction=(
67
- "You are processing a company’s quarterly earnings-call slide deck. "
68
- "For each slide, produce a clearly sectioned Markdown fragment..."
69
- )
 
 
 
 
 
 
 
70
  ).load_data(PDF_PATH)
71
  idx = VectorStoreIndex.from_documents(
72
  docs,
@@ -76,7 +83,7 @@ if "index_ready" not in st.session_state:
76
  st.session_state.index_ready = True
77
  st.success("πŸ“š Indexed your document and created index_store.json!")
78
 
79
- # 2) Initialize memory & workflow
80
  if "memory" not in st.session_state:
81
  st.session_state.memory = ChatMemoryBuffer.from_defaults(
82
  llm=OpenAI(model="gpt-4o"), token_limit=1500
@@ -84,10 +91,13 @@ if "memory" not in st.session_state:
84
  if "workflow" not in st.session_state:
85
  st.session_state.workflow = ChatWorkflow(timeout=None, verbose=False)
86
 
87
- # 3) User input & async invocation
88
  user_input = st.text_input("Ask a question about the document:")
89
  if user_input:
90
- # Schedule the coroutine on Streamlit's running loop
 
 
 
91
  future = asyncio.run_coroutine_threadsafe(
92
  st.session_state.workflow.run(
93
  index_dir=INDEX_DIR,
@@ -98,14 +108,15 @@ if user_input:
98
  ),
99
  loop
100
  )
101
- # Wait for it to finish (non-blocking at the loop level)
102
- stop_evt: StopEvent = future.result()
103
 
104
- # Update session state & display
 
 
 
105
  st.session_state.memory = stop_evt.memory
106
  st.markdown(f"**Bot:** {stop_evt.result}")
107
 
108
- # 4) End Chat
109
  if st.button("End Chat"):
110
  st.write("Chat ended. Refresh to start over.")
111
  st.stop()
 
2
  import streamlit as st
3
  import nest_asyncio
4
 
5
+ # ─── 1) PATCH STREAMLIT’S EVENT LOOP ─────────────────────────────
6
+ nest_asyncio.apply() # allow nested awaits on Tornado’s loop
7
  import asyncio
8
+ # No new_event_loop / set_event_loop here!
9
+ # We’ll grab the existing loop when we need it.
10
 
11
+ # ─── 2) LlamaIndex & Parser Imports ──────────────────────────────
12
  from llama_index.core import StorageContext, load_index_from_storage
13
  from llama_index.llms.openai import OpenAI
14
  from llama_parse import LlamaParse
 
17
  from llama_index.core.workflow import Event, StartEvent, StopEvent, Workflow, step, Context
18
  from llama_index.core.memory import ChatMemoryBuffer
19
 
20
+ # ─── 3) Constants ─────────────────────────────────────────────────
21
  PDF_PATH = "./data/bank-of-america.pdf"
22
  INDEX_DIR = "./index_data"
23
  SYSTEM_PROMPT = (
 
25
  "Answer questions ONLY from the indexed document."
26
  )
27
 
28
+ # ─── 4) Workflow Definition ────────────────────────────────────────
29
class ChatResponseEvent(Event):
    # Event emitted by the chat step of ChatWorkflow: carries the LLM's
    # reply text plus the conversation memory buffer used for the turn.
    response: str
    memory: ChatMemoryBuffer
 
41
  system_prompt=ev.system_prompt,
42
  llm=ev.llm
43
  )
44
+ # Still using sync .chat(), but you could switch to an async method if available
 
45
  resp = chat_engine.chat(ev.query)
46
  return ChatResponseEvent(response=resp.response, memory=ev.memory)
47
 
 
49
  async def finalize(self, ev: ChatResponseEvent) -> StopEvent:
50
  return StopEvent(result=ev.response)
51
 
52
+ # ─── 5) Streamlit UI & Session State ───────────────────────────────
53
  st.set_page_config(page_title="PDF Chatbot", layout="wide")
54
  st.title("πŸ“„ Chat with Your PDF")
55
 
56
+ # Build or load the index
57
  if "index_ready" not in st.session_state:
58
  os.makedirs(INDEX_DIR, exist_ok=True)
59
  index_meta = os.path.join(INDEX_DIR, "index_store.json")
60
  if os.path.isfile(index_meta):
61
  st.session_state.index_ready = True
62
+ st.success("πŸ“š Loaded existing index!") # reuse existing index
63
  else:
64
  docs = LlamaParse(
65
  result_type="markdown",
66
+ content_guideline_instruction="You are processing a company’s quarterly earnings-call slide deck. "
67
+ "For each slide, produce a clearly sectioned Markdown fragment that includes:\n\n"
68
+ "1. **Slide metadata**: slide number, title, and any subtitle or date\n"
69
+ "2. **Key bullet points**: preserve existing bullets, but rewrite for clarity\n"
70
+ "3. **Tables**: convert any tables into Markdown tables, capturing headers and all rows\n"
71
+ "4. **Charts & graphs**: summarize each chart/graph in prose, highlighting axes labels, trends, and top 3 data points or percentage changes\n"
72
+ "5. **Figures & images**: if there’s a figure caption, include it verbatim; otherwise, describe the visual in one sentence\n"
73
+ "6. **Numeric callouts**: pull out any KPIs (revenue, EPS, growth rates) into a β€œMetrics” subsection\n"
74
+ "7. **Overall slide summary**: a 1–2-sentence plain-English takeaway for the slide’s purpose or conclusion\n\n"
75
+ "Keep the output strictly in Markdown, using headings (`##`, `###`), lists (`-`), and tables syntax. "
76
+ "Do not include any LLM-specific commentary or markdown outside these rules."
77
  ).load_data(PDF_PATH)
78
  idx = VectorStoreIndex.from_documents(
79
  docs,
 
83
  st.session_state.index_ready = True
84
  st.success("πŸ“š Indexed your document and created index_store.json!")
85
 
86
+ # Initialize memory & workflow
87
  if "memory" not in st.session_state:
88
  st.session_state.memory = ChatMemoryBuffer.from_defaults(
89
  llm=OpenAI(model="gpt-4o"), token_limit=1500
 
91
  if "workflow" not in st.session_state:
92
  st.session_state.workflow = ChatWorkflow(timeout=None, verbose=False)
93
 
94
+ # User input & async scheduling
95
  user_input = st.text_input("Ask a question about the document:")
96
  if user_input:
97
+ # 1) Grab the running loop (patched by nest_asyncio)
98
+ loop = asyncio.get_event_loop() # returns Tornado’s loop
99
+
100
+ # 2) Schedule the workflow.run coroutine on that loop
101
  future = asyncio.run_coroutine_threadsafe(
102
  st.session_state.workflow.run(
103
  index_dir=INDEX_DIR,
 
108
  ),
109
  loop
110
  )
 
 
111
 
112
+ # 3) Wait for the result (non-blocking at the loop level)
113
+ stop_evt: StopEvent = future.result() # avoids run_until_complete errors
114
+
115
+ # 4) Update state & display
116
  st.session_state.memory = stop_evt.memory
117
  st.markdown(f"**Bot:** {stop_evt.result}")
118
 
119
# Let the user explicitly end the session; st.stop() halts script execution
# for this run, so nothing below it executes.
if st.button("End Chat"):
    st.write("Chat ended. Refresh to start over.")
    st.stop()
  st.stop()