getGO007 committed on
Commit
1bfc642
Β·
verified Β·
1 Parent(s): 594bcbb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -22
app.py CHANGED
@@ -2,12 +2,13 @@ import os
2
  import streamlit as st
3
  import nest_asyncio
4
 
5
- # ─── PATCH STREAMLIT’S LOOP ──────────────────────────────────────
6
- nest_asyncio.apply() # allow nested awaits on Tornado’s loop
7
  import asyncio
8
- loop = asyncio.get_event_loop() # grab the running Streamlit/Tornado loop
 
9
 
10
- # ─── LlamaIndex & Parser Imports ────────────────────────────────
11
  from llama_index.core import StorageContext, load_index_from_storage
12
  from llama_index.llms.openai import OpenAI
13
  from llama_parse import LlamaParse
@@ -16,7 +17,7 @@ from llama_index.embeddings.openai import OpenAIEmbedding
16
  from llama_index.core.workflow import Event, StartEvent, StopEvent, Workflow, step, Context
17
  from llama_index.core.memory import ChatMemoryBuffer
18
 
19
- # ─── Constants ───────────────────────────────────────────────────
20
  PDF_PATH = "./data/bank-of-america.pdf"
21
  INDEX_DIR = "./index_data"
22
  SYSTEM_PROMPT = (
@@ -24,7 +25,7 @@ SYSTEM_PROMPT = (
24
  "Answer questions ONLY from the indexed document."
25
  )
26
 
27
- # ─── Workflow Definition ─────────────────────────────────────────
28
  class ChatResponseEvent(Event):
29
  response: str
30
  memory: ChatMemoryBuffer
@@ -40,8 +41,7 @@ class ChatWorkflow(Workflow):
40
  system_prompt=ev.system_prompt,
41
  llm=ev.llm
42
  )
43
- # Use sync call inside async stepβ€”but it's fine since it's small;
44
- # you could also `await chat_engine.achat(...)` if available
45
  resp = chat_engine.chat(ev.query)
46
  return ChatResponseEvent(response=resp.response, memory=ev.memory)
47
 
@@ -49,24 +49,31 @@ class ChatWorkflow(Workflow):
49
  async def finalize(self, ev: ChatResponseEvent) -> StopEvent:
50
  return StopEvent(result=ev.response)
51
 
52
- # ─── Streamlit UI & Session State ────────────────────────────────
53
  st.set_page_config(page_title="PDF Chatbot", layout="wide")
54
  st.title("πŸ“„ Chat with Your PDF")
55
 
56
- # 1) Build or load the index once
57
  if "index_ready" not in st.session_state:
58
  os.makedirs(INDEX_DIR, exist_ok=True)
59
  index_meta = os.path.join(INDEX_DIR, "index_store.json")
60
  if os.path.isfile(index_meta):
61
  st.session_state.index_ready = True
62
- st.success("πŸ“š Loaded existing index from index_store.json!")
63
  else:
64
  docs = LlamaParse(
65
  result_type="markdown",
66
- content_guideline_instruction=(
67
- "You are processing a company’s quarterly earnings-call slide deck. "
68
- "For each slide, produce a clearly sectioned Markdown fragment..."
69
- )
 
 
 
 
 
 
 
70
  ).load_data(PDF_PATH)
71
  idx = VectorStoreIndex.from_documents(
72
  docs,
@@ -76,7 +83,7 @@ if "index_ready" not in st.session_state:
76
  st.session_state.index_ready = True
77
  st.success("πŸ“š Indexed your document and created index_store.json!")
78
 
79
- # 2) Initialize memory & workflow
80
  if "memory" not in st.session_state:
81
  st.session_state.memory = ChatMemoryBuffer.from_defaults(
82
  llm=OpenAI(model="gpt-4o"), token_limit=1500
@@ -84,10 +91,13 @@ if "memory" not in st.session_state:
84
  if "workflow" not in st.session_state:
85
  st.session_state.workflow = ChatWorkflow(timeout=None, verbose=False)
86
 
87
- # 3) User input & async invocation
88
  user_input = st.text_input("Ask a question about the document:")
89
  if user_input:
90
- # Schedule the coroutine on Streamlit's running loop
 
 
 
91
  future = asyncio.run_coroutine_threadsafe(
92
  st.session_state.workflow.run(
93
  index_dir=INDEX_DIR,
@@ -98,14 +108,15 @@ if user_input:
98
  ),
99
  loop
100
  )
101
- # Wait for it to finish (non-blocking at the loop level)
102
- stop_evt: StopEvent = future.result()
103
 
104
- # Update session state & display
 
 
 
105
  st.session_state.memory = stop_evt.memory
106
  st.markdown(f"**Bot:** {stop_evt.result}")
107
 
108
- # 4) End Chat
109
  if st.button("End Chat"):
110
  st.write("Chat ended. Refresh to start over.")
111
  st.stop()
 
2
  import streamlit as st
3
  import nest_asyncio
4
 
5
+ # ─── 1) PATCH STREAMLIT’S EVENT LOOP ─────────────────────────────
6
+ nest_asyncio.apply() # allow nested awaits on Tornado’s loop
7
  import asyncio
8
+ # No new_event_loop / set_event_loop here!
9
+ # We’ll grab the existing loop when we need it.
10
 
11
+ # ─── 2) LlamaIndex & Parser Imports ──────────────────────────────
12
  from llama_index.core import StorageContext, load_index_from_storage
13
  from llama_index.llms.openai import OpenAI
14
  from llama_parse import LlamaParse
 
17
  from llama_index.core.workflow import Event, StartEvent, StopEvent, Workflow, step, Context
18
  from llama_index.core.memory import ChatMemoryBuffer
19
 
20
+ # ─── 3) Constants ─────────────────────────────────────────────────
21
  PDF_PATH = "./data/bank-of-america.pdf"
22
  INDEX_DIR = "./index_data"
23
  SYSTEM_PROMPT = (
 
25
  "Answer questions ONLY from the indexed document."
26
  )
27
 
28
+ # ─── 4) Workflow Definition ────────────────────────────────────────
29
class ChatResponseEvent(Event):
    # Event emitted by the chat step of ChatWorkflow: carries the LLM's
    # reply text plus the conversation memory buffer used for the turn.
    response: str
    memory: ChatMemoryBuffer
 
41
  system_prompt=ev.system_prompt,
42
  llm=ev.llm
43
  )
44
+ # Still using sync .chat(), but you could switch to an async method if available
 
45
  resp = chat_engine.chat(ev.query)
46
  return ChatResponseEvent(response=resp.response, memory=ev.memory)
47
 
 
49
  async def finalize(self, ev: ChatResponseEvent) -> StopEvent:
50
  return StopEvent(result=ev.response)
51
 
52
+ # ─── 5) Streamlit UI & Session State ───────────────────────────────
53
  st.set_page_config(page_title="PDF Chatbot", layout="wide")
54
  st.title("πŸ“„ Chat with Your PDF")
55
 
56
+ # Build or load the index
57
  if "index_ready" not in st.session_state:
58
  os.makedirs(INDEX_DIR, exist_ok=True)
59
  index_meta = os.path.join(INDEX_DIR, "index_store.json")
60
  if os.path.isfile(index_meta):
61
  st.session_state.index_ready = True
62
+ st.success("πŸ“š Loaded existing index!") # reuse existing index
63
  else:
64
  docs = LlamaParse(
65
  result_type="markdown",
66
+ content_guideline_instruction="You are processing a company’s quarterly earnings-call slide deck. "
67
+ "For each slide, produce a clearly sectioned Markdown fragment that includes:\n\n"
68
+ "1. **Slide metadata**: slide number, title, and any subtitle or date\n"
69
+ "2. **Key bullet points**: preserve existing bullets, but rewrite for clarity\n"
70
+ "3. **Tables**: convert any tables into Markdown tables, capturing headers and all rows\n"
71
+ "4. **Charts & graphs**: summarize each chart/graph in prose, highlighting axes labels, trends, and top 3 data points or percentage changes\n"
72
+ "5. **Figures & images**: if there’s a figure caption, include it verbatim; otherwise, describe the visual in one sentence\n"
73
+ "6. **Numeric callouts**: pull out any KPIs (revenue, EPS, growth rates) into a β€œMetrics” subsection\n"
74
+ "7. **Overall slide summary**: a 1–2-sentence plain-English takeaway for the slide’s purpose or conclusion\n\n"
75
+ "Keep the output strictly in Markdown, using headings (`##`, `###`), lists (`-`), and tables syntax. "
76
+ "Do not include any LLM-specific commentary or markdown outside these rules."
77
  ).load_data(PDF_PATH)
78
  idx = VectorStoreIndex.from_documents(
79
  docs,
 
83
  st.session_state.index_ready = True
84
  st.success("πŸ“š Indexed your document and created index_store.json!")
85
 
86
+ # Initialize memory & workflow
87
  if "memory" not in st.session_state:
88
  st.session_state.memory = ChatMemoryBuffer.from_defaults(
89
  llm=OpenAI(model="gpt-4o"), token_limit=1500
 
91
  if "workflow" not in st.session_state:
92
  st.session_state.workflow = ChatWorkflow(timeout=None, verbose=False)
93
 
94
+ # User input & async scheduling
95
  user_input = st.text_input("Ask a question about the document:")
96
  if user_input:
97
+ # 1) Grab the running loop (patched by nest_asyncio)
98
+ loop = asyncio.get_event_loop() # returns Tornado’s loop
99
+
100
+ # 2) Schedule the workflow.run coroutine on that loop
101
  future = asyncio.run_coroutine_threadsafe(
102
  st.session_state.workflow.run(
103
  index_dir=INDEX_DIR,
 
108
  ),
109
  loop
110
  )
 
 
111
 
112
+ # 3) Wait for the result (non-blocking at the loop level)
113
+ stop_evt: StopEvent = future.result() # avoids run_until_complete errors
114
+
115
+ # 4) Update state & display
116
  st.session_state.memory = stop_evt.memory
117
  st.markdown(f"**Bot:** {stop_evt.result}")
118
 
119
# Let the user explicitly end the session; st.stop() halts script execution
# for this run, so nothing below it executes.
if st.button("End Chat"):
    st.write("Chat ended. Refresh to start over.")
    st.stop()
  st.stop()