mtwesley committed
Commit 252940e · 1 Parent(s): 6d429bd

removing too many comments

Files changed (2)
  1. .gitignore +2 -0
  2. app.py +40 -116
.gitignore CHANGED
@@ -20,6 +20,8 @@ wheels/
 .installed.cfg
 *.egg
 
+old/*
+
 # Virtual Environment
 venv/
 env/
app.py CHANGED
@@ -2,12 +2,12 @@ import streamlit as st
 import os
 import re
 import json
-import demjson3 # Using demjson3 for potentially less strict JSON parsing
+import demjson3
 import requests
 import faiss
 import numpy as np
 import multiprocessing
-import time # For adding slight delay if needed
+import time
 
 from huggingface_hub import hf_hub_download, login
 from sentence_transformers import SentenceTransformer
@@ -31,27 +31,25 @@ except KeyError:
 
 # Model and RAG configuration
 MODEL_REPO_ID = "bartowski/gemma-2-2b-it-GGUF"
-MODEL_FILENAME = "gemma-2-2b-it-Q8_0.gguf" # Using Q8 quantization from notebook
+MODEL_FILENAME = "gemma-2-2b-it-Q8_0.gguf"
 EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
 DOCS_PATH = "docs"
-FAISS_INDEX_PATH = "bps_faiss.index" # Path to save/load the index
+FAISS_INDEX_PATH = "bps_faiss.index"
 
 # LLM parameters
-N_CTX = 2048 # Context window size
-MAX_TOKENS_RESPONSE = 350 # Max tokens for the LLM response generation
-TEMPERATURE = 0.5 # LLM temperature
-N_THREADS = multiprocessing.cpu_count() - 1 # Use half the CPU cores
+N_CTX = 2048
+MAX_TOKENS_RESPONSE = 350
+TEMPERATURE = 0.5
+N_THREADS = multiprocessing.cpu_count() - 1
 
 # RAG parameters
-TOP_K_DOCS = 3 # Number of relevant document chunks to retrieve
+TOP_K_DOCS = 3
 
 # Import prompts from prompts.py
-# NOTE: Error message here is for app stability, not part of LLM prompts.
 try:
     from prompts import system_prompt, json_prompt, initial_school_search_prompt
 except ImportError:
     st.error("Could not import prompts from prompts.py. Make sure the file exists.")
-    # Define fallbacks directly if import fails - Using exact text from notebook prompts
     system_prompt = """
 You are a professional assistant that answers questions about enrollment in Boston Public Schools.
 Be friendly and helpful. Families will ask questions and provide information, such as the child's residence, grade, and school preference.
@@ -66,19 +64,15 @@ Keep the conversation going and ask questions one at a time until you have all i
     st.stop()
 
 
-# --- Helper Functions (ported from Notebook) ---
+# --- Helper Functions ---
 
 def clean_reply_text(reply: str) -> str:
     """Removes potential JSON blocks and cleans up common LLM artifacts."""
-    # Remove ```json ... ``` blocks or similar markdown code blocks
     reply = re.sub(r"```[jJ][sS][oO][nN]?\s*(\{.*?\})\s*```", "", reply, flags=re.DOTALL)
-    # Remove trailing JSON object if it's at the very end after potential whitespace
     reply = re.sub(r"\s*\{.*\}\s*$", "", reply, flags=re.DOTALL)
-    # Remove stray backticks, `json` keywords, and unmatched brackets
     reply = re.sub(r"`", "", reply)
     reply = re.sub(r"(?i)\bjson\b", "", reply)
-    reply = re.sub(r"[\[\]]", "", reply) # Remove only square brackets if dangling
-    # Collapse multiple blank lines
+    reply = re.sub(r"[\[\]]", "", reply)
     reply = re.sub(r"\n{2,}", "\n", reply)
     return reply.strip()
 
@@ -88,12 +82,10 @@ def extract_reply_and_json(text: str) -> tuple[str, dict]:
     Uses demjson3 for potentially more lenient parsing.
     """
     json_part = {}
-    reply_part = text # Default reply is the whole text initially
+    reply_part = text
 
-    # Find the last potential JSON object (heuristic: starts with { ends with })
     last_brace_open = text.rfind('{')
     if last_brace_open != -1:
-        # Try to find the matching closing brace
         brace_level = 0
         last_brace_close = -1
         potential_json_str = text[last_brace_open:]
@@ -109,47 +101,34 @@ def extract_reply_and_json(text: str) -> tuple[str, dict]:
         if last_brace_close != -1:
             json_str = text[last_brace_open : last_brace_close + 1]
             try:
-                # Use demjson3 to decode
                 parsed = demjson3.decode(json_str)
                 if isinstance(parsed, dict):
                     json_part = parsed
-                    # If JSON is successfully parsed, assume text before it is the reply
                     reply_part = text[:last_brace_open].strip()
-                else:
-                    # Parsed but not a dict, might be noise, keep original reply
-                    pass
-            except demjson3.JSONDecodeError as e:
-                # Decoding failed, assume it wasn't valid JSON
-                # print(f"JSON decode failed: {e}")
-                # print(f"Offending string segment:\n{json_str}")
-                pass # Keep original reply_part and empty json_part
-
-    # Clean the reply part further
+            except demjson3.JSONDecodeError:
+                pass
+
     cleaned_reply = clean_reply_text(reply_part)
 
-    # --- MODIFICATION: Removed the fallback message I added ---
-    # If cleaning removed everything, return empty string for reply.
-    # The notebook didn't specify fallback behavior here.
     if not cleaned_reply and json_part:
         cleaned_reply = clean_reply_text(text[:last_brace_open])
     elif not cleaned_reply and not json_part:
-        cleaned_reply = "" # Return empty if nothing is left
+        cleaned_reply = ""
 
     return cleaned_reply, json_part
 
 
 def geocode_address(address: str) -> tuple[float | None, float | None]:
     """Turn a free‑form address into (lat, lon) using Geoapify."""
-    # NOTE: Error/warning messages here are for app stability/user feedback, not part of LLM prompts.
     if not GEOAPIFY_KEY:
         return None, None
     try:
         resp = requests.get(
             "https://api.geoapify.com/v1/geocode/search",
             params={"text": address, "limit": 1, "apiKey": GEOAPIFY_KEY},
-            timeout=10 # Add a timeout
+            timeout=10
         )
-        resp.raise_for_status() # Raise an exception for bad status codes
+        resp.raise_for_status()
         features = resp.json().get("features", [])
         if not features:
             return None, None
@@ -164,7 +143,6 @@ def geocode_address(address: str) -> tuple[float | None, float | None]:
 
 
 def get_nearby_schools(address: str, radius: int = 2000, limit: int = 10) -> list[dict]:
     """Get nearby schools using Geoapify."""
-    # NOTE: Error/warning messages here are for app stability/user feedback, not part of LLM prompts.
     if not GEOAPIFY_KEY:
         return []
@@ -177,12 +155,12 @@ def get_nearby_schools(address: str, radius: int = 2000, limit: int = 10) -> lis
         resp = requests.get(
             "https://api.geoapify.com/v2/places",
             params={
-                "categories": "education.school", # Use the specific category
+                "categories": "education.school",
                 "filter": f"circle:{lon},{lat},{radius}",
                 "limit": limit,
                 "apiKey": GEOAPIFY_KEY,
             },
-            timeout=10 # Add a timeout
+            timeout=10
         )
         resp.raise_for_status()
 
@@ -190,7 +168,7 @@ def get_nearby_schools(address: str, radius: int = 2000, limit: int = 10) -> lis
         for feat in resp.json().get("features", []):
             prop = feat.get("properties", {})
             name = prop.get("name")
-            addr = prop.get("formatted") # Full address string
+            addr = prop.get("formatted")
             if name and addr:
                 schools.append({"name": name, "address": addr})
         return schools
@@ -204,17 +182,14 @@ def get_nearby_schools(address: str, radius: int = 2000, limit: int = 10) -> lis
 def build_school_search_prompt(address: str) -> str:
     """Builds the prompt section listing nearby schools."""
     if not address:
-        # Use the exact initial prompt text from prompts.py (originating from notebook)
         return initial_school_search_prompt
 
     nearby_schools = get_nearby_schools(address, radius=2000, limit=10)
 
     if not nearby_schools:
-        # This text is generated dynamically based on API results, not a fixed prompt string.
         return f"No schools found near '{address}'. Please ensure the address is correct or try a broader area if applicable."
 
     school_list_str = "\n".join(f"- {s['name']}: {s['address']}" for s in nearby_schools)
-    # This text is also generated dynamically.
     return (
         f"Based on the residence '{address}', here are some nearby schools:\n{school_list_str}\n\n"
         "Use this information and the provided documents to answer eligibility questions for the user's grade level."
@@ -224,35 +199,27 @@ def update_context(context_json: dict, new_data: dict) -> tuple[dict, bool]:
     """
     Updates context_json in-place based on new_data extracted from LLM response.
     Returns the updated context and a boolean indicating if residence changed.
-    (Logic directly based on notebook implementation)
     """
     residence_changed = False
     current_res = context_json.get("residence", "").strip()
     new_res = new_data.get("residence", "").strip()
 
-    # Update residence only if it's new and different
     if new_res and new_res != current_res:
         context_json["residence"] = new_res
         residence_changed = True
-    # Handle case where residence might be explicitly cleared in new_data
     elif "residence" in new_data and not new_res and current_res:
         context_json["residence"] = ""
-        residence_changed = True # Clearing is also a change
+        residence_changed = True
 
-    # Update other fields (skip 'residence') if they exist in new_data and are different
     for key, value in new_data.items():
         if key != "residence":
-            # Ensure comparison handles various types by converting to string for check
             new_val_str = str(value).strip() if value is not None else ""
             old_val_str = str(context_json.get(key, "")).strip()
 
-            # Update if new value is provided and different from old value
             if new_val_str and new_val_str != old_val_str:
-                context_json[key] = value # Store original type from new_data
-            # Update if key is in new_data, new value is empty, but old value was not
+                context_json[key] = value
             elif key in new_data and not new_val_str and old_val_str:
-                # Handle explicit clearing of other fields
-                context_json[key] = "" # Store empty string or appropriate null value
+                context_json[key] = ""
 
     return context_json, residence_changed
 
@@ -261,7 +228,6 @@ def update_context(context_json: dict, new_data: dict) -> tuple[dict, bool]:
 @st.cache_resource
 def load_embedding_model():
     """Loads the Sentence Transformer model."""
-    # NOTE: Error message here is for app stability, not part of LLM prompts.
     try:
         return SentenceTransformer(EMBEDDING_MODEL_NAME)
     except Exception as e:
@@ -271,7 +237,6 @@ def load_embedding_model():
 @st.cache_data
 def load_documents(docs_path: str) -> tuple[list[str], list[str]]:
     """Loads text documents from the specified directory."""
-    # NOTE: Error/warning messages here are for app stability, not part of LLM prompts.
     doc_texts = []
     filenames = []
     if not os.path.isdir(docs_path):
@@ -297,7 +262,6 @@ def load_documents(docs_path: str) -> tuple[list[str], list[str]]:
 @st.cache_resource(show_spinner="Creating document embeddings and FAISS index...")
 def create_faiss_index(_embedder, doc_texts):
     """Creates FAISS index from document texts."""
-    # NOTE: Error messages here are for app stability, not part of LLM prompts.
     if not doc_texts:
         return None
     try:
@@ -306,12 +270,10 @@ def create_faiss_index(_embedder, doc_texts):
             st.error("Embedding failed, no document embeddings generated.")
             return None
 
-        # Normalize embeddings for Inner Product (IP) search
         faiss.normalize_L2(doc_embeddings)
         dimension = doc_embeddings.shape[1]
-        index = faiss.IndexFlatIP(dimension) # Using Inner Product
+        index = faiss.IndexFlatIP(dimension)
         index.add(doc_embeddings)
-        # Option to save/load index can be added here if needed
         return index
     except Exception as e:
         st.error(f"Error creating FAISS index: {e}")
@@ -319,7 +281,6 @@ def create_faiss_index(_embedder, doc_texts):
 
 def query_docs(query: str, _index, _embedder, doc_texts, top_k=TOP_K_DOCS) -> list[str]:
     """Queries the FAISS index to retrieve relevant document chunks."""
-    # NOTE: Error/warning messages here are for app stability, not part of LLM prompts.
     if _index is None or not doc_texts:
         return []
     try:
@@ -327,10 +288,9 @@ def query_docs(query: str, _index, _embedder, doc_texts, top_k=TOP_K_DOCS) -> li
         if query_embedding is None or query_embedding.shape[0] == 0:
             st.warning("Failed to generate query embedding.")
             return []
-        faiss.normalize_L2(query_embedding) # Normalize query embedding
+        faiss.normalize_L2(query_embedding)
         distances, indices = _index.search(query_embedding, top_k)
 
-        # Return the text of the k nearest neighbors
         return [doc_texts[i] for i in indices[0] if i != -1]
     except Exception as e:
         st.error(f"Error querying FAISS index: {e}")
@@ -345,8 +305,8 @@ def load_llm():
         model_path = hf_hub_download(
             repo_id=MODEL_REPO_ID,
             filename=MODEL_FILENAME,
-            local_dir="models", # Download to a local 'models' directory
-            local_dir_use_symlinks=False # Avoid symlinks issues in some environments
+            local_dir="models",
+            local_dir_use_symlinks=False
         )
         st.success(f"Model found at: {model_path}")
     except Exception as e:
@@ -359,7 +319,7 @@ def load_llm():
             model_path=model_path,
             n_ctx=N_CTX,
             n_threads=N_THREADS,
-            verbose=False # Set to True for more llama.cpp logging
+            verbose=False
         )
         return llm
     except Exception as e:
@@ -370,43 +330,34 @@ def load_llm():
 
 def build_full_prompt(
     context_json: dict,
-    school_search_prompt: str, # This now correctly uses initial_school_search_prompt from prompts.py when address is missing
+    school_search_prompt: str,
     history: list[dict],
-    max_history=5 # Keep last 5 turns (user + assistant)
+    max_history=5
 ) -> str:
-    """Builds the final prompt string for the LLM, using exact prompt texts where specified."""
+    """Builds the final prompt string for the LLM."""
 
-    # 1. Get the latest user input from history
     last_user_input = ""
     if history and history[-1]["role"] == "user":
         last_user_input = history[-1]["content"]
 
-    # 2. Create a query string for RAG (last user input + context summary)
     summary_info = context_json.get("summary", "")
     rag_query = f"{last_user_input}\n\nContext Summary: {summary_info}".strip()
 
-    # 3. Retrieve relevant documents
     retrieved_docs = query_docs(rag_query, faiss_index, embedder, doc_texts_global, top_k=TOP_K_DOCS)
     docs_context_str = "\n\n---\n\n".join(retrieved_docs)
     if docs_context_str:
-        # This text is dynamically generated based on RAG results
         docs_context_str = f"DOCUMENT CONTEXT:\n{docs_context_str}\n---"
     else:
-        # This text is dynamically generated
         docs_context_str = "DOCUMENT CONTEXT: None available."
 
-
-    # 4. Format conversation history
-    recent_history = history[-(max_history * 2):] # Get last N turns
+    recent_history = history[-(max_history * 2):]
     conversation = []
     for msg in recent_history:
         role = "User" if msg["role"] == "user" else "Assistant"
-        # History content comes directly from user input or previous LLM output
         conversation.append(f"{role}: {msg['content']}")
 
     conversation_str = "\n".join(conversation)
 
-    # 5. Assemble the final prompt using exact texts from prompts.py where applicable
     prompt = f"""{system_prompt}
 
 {docs_context_str}
@@ -421,112 +372,85 @@ SCHOOL SEARCH INFO:
 
 CONVERSATION HISTORY:
 {conversation_str}
-Assistant:""" # Note: Ends with "Assistant:", prompting the model to respond
+Assistant:"""
 
     return prompt
 
 
 # --- Streamlit App UI and Logic ---
 
-# NOTE: UI text here is for the Streamlit interface, not part of LLM prompts.
 st.set_page_config(page_title="Boston School Choice Chatbot", page_icon="🏫", layout="wide")
 st.title("Boston Public Schools Enrollment Assistant 🏫")
 st.markdown("Ask questions about enrolling in Boston Public Schools. I can help find nearby schools if you provide a residence address.")
 
-# Load models and data
 llm = load_llm()
 embedder = load_embedding_model()
-doc_texts_global, filenames_global = load_documents(DOCS_PATH) # Load once
+doc_texts_global, filenames_global = load_documents(DOCS_PATH)
 faiss_index = create_faiss_index(embedder, doc_texts_global)
 
-# Initialize session state
 if "messages" not in st.session_state:
-    st.session_state.messages = [] # Stores chat history
+    st.session_state.messages = []
 if "context_json" not in st.session_state:
-    # Initial context structure from notebook
     st.session_state.context_json = {
         "residence": "",
         "grade": "",
         "school_choice": "",
-        "summary": "" # LLM can update this summary
+        "summary": ""
     }
 if "school_search" not in st.session_state:
-    # Use the exact initial prompt text from prompts.py
     st.session_state.school_search = initial_school_search_prompt
 
-# Display chat messages from history
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.markdown(message["content"])
 
-# --- Main Chat Loop ---
-# NOTE: UI text here (chat_input prompt) is for the Streamlit interface.
 if prompt := st.chat_input("What is your question? (e.g., 'I live at 123 Main St, my child is going into grade 2')"):
-    # Add user message to history and display it
    st.session_state.messages.append({"role": "user", "content": prompt})
     with st.chat_message("user"):
         st.markdown(prompt)
 
-    # Prepare and generate response
     with st.chat_message("assistant"):
         message_placeholder = st.empty()
-        # NOTE: UI text here is for the Streamlit interface.
         message_placeholder.markdown("Thinking...")
 
-        # Build the prompt using current state and exact prompt texts
         full_prompt = build_full_prompt(
             st.session_state.context_json,
             st.session_state.school_search,
             st.session_state.messages
         )
 
-        # Display prompt for debugging if needed (optional)
-        # with st.expander("DEBUG: View Full Prompt"):
-        #     st.text(full_prompt)
-
         try:
-            # Call the LLM
             response = llm(
                 full_prompt,
                 max_tokens=MAX_TOKENS_RESPONSE,
                 temperature=TEMPERATURE,
-                stop=["\nUser:", "\nAssistant:", "<|end_header_id|>", "<|eot_id|>"], # Stop tokens
-                echo=False # Don't echo the prompt in the output
+                stop=["\nUser:", "\nAssistant:", "<|end_header_id|>", "<|eot_id|>"],
+                echo=False
            )
             raw_output = response["choices"][0]["text"].strip()
 
-            # Parse the response using the corrected function
             reply_text, new_data = extract_reply_and_json(raw_output)
 
-            # Update context JSON state using the notebook's logic
             updated_context, residence_changed = update_context(st.session_state.context_json, new_data)
             st.session_state.context_json = updated_context
 
-            # If residence changed, update the school search prompt text for the *next* turn
             if residence_changed:
-                # This calls build_school_search_prompt which uses initial_school_search_prompt if address is now empty
                 st.session_state.school_search = build_school_search_prompt(st.session_state.context_json.get("residence", ""))
 
-            # Display the final reply from LLM (or empty string if parsing failed)
-            message_placeholder.markdown(reply_text if reply_text else "_Assistant had trouble generating a response._") # Provide minimal feedback if reply is empty
+            message_placeholder.markdown(reply_text if reply_text else "_Assistant had trouble generating a response._")
 
-            # Add assistant response (or empty string) to history
             st.session_state.messages.append({"role": "assistant", "content": reply_text})
 
         except Exception as e:
-            # NOTE: Error message here is for app stability, not part of LLM prompts.
            st.error(f"An error occurred during response generation: {e}")
             error_message = "Sorry, I encountered an error processing your request."
             message_placeholder.markdown(error_message)
             st.session_state.messages.append({"role": "assistant", "content": error_message})
 
 
-# Optional: Display current context JSON for debugging
-# NOTE: UI text here is for the Streamlit interface.
 with st.sidebar:
     st.subheader("ℹ️ Current Context")
     st.json(st.session_state.context_json)
     st.subheader("🏫 School Search Status")
-    # Display the current text being used for the school search part of the prompt
     st.text(st.session_state.school_search)