Spaces:

scdong
/

mental_health_chatbot

Sleeping

App Files Files Community

scdong committed on Apr 11

Commit

5bf9716

1 Parent(s): 1b76ca4

Remove llama_cpp and use hosted model

Browse files

Files changed (1) hide show

app.py +64 -25

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import streamlit as st
-from utils.helper_functions import *
 import os
 import pandas as pd
 import json
@@ -10,10 +11,12 @@ from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.preprocessing import LabelEncoder
 from sklearn.model_selection import train_test_split
 from xgboost import XGBClassifier
-from llama_cpp import Llama
 st.set_page_config(page_title="Counselor Assistant", layout="centered")
 st.markdown("""
     <style>
         .main { background-color: #f9f9f9; padding: 1rem 2rem; border-radius: 12px; }
@@ -23,23 +26,26 @@ st.markdown("""
     </style>
 """, unsafe_allow_html=True)
 st.title("🧠 Mental Health Counselor Assistant")
 st.markdown("""
-Hi there, counselor 👋
-This tool is here to offer **supportive, AI-generated suggestions** when you’re not quite sure how to respond to a patient.
-### How it helps:
-- 🧩 Predicts the type of support your patient might need (advice, validation, information, & question.)
-- 💬 Generates a supportive counselor response
-- 📁 Lets you save and track conversations for reflection
-It's a sidekick, not a substitute for your clinical judgment 💚
 """)
 df = pd.read_csv("dataset/Kaggle_Mental_Health_Conversations_train.csv")
 df = df[['Context', 'Response']].dropna().copy()
 keywords_to_labels = {
     'advice': ['try', 'should', 'suggest', 'recommend'],
     'validation': ['understand', 'feel', 'valid', 'normal'],
@@ -57,14 +63,20 @@ def auto_label_response(response):
 df['response_type'] = df['Response'].apply(auto_label_response)
 df['combined_text'] = df['Context'] + " " + df['Response']
 le = LabelEncoder()
 y = le.fit_transform(df['response_type'])
 vectorizer = TfidfVectorizer(max_features=2000, ngram_range=(1, 2))
 X = vectorizer.fit_transform(df['combined_text'])
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
 xgb_model = XGBClassifier(
     objective='multi:softmax',
     num_class=len(le.classes_),
@@ -76,13 +88,22 @@ xgb_model = XGBClassifier(
 )
 xgb_model.fit(X_train, y_train)
-MODEL_PATH = os.path.expanduser("/Users/Pi/models/mistral/mistral-7b-instruct-v0.1.Q4_K_M.gguf")
-@st.cache_resource(show_spinner=True)
-def load_llm():
-    return Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=os.cpu_count())
-llm = load_llm()
 def predict_response_type(user_input):
     vec = vectorizer.transform([user_input])
     pred = xgb_model.predict(vec)
@@ -103,37 +124,47 @@ def generate_llm_response(user_input, response_type):
     prompt = build_prompt(user_input, response_type)
     start = time.time()
     with st.spinner("Thinking through a helpful response for your patient..."):
-        result = llm(prompt, max_tokens=300, temperature=0.7)
     end = time.time()
     st.info(f"Response generated in {end - start:.1f} seconds")
-    return result['choices'][0]['text'].strip()
 def trim_memory(history, max_turns=6):
     """Return the most recent ``max_turns`` turns from ``history``.
 
     Two entries are kept per turn (``max_turns * 2`` items in total), counted
     from the end of the list. ``history`` itself is not modified.
 
     Args:
         history: List of conversation entries, oldest first.
         max_turns: Number of most recent turns to keep. Zero or a negative
             value keeps nothing.
 
     Returns:
         A list holding at most ``max_turns * 2`` trailing entries.
     """
     # Guard: ``history[-0:]`` is the whole list, not an empty one, so a
     # non-positive limit must be handled before slicing.
     if max_turns <= 0:
         return []
     return history[-max_turns * 2:]
 def save_conversation(history):
-    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    with open("logs/chat_log_combined.csv", "w", newline='') as f:
         writer = csv.writer(f)
-        writer.writerow(["Timestamp", "Role", "Content", "Intent", "Confidence"])
         for entry in history:
             writer.writerow([
-                now,
                 entry.get("role", ""),
                 entry.get("content", ""),
                 entry.get("label", ""),
-                round(float(entry.get("confidence", 0)), 2)
             ])
-    st.success("Saved to chat_log_combined.csv")
 if "history" not in st.session_state:
     st.session_state.history = []
 if "user_input" not in st.session_state:
     st.session_state.user_input = ""
 MAX_WORDS = 1000
 word_count = len(st.session_state.user_input.split())
 st.markdown(f"**📝 Input Length:** {word_count} / {MAX_WORDS} words")
 st.session_state.user_input = st.text_area(
     "💬 What did your patient say?",
     value=st.session_state.user_input,
@@ -141,6 +172,7 @@ st.session_state.user_input = st.text_area(
     height=100
 )
 col1, col2, col3 = st.columns([2, 1, 1])
 with col1:
     send = st.button("💡 Suggest Response")
@@ -149,13 +181,19 @@ with col2:
 with col3:
     reset = st.button("🔁 Reset")
 if send and st.session_state.user_input:
     user_input = st.session_state.user_input
     predicted_type, confidence = predict_response_type(user_input)
     reply = generate_llm_response(user_input, predicted_type)
     st.session_state.history.append({"role": "user", "content": user_input})
-    st.session_state.history.append({"role": "assistant", "content": reply, "label": predicted_type, "confidence": confidence})
     st.session_state.history = trim_memory(st.session_state.history)
 if save:
@@ -166,12 +204,13 @@ if reset:
     st.session_state.user_input = ""
     st.success("Conversation has been cleared.")
 st.markdown("---")
 for turn in st.session_state.history:
     if turn["role"] == "user":
         st.markdown(f"🧍‍♀️ **Patient:** {turn['content']}")
     else:
-        st.markdown(f"👨‍⚕️ **Suggested Counselor Response:** {turn['content']}")
         st.caption(f"_Intent: {turn['label']} (Confidence: {turn['confidence']:.0%})_")
     st.markdown("---")

+# Streamlit App: Counselor Assistant (XGBoost + Selectable LLMs from Hugging Face)
 import streamlit as st
 import os
 import pandas as pd
 import json
 from sklearn.preprocessing import LabelEncoder
 from sklearn.model_selection import train_test_split
 from xgboost import XGBClassifier
+from transformers import pipeline
+# --- Page Setup ---
 st.set_page_config(page_title="Counselor Assistant", layout="centered")
+# --- Styling ---
 st.markdown("""
     <style>
         .main { background-color: #f9f9f9; padding: 1rem 2rem; border-radius: 12px; }
     </style>
 """, unsafe_allow_html=True)
+# --- App Header ---
 st.title("🧠 Mental Health Counselor Assistant")
 st.markdown("""
+Welcome, counselor 👋
+This tool offers **AI-powered suggestions** to support you when responding to your patients.
+### What it does:
+- 🧩 Predicts what type of support is best: *Advice*, *Validation*, *Information*, or *Question*
+- 💬 Generates an LLM-powered suggestion for you
+- 💾 Lets you save your session for reflection
+This is here to support — not replace — your clinical instincts 💚
 """)
+# --- Load and label dataset ---
 df = pd.read_csv("dataset/Kaggle_Mental_Health_Conversations_train.csv")
 df = df[['Context', 'Response']].dropna().copy()
+# Auto-labeling: heuristics for labeling responses
 keywords_to_labels = {
     'advice': ['try', 'should', 'suggest', 'recommend'],
     'validation': ['understand', 'feel', 'valid', 'normal'],
 df['response_type'] = df['Response'].apply(auto_label_response)
 df['combined_text'] = df['Context'] + " " + df['Response']
+# Encode labels
 le = LabelEncoder()
 y = le.fit_transform(df['response_type'])
+# TF-IDF vectorizer on combined text
 vectorizer = TfidfVectorizer(max_features=2000, ngram_range=(1, 2))
 X = vectorizer.fit_transform(df['combined_text'])
+# Train-test split
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.2, stratify=y, random_state=42
+)
+# XGBoost Classifier
 xgb_model = XGBClassifier(
     objective='multi:softmax',
     num_class=len(le.classes_),
 )
 xgb_model.fit(X_train, y_train)
+# --- Select Model Option ---
+model_options = {
+    "google/flan-t5-base": "✅ Flan-T5 (Fast, Clean)",
+    "declare-lab/flan-alpaca-gpt4-xl": "💬 Flan Alpaca GPT4 (Human-sounding)",
+    "google/flan-ul2": "🧠 Flan-UL2 (Deeper reasoning)"
+}
+model_choice = st.selectbox("🧠 Choose a Response Model", list(model_options.keys()), format_func=lambda x: model_options[x])
 # Load the Hugging Face pipeline for the chosen model, cached by Streamlit.
 # NOTE(review): no max_entries is set, so every model picked during the
 # process lifetime presumably stays in the cache — confirm memory headroom.
+@st.cache_resource(show_spinner="Loading selected language model...")
+def load_llm(model_name):
     # "text2text-generation" is the task name handed to transformers.pipeline;
     # the returned object is what the script later calls as llm(prompt, ...).
+    return pipeline("text2text-generation", model=model_name)
+llm = load_llm(model_choice)
+# --- Utility Functions ---
 def predict_response_type(user_input):
     vec = vectorizer.transform([user_input])
     pred = xgb_model.predict(vec)
     prompt = build_prompt(user_input, response_type)
     start = time.time()
     with st.spinner("Thinking through a helpful response for your patient..."):
+        result = llm(prompt, max_length=150, do_sample=True, temperature=0.7)
     end = time.time()
     st.info(f"Response generated in {end - start:.1f} seconds")
+    return result[0]["generated_text"].strip()
 def trim_memory(history, max_turns=6):
     """Return the most recent ``max_turns`` turns from ``history``.
 
     Two entries are kept per turn (``max_turns * 2`` items in total), counted
     from the end of the list. ``history`` itself is not modified.
 
     Args:
         history: List of conversation entries, oldest first.
         max_turns: Number of most recent turns to keep. Zero or a negative
             value keeps nothing.
 
     Returns:
         A list holding at most ``max_turns * 2`` trailing entries.
     """
     # Guard: ``history[-0:]`` is the whole list, not an empty one, so a
     # non-positive limit must be handled before slicing.
     if max_turns <= 0:
         return []
     return history[-max_turns * 2:]
 def save_conversation(history):
+    now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+    with open(f"chat_log_{now}.csv", "w", newline='') as f:
         writer = csv.writer(f)
+        writer.writerow(["Role", "Content", "Intent", "Confidence"])
         for entry in history:
             writer.writerow([
                 entry.get("role", ""),
                 entry.get("content", ""),
                 entry.get("label", ""),
+                round(float(entry.get("confidence", 0)) * 100)
             ])
+    st.success(f"Saved to chat_log_{now}.csv")
+# --- Session State Setup ---
 if "history" not in st.session_state:
     st.session_state.history = []
 if "user_input" not in st.session_state:
     st.session_state.user_input = ""
+# --- Display Sample Prompts ---
+with st.expander("💡 Sample inputs you can try"):
+    st.markdown("""
+    - My patient is constantly feeling overwhelmed at work.
+    - A student says they panic every time they have to speak in class.
+    - Someone told me they think they’ll never feel okay again.
+    """)
+# --- Text Area + Word Counter ---
 MAX_WORDS = 1000
 word_count = len(st.session_state.user_input.split())
 st.markdown(f"**📝 Input Length:** {word_count} / {MAX_WORDS} words")
 st.session_state.user_input = st.text_area(
     "💬 What did your patient say?",
     value=st.session_state.user_input,
     height=100
 )
+# --- Button Layout ---
 col1, col2, col3 = st.columns([2, 1, 1])
 with col1:
     send = st.button("💡 Suggest Response")
 with col3:
     reset = st.button("🔁 Reset")
+# --- Button Logic ---
 if send and st.session_state.user_input:
     user_input = st.session_state.user_input
     predicted_type, confidence = predict_response_type(user_input)
     reply = generate_llm_response(user_input, predicted_type)
     st.session_state.history.append({"role": "user", "content": user_input})
+    st.session_state.history.append({
+        "role": "assistant",
+        "content": reply,
+        "label": predicted_type,
+        "confidence": confidence
+    })
     st.session_state.history = trim_memory(st.session_state.history)
 if save:
     st.session_state.user_input = ""
     st.success("Conversation has been cleared.")
+# --- Chat History Display ---
 st.markdown("---")
 for turn in st.session_state.history:
     if turn["role"] == "user":
         st.markdown(f"🧍‍♀️ **Patient:** {turn['content']}")
     else:
+        st.markdown(f"👩‍⚕️👨‍⚕️ **Suggested Counselor Response:** {turn['content']}")
         st.caption(f"_Intent: {turn['label']} (Confidence: {turn['confidence']:.0%})_")
     st.markdown("---")