scdong committed on
Commit
5bf9716
·
1 Parent(s): 1b76ca4

Remove llama_cpp and use hosted model

Browse files
Files changed (1) hide show
  1. app.py +64 -25
app.py CHANGED
@@ -1,5 +1,6 @@
 
 
1
  import streamlit as st
2
- from utils.helper_functions import *
3
  import os
4
  import pandas as pd
5
  import json
@@ -10,10 +11,12 @@ from sklearn.feature_extraction.text import TfidfVectorizer
10
  from sklearn.preprocessing import LabelEncoder
11
  from sklearn.model_selection import train_test_split
12
  from xgboost import XGBClassifier
13
- from llama_cpp import Llama
14
 
 
15
  st.set_page_config(page_title="Counselor Assistant", layout="centered")
16
 
 
17
  st.markdown("""
18
  <style>
19
  .main { background-color: #f9f9f9; padding: 1rem 2rem; border-radius: 12px; }
@@ -23,23 +26,26 @@ st.markdown("""
23
  </style>
24
  """, unsafe_allow_html=True)
25
 
 
26
  st.title("🧠 Mental Health Counselor Assistant")
27
  st.markdown("""
28
- Hi there, counselor πŸ‘‹
29
 
30
- This tool is here to offer **supportive, AI-generated suggestions** when you’re not quite sure how to respond to a patient.
31
 
32
- ### How it helps:
33
- - 🧩 Predicts the type of support your patient might need (advice, validation, information, & question.)
34
- - πŸ’¬ Generates a supportive counselor response
35
- - πŸ“ Lets you save and track conversations for reflection
36
 
37
- It's a sidekick, not a substitute for your clinical judgment πŸ’š
38
  """)
39
 
 
40
  df = pd.read_csv("dataset/Kaggle_Mental_Health_Conversations_train.csv")
41
  df = df[['Context', 'Response']].dropna().copy()
42
 
 
43
  keywords_to_labels = {
44
  'advice': ['try', 'should', 'suggest', 'recommend'],
45
  'validation': ['understand', 'feel', 'valid', 'normal'],
@@ -57,14 +63,20 @@ def auto_label_response(response):
57
  df['response_type'] = df['Response'].apply(auto_label_response)
58
  df['combined_text'] = df['Context'] + " " + df['Response']
59
 
 
60
  le = LabelEncoder()
61
  y = le.fit_transform(df['response_type'])
62
 
 
63
  vectorizer = TfidfVectorizer(max_features=2000, ngram_range=(1, 2))
64
  X = vectorizer.fit_transform(df['combined_text'])
65
 
66
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
 
 
 
67
 
 
68
  xgb_model = XGBClassifier(
69
  objective='multi:softmax',
70
  num_class=len(le.classes_),
@@ -76,13 +88,22 @@ xgb_model = XGBClassifier(
76
  )
77
  xgb_model.fit(X_train, y_train)
78
 
79
- MODEL_PATH = os.path.expanduser("/Users/Pi/models/mistral/mistral-7b-instruct-v0.1.Q4_K_M.gguf")
80
- @st.cache_resource(show_spinner=True)
81
- def load_llm():
82
- return Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=os.cpu_count())
 
 
 
 
83
 
84
- llm = load_llm()
 
 
85
 
 
 
 
86
  def predict_response_type(user_input):
87
  vec = vectorizer.transform([user_input])
88
  pred = xgb_model.predict(vec)
@@ -103,37 +124,47 @@ def generate_llm_response(user_input, response_type):
103
  prompt = build_prompt(user_input, response_type)
104
  start = time.time()
105
  with st.spinner("Thinking through a helpful response for your patient..."):
106
- result = llm(prompt, max_tokens=300, temperature=0.7)
107
  end = time.time()
108
  st.info(f"Response generated in {end - start:.1f} seconds")
109
- return result['choices'][0]['text'].strip()
110
 
111
  def trim_memory(history, max_turns=6):
112
  return history[-max_turns * 2:]
113
 
114
  def save_conversation(history):
115
- now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
116
- with open("logs/chat_log_combined.csv", "w", newline='') as f:
117
  writer = csv.writer(f)
118
- writer.writerow(["Timestamp", "Role", "Content", "Intent", "Confidence"])
119
  for entry in history:
120
  writer.writerow([
121
- now,
122
  entry.get("role", ""),
123
  entry.get("content", ""),
124
  entry.get("label", ""),
125
- round(float(entry.get("confidence", 0)), 2)
126
  ])
127
- st.success("Saved to chat_log_combined.csv")
128
 
 
129
  if "history" not in st.session_state:
130
  st.session_state.history = []
131
  if "user_input" not in st.session_state:
132
  st.session_state.user_input = ""
133
 
 
 
 
 
 
 
 
 
 
134
  MAX_WORDS = 1000
135
  word_count = len(st.session_state.user_input.split())
136
  st.markdown(f"**πŸ“ Input Length:** {word_count} / {MAX_WORDS} words")
 
137
  st.session_state.user_input = st.text_area(
138
  "πŸ’¬ What did your patient say?",
139
  value=st.session_state.user_input,
@@ -141,6 +172,7 @@ st.session_state.user_input = st.text_area(
141
  height=100
142
  )
143
 
 
144
  col1, col2, col3 = st.columns([2, 1, 1])
145
  with col1:
146
  send = st.button("πŸ’‘ Suggest Response")
@@ -149,13 +181,19 @@ with col2:
149
  with col3:
150
  reset = st.button("πŸ” Reset")
151
 
 
152
  if send and st.session_state.user_input:
153
  user_input = st.session_state.user_input
154
  predicted_type, confidence = predict_response_type(user_input)
155
  reply = generate_llm_response(user_input, predicted_type)
156
 
157
  st.session_state.history.append({"role": "user", "content": user_input})
158
- st.session_state.history.append({"role": "assistant", "content": reply, "label": predicted_type, "confidence": confidence})
 
 
 
 
 
159
  st.session_state.history = trim_memory(st.session_state.history)
160
 
161
  if save:
@@ -166,12 +204,13 @@ if reset:
166
  st.session_state.user_input = ""
167
  st.success("Conversation has been cleared.")
168
 
 
169
  st.markdown("---")
170
  for turn in st.session_state.history:
171
  if turn["role"] == "user":
172
  st.markdown(f"πŸ§β€β™€οΈ **Patient:** {turn['content']}")
173
  else:
174
- st.markdown(f"πŸ‘¨β€βš•οΈ **Suggested Counselor Response:** {turn['content']}")
175
  st.caption(f"_Intent: {turn['label']} (Confidence: {turn['confidence']:.0%})_")
176
  st.markdown("---")
177
 
 
1
+ # Streamlit App: Counselor Assistant (XGBoost + Selectable LLMs from Hugging Face)
2
+
3
  import streamlit as st
 
4
  import os
5
  import pandas as pd
6
  import json
 
11
  from sklearn.preprocessing import LabelEncoder
12
  from sklearn.model_selection import train_test_split
13
  from xgboost import XGBClassifier
14
+ from transformers import pipeline
15
 
16
+ # --- Page Setup ---
17
  st.set_page_config(page_title="Counselor Assistant", layout="centered")
18
 
19
+ # --- Styling ---
20
  st.markdown("""
21
  <style>
22
  .main { background-color: #f9f9f9; padding: 1rem 2rem; border-radius: 12px; }
 
26
  </style>
27
  """, unsafe_allow_html=True)
28
 
29
+ # --- App Header ---
30
  st.title("🧠 Mental Health Counselor Assistant")
31
  st.markdown("""
32
+ Welcome, counselor 👋
33
 
34
+ This tool offers **AI-powered suggestions** to support you when responding to your patients.
35
 
36
+ ### What it does:
37
+ - 🧩 Predicts what type of support is best: *Advice*, *Validation*, *Information*, or *Question*
38
+ - πŸ’¬ Generates an LLM-powered suggestion for you
39
+ - πŸ’Ύ Lets you save your session for reflection
40
 
41
+ This is here to support — not replace — your clinical instincts 💚
42
  """)
43
 
44
+ # --- Load and label dataset ---
45
  df = pd.read_csv("dataset/Kaggle_Mental_Health_Conversations_train.csv")
46
  df = df[['Context', 'Response']].dropna().copy()
47
 
48
+ # Auto-labeling: heuristics for labeling responses
49
  keywords_to_labels = {
50
  'advice': ['try', 'should', 'suggest', 'recommend'],
51
  'validation': ['understand', 'feel', 'valid', 'normal'],
 
63
  df['response_type'] = df['Response'].apply(auto_label_response)
64
  df['combined_text'] = df['Context'] + " " + df['Response']
65
 
66
+ # Encode labels
67
  le = LabelEncoder()
68
  y = le.fit_transform(df['response_type'])
69
 
70
+ # TF-IDF vectorizer on combined text
71
  vectorizer = TfidfVectorizer(max_features=2000, ngram_range=(1, 2))
72
  X = vectorizer.fit_transform(df['combined_text'])
73
 
74
+ # Train-test split
75
+ X_train, X_test, y_train, y_test = train_test_split(
76
+ X, y, test_size=0.2, stratify=y, random_state=42
77
+ )
78
 
79
+ # XGBoost Classifier
80
  xgb_model = XGBClassifier(
81
  objective='multi:softmax',
82
  num_class=len(le.classes_),
 
88
  )
89
  xgb_model.fit(X_train, y_train)
90
 
91
+ # --- Select Model Option ---
92
+ model_options = {
93
+ "google/flan-t5-base": "βœ… Flan-T5 (Fast, Clean)",
94
+ "declare-lab/flan-alpaca-gpt4-xl": "πŸ’¬ Flan Alpaca GPT4 (Human-sounding)",
95
+ "google/flan-ul2": "🧠 Flan-UL2 (Deeper reasoning)"
96
+ }
97
+
98
+ model_choice = st.selectbox("🧠 Choose a Response Model", list(model_options.keys()), format_func=lambda x: model_options[x])
99
 
100
@st.cache_resource(show_spinner="Loading selected language model...")
def load_llm(model_name):
    """Build the text2text-generation pipeline for ``model_name``.

    The function is wrapped in ``st.cache_resource``, and a spinner with
    the message above is shown while it runs.

    Args:
        model_name: Model identifier passed to ``pipeline`` as ``model``.

    Returns:
        The object returned by ``pipeline("text2text-generation", ...)``.
    """
    text2text = pipeline("text2text-generation", model=model_name)
    return text2text
103
 
104
+ llm = load_llm(model_choice)
105
+
106
+ # --- Utility Functions ---
107
  def predict_response_type(user_input):
108
  vec = vectorizer.transform([user_input])
109
  pred = xgb_model.predict(vec)
 
124
  prompt = build_prompt(user_input, response_type)
125
  start = time.time()
126
  with st.spinner("Thinking through a helpful response for your patient..."):
127
+ result = llm(prompt, max_length=150, do_sample=True, temperature=0.7)
128
  end = time.time()
129
  st.info(f"Response generated in {end - start:.1f} seconds")
130
+ return result[0]["generated_text"].strip()
131
 
132
  def trim_memory(history, max_turns=6):
133
  return history[-max_turns * 2:]
134
 
135
  def save_conversation(history):
136
+ now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
137
+ with open(f"chat_log_{now}.csv", "w", newline='') as f:
138
  writer = csv.writer(f)
139
+ writer.writerow(["Role", "Content", "Intent", "Confidence"])
140
  for entry in history:
141
  writer.writerow([
 
142
  entry.get("role", ""),
143
  entry.get("content", ""),
144
  entry.get("label", ""),
145
+ round(float(entry.get("confidence", 0)) * 100)
146
  ])
147
+ st.success(f"Saved to chat_log_{now}.csv")
148
 
149
+ # --- Session State Setup ---
150
  if "history" not in st.session_state:
151
  st.session_state.history = []
152
  if "user_input" not in st.session_state:
153
  st.session_state.user_input = ""
154
 
155
+ # --- Display Sample Prompts ---
156
+ with st.expander("πŸ’‘ Sample inputs you can try"):
157
+ st.markdown("""
158
+ - My patient is constantly feeling overwhelmed at work.
159
+ - A student says they panic every time they have to speak in class.
160
+ - Someone told me they think they’ll never feel okay again.
161
+ """)
162
+
163
+ # --- Text Area + Word Counter ---
164
  MAX_WORDS = 1000
165
  word_count = len(st.session_state.user_input.split())
166
  st.markdown(f"**πŸ“ Input Length:** {word_count} / {MAX_WORDS} words")
167
+
168
  st.session_state.user_input = st.text_area(
169
  "πŸ’¬ What did your patient say?",
170
  value=st.session_state.user_input,
 
172
  height=100
173
  )
174
 
175
+ # --- Button Layout ---
176
  col1, col2, col3 = st.columns([2, 1, 1])
177
  with col1:
178
  send = st.button("πŸ’‘ Suggest Response")
 
181
  with col3:
182
  reset = st.button("πŸ” Reset")
183
 
184
+ # --- Button Logic ---
185
  if send and st.session_state.user_input:
186
  user_input = st.session_state.user_input
187
  predicted_type, confidence = predict_response_type(user_input)
188
  reply = generate_llm_response(user_input, predicted_type)
189
 
190
  st.session_state.history.append({"role": "user", "content": user_input})
191
+ st.session_state.history.append({
192
+ "role": "assistant",
193
+ "content": reply,
194
+ "label": predicted_type,
195
+ "confidence": confidence
196
+ })
197
  st.session_state.history = trim_memory(st.session_state.history)
198
 
199
  if save:
 
204
  st.session_state.user_input = ""
205
  st.success("Conversation has been cleared.")
206
 
207
+ # --- Chat History Display ---
208
  st.markdown("---")
209
  for turn in st.session_state.history:
210
  if turn["role"] == "user":
211
  st.markdown(f"πŸ§β€β™€οΈ **Patient:** {turn['content']}")
212
  else:
213
+ st.markdown(f"πŸ‘©β€βš•οΈπŸ‘¨β€βš•οΈ **Suggested Counselor Response:** {turn['content']}")
214
  st.caption(f"_Intent: {turn['label']} (Confidence: {turn['confidence']:.0%})_")
215
  st.markdown("---")
216