In [None]:
# Multi-Turn Mental Health Chatbot using XGBoost + Mistral-7B

import os
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from llama_cpp import Llama
import json

# --- Load and prepare the dataset ---
df = pd.read_csv("dataset/Kaggle_Mental_Health_Conversations_train.csv")
df = df[['Context', 'Response']].dropna().copy()

# Auto-labeling
keywords_to_labels = {
    'advice': ['try', 'should', 'suggest', 'recommend'],
    'validation': ['understand', 'feel', 'valid', 'normal'],
    'information': ['cause', 'often', 'disorder', 'symptom'],
    'question': ['how', 'what', 'why', 'have you']
}

def auto_label_response(response):
    response = response.lower()
    for label, keywords in keywords_to_labels.items():
        if any(word in response for word in keywords):
            return label
    return 'information'

df['response_type'] = df['Response'].apply(auto_label_response)

# Combine Context and Response for richer features
df['combined_text'] = df['Context'] + " " + df['Response']

# Encode labels
le = LabelEncoder()
y = le.fit_transform(df['response_type'])

# TF-IDF Vectorization
vectorizer = TfidfVectorizer(max_features=2000, ngram_range=(1, 2))
X = vectorizer.fit_transform(df['combined_text'])

# Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Train XGBoost Classifier
xgb_model = XGBClassifier(
    objective='multi:softmax',
    num_class=len(le.classes_),
    eval_metric='mlogloss',
    use_label_encoder=False,
    max_depth=6,
    learning_rate=0.1,
    n_estimators=100
)
xgb_model.fit(X_train, y_train)

# Load Mistral model
MODEL_PATH = os.path.expanduser("/Users/Pi/models/mistral/mistral-7b-instruct-v0.1.Q4_K_M.gguf")
llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=os.cpu_count())

# --- Chatbot Functions ---
def predict_response_type(user_input):
    vec = vectorizer.transform([user_input])
    pred = xgb_model.predict(vec)
    proba = xgb_model.predict_proba(vec).max()
    label = le.inverse_transform(pred)[0]
    return label, proba

def build_prompt(user_input, response_type):
    prompts = {
        "advice": f"A student said: \"{user_input}\". What advice should a counselor give to help?",
        "validation": f"A student said: \"{user_input}\". How can a counselor validate the student's emotions?",
        "information": f"A student said: \"{user_input}\". Explain what might be happening from a mental health perspective.",
        "question": f"A student said: \"{user_input}\". What follow-up questions should a counselor ask?"
    }
    return prompts.get(response_type, prompts["information"])

def generate_llm_response(user_input, response_type):
    prompt = build_prompt(user_input, response_type)
    result = llm(prompt, max_tokens=150, temperature=0.7)
    return result['choices'][0]['text'].strip()

def trim_memory(history, max_turns=6):
    return history[-max_turns * 2:]

def save_conversation(history):
    with open("chat_history.json", "w") as f:
        json.dump(history, f, indent=2)
    print("✅ Conversation saved to chat_history.json\n")

def show_intro():
    print("\n--- Multi-Turn Mental Health Chatbot ---")
    print("This assistant simulates a counselor's conversation using AI.")
    print("- Type something your patient/student might say")
    print("- Type 'save' to export the conversation")
    print("- Type 'exit' to quit\n")

    print("Example:")
    print("User: I feel like I’ll mess up my big presentation tomorrow.")
    print("Counselor: It’s completely normal to feel nervous before a big event...\n")

# --- Run Chatbot ---
history = []
show_intro()

while True:
    user_input = input("User: ").strip()

    if user_input.lower() == "exit":
        print("Goodbye")
        break
    elif user_input.lower() == "save":
        save_conversation(history)
        continue

    predicted_type, confidence = predict_response_type(user_input)
    print(f"(Prfedicted: {predicted_type}, Confidence: {confidence:.1%})")

    llm_reply = generate_llm_response(user_input, predicted_type)

    history.append({"role": "user", "content": user_input})
    history.append({"role": "assistant", "content": llm_reply})
    history = trim_memory(history)

    print("Counselor:", llm_reply)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
llama_model_load_from_file_impl: using device Metal (Apple M2 Pro) - 21845 MiB free
llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /Users/Pi/models/mistral/mistral-7b-instruct-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                 


--- Multi-Turn Mental Health Chatbot ---
This assistant simulates a counselor's conversation using AI.
- Type something your patient/student might say
- Type 'save' to export the conversation
- Type 'exit' to quit

Example:
User: I feel like I’ll mess up my big presentation tomorrow.
Counselor: It’s completely normal to feel nervous before a big event...



User:  i'm nervous


(Predicted: information, Confidence: 85.5%)


llama_perf_context_print:        load time =     766.62 ms
llama_perf_context_print: prompt eval time =     766.43 ms /    23 tokens (   33.32 ms per token,    30.01 tokens per second)
llama_perf_context_print:        eval time =   17184.04 ms /   149 runs   (  115.33 ms per token,     8.67 tokens per second)
llama_perf_context_print:       total time =   17971.68 ms /   172 tokens


Counselor: From a mental health perspective, feeling nervous or anxious is a common and normal human experience. It's a natural response to stress or a challenging situation. However, if the feelings of nervousness persist or interfere with daily life, it could be a sign of an anxiety disorder.

Anxiety disorders are a group of mental health conditions in which a person feels excessive and persistent worry or fear. These feelings can interfere with daily life, such as work, school, relationships, and personal activities. The symptoms of anxiety disorders vary from person to person, but typically include physical symptoms like sweating, trembling, and difficulty sleeping, as well as psychological symptoms like feelings of worry, fear, and avoidance
