import streamlit as st
import pandas as pd
from embedding_atlas.streamlit import embedding_atlas
import os
from glob import glob
# Set page configuration
st.set_page_config(
    page_title="AI Companionship Behavior Analysis - Embedding Atlas",
    # Restored from the mojibake "π€": the UTF-8 bytes of the robot emoji
    # (F0 9F A4 96) had been decoded with a Greek single-byte code page.
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)
# Custom CSS hook. The string currently contains no style rules, so this
# call adds no styling; put a <style>...</style> block here if one is needed.
st.markdown("""
""", unsafe_allow_html=True)

# Main header. The literal must stay on one line: a single-quoted string
# cannot span lines, and the previous three-line form was a SyntaxError.
# NOTE(review): the surrounding markup was lost; <h1> is an assumption based
# on the "Main header" comment and unsafe_allow_html=True - confirm whether a
# styled class was intended.
st.markdown(
    '<h1>🤖 AI Companionship Behavior Analysis</h1>',
    unsafe_allow_html=True,
)
# Absolute path the app reads the atlas table from.
DATA_PATH = '/app/src/data/embed_atlas_df.parquet'


# Load data with caching
@st.cache_data
def _read_parquet_cached(path):
    """Read *path* as a parquet file and memoize the resulting DataFrame.

    Deliberately free of error handling: an exception raised here propagates
    to the caller and no value is stored in the cache, so a failed read is
    attempted again on the next script run instead of being remembered.
    """
    return pd.read_parquet(path)


def load_data(path=DATA_PATH):
    """Load the embedding atlas dataframe.

    Args:
        path: Parquet file to read. Defaults to ``DATA_PATH``.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` when the file is missing
        or cannot be read. On failure an error box is shown with ``st.error``
        before ``None`` is returned; no exception reaches the caller.
    """
    try:
        return _read_parquet_cached(path)
    except FileNotFoundError:
        # Report the path that was actually tried, plus the working directory
        # and its contents, to make a wrong mount/location easy to spot.
        # NOTE(review): "❌" replaces a mojibake "β"; presumed original glyph.
        st.error(f"""
❌ Data file not found. Please ensure '{path}' exists.
Current directory: {os.getcwd()}
Files available: {glob("*")}
""")
    except Exception as e:
        # UI boundary: surface any other read/parse failure on the page
        # rather than crashing the whole app.
        st.error(f"❌ Error loading data: {e}")
    return None


# Keep the cache-clearing hook that the previously decorated load_data exposed.
load_data.clear = _read_parquet_cached.clear
# Load the atlas DataFrame at module level. `df` is None when loading failed;
# load_data() has already shown the error on the page in that case.
df = load_data()
# Sidebar copy. Kept as flush-left constants so the markdown is not affected
# by the indentation of the layout code below. Blank lines separate the
# paragraphs, bold headings and lists; without them the headings and the
# closing paragraph are parsed as continuations of the preceding bullet.
_SIDEBAR_ABOUT_MD = """
This interactive visualization explores the landscape of AI model responses to prompts designed to evaluate **companionship behaviors**.

**Models Analyzed:**

- **Google Gemma-3-27b-it** (Open)
- **Microsoft Phi-4** (Open)
- **OpenAI o3-mini** (Closed)
- **Anthropic Claude-3.7 Sonnet** (Closed)

**Classifications:**

- **COMPANION+**: Responses that reinforce companionship behaviors
- **BOUNDARY+**: Responses that maintain appropriate boundaries
- **MIXED**: Responses with elements of both

The visualization uses **Qwen embeddings** projected into 2D space using **UMAP** to explore clusters of similar responses and behavioral patterns.
"""

_SIDEBAR_USAGE_MD = """
**Getting Started:**

1. Wait for the widget to fully load
2. Use the **"classification"** option in the color dropdown
3. Explore clusters by zooming and panning
4. Use the search bar to find specific terms
"""

# Sidebar with controls and information
with st.sidebar:
    # NOTE(review): the original emoji was reduced to a lone "π" by encoding
    # damage and cannot be recovered; 📊 is a stand-in - confirm or replace.
    st.header("📊 About This Visualization")
    st.markdown(_SIDEBAR_ABOUT_MD)

    # 🔧 restored from the mojibake "π§" (UTF-8 bytes F0 9F 94 A7).
    st.header("🔧 Usage Tips")
    st.markdown(_SIDEBAR_USAGE_MD)
# Usage recommendations
# NOTE(review): rendered at page level here. The file's indentation was lost,
# so confirm this was not meant to sit inside the sidebar block.
# 💡 restored from the mojibake "π‘" (UTF-8 bytes F0 9F 92 A1).
st.markdown("""
💡 Recommended Settings:
- Color by Classification: Select "classification" from the color dropdown to see the different behavior categories
- Cluster Exploration: Shift+Click and drag to explore different regions of the embedding space
- Table and Charts: Use the table and charts on the right to explore the full data and your current selection
""", unsafe_allow_html=True)
# Main visualization: only attempted when the DataFrame was loaded.
if df is not None:
    # 🗺️ restored from the mojibake "πΊοΈ" (UTF-8 bytes F0 9F 97 BA EF B8 8F).
    st.header("🗺️ Interactive Embedding Atlas")

    # Columns that the embedding_atlas call below refers to by name.
    required_columns = ['x', 'y', 'snippet']
    missing_columns = [col for col in required_columns if col not in df.columns]

    if missing_columns:
        # NOTE(review): "❌" replaces a mojibake "β"; presumed original glyph.
        st.error(f"❌ Missing required columns: {missing_columns}")
        st.write("Available columns:", list(df.columns))
    else:
        try:
            # Create the embedding atlas
            value = embedding_atlas(
                df,
                text="snippet",
                x="x",
                y="y",
                show_table=True,
            )

            # Display selection information if available. The widget value
            # may be empty, so look the predicate up once and guard on it.
            predicate = value.get("predicate") if value else None
            if predicate:
                # NOTE(review): the original emoji was reduced to a lone "π"
                # and cannot be recovered; 🔍 is a stand-in - confirm.
                st.markdown("### 🔍 Current Selection")
                st.info(f"Selection filter: `{predicate}`")
                # Possible extension (sketch, not active): materialize the
                # selected rows with DuckDB using the predicate as a WHERE
                # clause.
                #   import duckdb
                #   selection = duckdb.query_df(
                #       df, "dataframe", "SELECT * FROM dataframe WHERE " + predicate
                #   )
                #   st.dataframe(selection)
        except Exception as e:
            # UI boundary: any failure while building the widget is shown on
            # the page together with an installation hint.
            st.error(f"❌ Error creating visualization: {e}")
            st.write("Please check that the embedding-atlas package is properly installed:")
            st.code("pip install embedding-atlas")
else:
    st.error("Unable to load data. Please check the file path and try again.")
# Footer: in the visible part of the file this is only a horizontal rule
# ("---" is a markdown thematic break); no further footer content follows.
st.markdown("---")