"""Streamlit page that shows AI companionship-behavior responses in an Embedding Atlas widget."""

import os
from glob import glob

import pandas as pd
import streamlit as st
from embedding_atlas.streamlit import embedding_atlas

# Location the parquet dataframe is read from.
DATA_PATH = "/app/src/data/embed_atlas_df.parquet"

# Columns passed to the atlas widget below (2D coordinates + hover/search text).
REQUIRED_COLUMNS = ["x", "y", "snippet"]

# Set page configuration
st.set_page_config(
    page_title="AI Companionship Behavior Analysis - Embedding Atlas",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS for better styling
# NOTE(review): the stylesheet body is empty here — presumably the <style>
# markup was lost upstream; restore it from the original if available.
st.markdown(""" """, unsafe_allow_html=True)

# Main header
st.markdown("# 🤖 AI Companionship Behavior Analysis")


@st.cache_data
def _read_atlas_dataframe(path):
    """Read the parquet file at ``path``; only successful reads are cached.

    Exceptions from ``pd.read_parquet`` propagate to the caller. Because the
    call did not return, nothing is stored in the cache, so the next rerun
    tries the read again.
    """
    return pd.read_parquet(path)


def load_data():
    """Load the embedding atlas dataframe.

    Returns:
        The dataframe read from ``DATA_PATH``, or ``None`` when it could not
        be loaded. In the failure case an error message is shown on the page.
    """
    # Error handling lives outside the cached helper on purpose: returning
    # None from inside a cached function would memoise the failure and keep
    # serving it even after the file becomes available.
    try:
        return _read_atlas_dataframe(DATA_PATH)
    except FileNotFoundError:
        st.error(f"""
        ❌ Data file not found. Please ensure '{DATA_PATH}' exists.

        Current directory: {os.getcwd()}

        Files available: {glob("*")}
        """)
        return None
    except Exception as e:
        st.error(f"❌ Error loading data: {str(e)}")
        return None


# Load data once for the entire app
df = load_data()

# Sidebar with controls and information
with st.sidebar:
    st.header("📚 About This Visualization")
    st.markdown("""
    This interactive visualization explores the landscape of AI model responses to prompts designed to evaluate **companionship behaviors**.

    **Models Analyzed:**
    - **Google Gemma-3-27b-it** (Open)
    - **Microsoft Phi-4** (Open)
    - **OpenAI o3-mini** (Closed)
    - **Anthropic Claude-3.7 Sonnet** (Closed)

    **Classifications:**
    - **COMPANION+**: Responses that reinforce companionship behaviors
    - **BOUNDARY+**: Responses that maintain appropriate boundaries
    - **MIXED**: Responses with elements of both

    The visualization uses **Qwen embeddings** projected into 2D space using **UMAP** to explore clusters of similar responses and behavioral patterns.
    """)

    st.header("🔧 Usage Tips")
    st.markdown("""
    **Getting Started:**
    1. Wait for the widget to fully load
    2. Use the **"classification"** option in the color dropdown
    3. Explore clusters by zooming and panning
    4. Use the search bar to find specific terms
    """)

# Usage recommendations
# NOTE(review): rendered in the main area; confirm this panel was not meant to
# live inside the sidebar. The original wrapper markup is not visible, so the
# structure is expressed in plain Markdown.
st.markdown("""
**💡 Recommended Settings:**

- **Color by Classification:** Select "classification" from the color dropdown to see the different behavior categories
- **Cluster Exploration:** Shift+Click and drag to explore different regions of the embedding space
- **Table and Charts:** Use the table and charts on the right to explore the full data and your current selection
""")

# Main visualization
if df is not None:
    st.header("🗺️ Interactive Embedding Atlas")

    # Check required columns
    missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]

    if missing_columns:
        st.error(f"❌ Missing required columns: {missing_columns}")
        st.write("Available columns:", list(df.columns))
    else:
        try:
            # Create the embedding atlas
            value = embedding_atlas(
                df,
                text="snippet",
                x="x",
                y="y",
                show_table=True,
            )
        except Exception as e:
            # Page-level boundary: show the failure instead of crashing the app.
            st.error(f"❌ Error creating visualization: {str(e)}")
            st.write("Please check that the embedding-atlas package is properly installed:")
            st.code("pip install embedding-atlas")
        else:
            # Display selection information if available
            predicate = value.get("predicate") if value else None
            if predicate:
                st.markdown("### 📋 Current Selection")
                st.info(f"Selection filter: `{predicate}`")

                # You could add DuckDB querying here if needed
                # import duckdb
                # selection = duckdb.query_df(
                #     df, "dataframe", "SELECT * FROM dataframe WHERE " + value["predicate"]
                # )
                # st.dataframe(selection)
else:
    st.error("Unable to load data. Please check the file path and try again.")

# Footer with additional information
st.markdown("---")