Update app.py

app.py CHANGED
@@ -17,12 +17,11 @@ hf_logging.set_verbosity_error()
 
 MODEL_NAME = "bert-base-uncased"
 DEVICE = "cpu"
-SAVE_DIR = "저장저장1"
+SAVE_DIR = "저장저장1"
 LAYER_ID = 4
 SEED = 0
 CLF_NAME = "linear"
 
-# Class label mapping provided by user
 CLASS_LABEL_MAP = {
     0: "World",
     1: "Sports",
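Note: the `-`/`+` pair for `SAVE_DIR` (and the similar pairs for `height=300`, `min_width=300`, and `cache_examples=False` below) shows identical text on both sides; the rendered diff strips leading whitespace, so these are most likely indentation-only changes. The visible `CLASS_LABEL_MAP` entries ("World", "Sports") match the AG News topic labels. A minimal sketch of how such a map is typically consumed (`pred_id` is a hypothetical variable, not code from app.py):

    # Hypothetical lookup: map a predicted class id to its display label,
    # with a fallback for ids missing from the map.
    label = CLASS_LABEL_MAP.get(pred_id, f"Class {pred_id}")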
@@ -32,7 +31,6 @@ CLASS_LABEL_MAP = {
 
 TOKENIZER_GLOBAL, MODEL_GLOBAL = None, None
 W_GLOBAL, MU_GLOBAL, W_P_GLOBAL, B_P_GLOBAL = None, None, None, None
-# CLASS_NAMES_GLOBAL = None # We'll use CLASS_LABEL_MAP instead for clarity
 MODELS_LOADED_SUCCESSFULLY = False
 MODEL_LOADING_ERROR_MESSAGE = ""
 
@@ -73,24 +71,20 @@ except Exception as e:
 
 # Helper function: 3D PCA Visualization using Plotly
 def plot_token_pca_3d_plotly(token_embeddings_3d, tokens, scores, title="Token Embeddings 3D PCA (Colored by Importance)"):
-    num_annotations = min(len(tokens), 20)
-
-    # Ensure scores is a 1D numpy array for Plotly marker color processing
+    num_annotations = min(len(tokens), 20)
     scores_array = np.array(scores).flatten()
-
-    # Prepare text annotations (only for most important tokens to avoid clutter)
     text_annotations = [''] * len(tokens)
     if len(scores_array) > 0 and len(tokens) > 0:
         indices_to_annotate = np.argsort(scores_array)[-num_annotations:]
         for i in indices_to_annotate:
-            if i < len(tokens):
+            if i < len(tokens):
                 text_annotations[i] = tokens[i]
 
     fig = go.Figure(data=[go.Scatter3d(
         x=token_embeddings_3d[:, 0],
         y=token_embeddings_3d[:, 1],
         z=token_embeddings_3d[:, 2],
-        mode='markers+text',
+        mode='markers+text',
         text=text_annotations,
         textfont=dict(size=9, color='#333333'),
         textposition='top center',
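Note: `np.argsort(scores_array)[-num_annotations:]` labels only the highest-scoring tokens, keeping the 3D plot readable. A self-contained sketch of that selection (sample tokens and scores are made up):

    import numpy as np

    tokens = ["the", "striker", "scored", "a", "goal"]
    scores_array = np.array([0.05, 0.90, 0.70, 0.02, 0.95])
    num_annotations = min(len(tokens), 2)

    # argsort is ascending, so the last slice holds the top-scoring indices.
    text_annotations = [""] * len(tokens)
    for i in np.argsort(scores_array)[-num_annotations:]:
        text_annotations[i] = tokens[i]
    print(text_annotations)  # ['', 'striker', '', '', 'goal']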
@@ -98,25 +92,26 @@ def plot_token_pca_3d_plotly(token_embeddings_3d, tokens, scores, title="Token E
             size=6,
             color=scores_array,
             colorscale='RdBu',
-            reversescale=True,
+            reversescale=True,
             opacity=0.8,
             colorbar=dict(title='Importance', tickfont=dict(size=9), len=0.75, yanchor='middle')
         ),
-        hoverinfo='text',
-        hovertext=[f"Token: {t}<br>Score: {s:.3f}" for t, s in zip(tokens, scores_array)]
+        hoverinfo='text',
+        hovertext=[f"Token: {t}<br>Score: {s:.3f}" for t, s in zip(tokens, scores_array)]
     )])
 
     fig.update_layout(
         title=dict(text=title, x=0.5, font=dict(size=16)),
         scene=dict(
-
-
-
+            # Fixed part: include text and font inside the title property
+            xaxis=dict(title=dict(text='PCA Comp 1', font=dict(size=10)), tickfont=dict(size=9), backgroundcolor="rgba(230, 230, 230, 0.8)"),
+            yaxis=dict(title=dict(text='PCA Comp 2', font=dict(size=10)), tickfont=dict(size=9), backgroundcolor="rgba(230, 230, 230, 0.8)"),
+            zaxis=dict(title=dict(text='PCA Comp 3', font=dict(size=10)), tickfont=dict(size=9), backgroundcolor="rgba(230, 230, 230, 0.8)"),
             bgcolor="rgba(255, 255, 255, 0.95)",
-            camera_eye=dict(x=1.5, y=1.5, z=0.5)
+            camera_eye=dict(x=1.5, y=1.5, z=0.5)
         ),
         margin=dict(l=5, r=5, b=5, t=45),
-        paper_bgcolor='rgba(0,0,0,0)'
+        paper_bgcolor='rgba(0,0,0,0)'
     )
     return fig
 
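Note: this is the substantive fix in the commit. The scene axes are rebuilt with the nested title form, `title=dict(text=..., font=...)`, which is the schema current Plotly expects; the older flat `titlefont` style was deprecated and later dropped, a common cause of crashes in upgraded Spaces. A minimal standalone sketch (random points and figure setup are assumptions, not the app's data):

    import numpy as np
    import plotly.graph_objects as go

    pts = np.random.default_rng(0).normal(size=(10, 3))
    fig = go.Figure(go.Scatter3d(x=pts[:, 0], y=pts[:, 1], z=pts[:, 2], mode="markers"))
    fig.update_layout(
        scene=dict(
            # Nested form: the axis title is itself an object with text and font.
            xaxis=dict(title=dict(text="PCA Comp 1", font=dict(size=10))),
            yaxis=dict(title=dict(text="PCA Comp 2", font=dict(size=10))),
            zaxis=dict(title=dict(text="PCA Comp 3", font=dict(size=10))),
        )
    )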
@@ -127,7 +122,7 @@ def create_empty_plotly_figure(message="N/A"):
     fig.update_layout(
         xaxis={'visible': False},
         yaxis={'visible': False},
-        height=300,
+        height=300,
         paper_bgcolor='rgba(0,0,0,0)',
         plot_bgcolor='rgba(0,0,0,0)'
     )
@@ -139,7 +134,8 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
         error_html = f"<p style='color:red;'>Initialization Error: {html.escape(MODEL_LOADING_ERROR_MESSAGE)}</p>"
         empty_df = pd.DataFrame(columns=['token', 'score'])
         empty_fig = create_empty_plotly_figure("Model Loading Failed")
-
+        # Fix the error return values for the gr.Label output
+        return error_html, [], "Model Loading Failed", {"Status":"Error", "Message":"Model Loading Failed"}, [], empty_df, empty_fig
 
     try:
         tokenizer, model = TOKENIZER_GLOBAL, MODEL_GLOBAL
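Note: the removed error paths returned nothing or a truncated tuple (the old lines are cut off in the rendered diff), while a Gradio callback must return exactly one value per component in its `outputs` list; here the fixed paths return the full seven values, ending with the empty DataFrame and Plot placeholders. A toy sketch of that contract (the components are illustrative, not the ones defined in app.py):

    import gradio as gr

    def analyze(text):
        if not text.strip():
            # The error path must still match the outputs' arity: one value each.
            return "<p style='color:red;'>empty input</p>", {"Status": "Error"}
        return f"<p>{text}</p>", {"Status": "OK"}

    demo = gr.Interface(fn=analyze, inputs="text", outputs=[gr.HTML(), gr.JSON()])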
@@ -151,7 +147,7 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
         if input_ids.shape[1] == 0:
             empty_df = pd.DataFrame(columns=['token', 'score'])
             empty_fig = create_empty_plotly_figure("Invalid Input")
-            return "<p style='color:orange;'>Input Error: No valid tokens found.</p>", [], "Input Error", {"Error":"Input
+            return "<p style='color:orange;'>Input Error: No valid tokens found.</p>", [], "Input Error", {"Status":"Error", "Message":"Invalid Input"}, [], empty_df, empty_fig
 
         input_embeds_detached = model.embeddings.word_embeddings(input_ids).clone().detach()
         input_embeds_for_grad = input_embeds_detached.clone().requires_grad_(True)
@@ -170,12 +166,12 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
         if input_embeds_for_grad.grad is None:
             empty_df = pd.DataFrame(columns=['token', 'score'])
             empty_fig = create_empty_plotly_figure("Gradient Error")
-            return "<p style='color:red;'>Analysis Error: Gradient calculation failed.</p>", [],"Analysis Error", {"Error":"
+            return "<p style='color:red;'>Analysis Error: Gradient calculation failed.</p>", [],"Analysis Error", {"Status":"Error", "Message":"Gradient Error"}, [], empty_df, empty_fig
 
         grads = input_embeds_for_grad.grad.clone().detach()
         scores = (grads * input_embeds_detached).norm(dim=2).squeeze(0)
         scores_np = scores.cpu().numpy()
-        valid_scores_for_norm = scores_np[np.isfinite(scores_np)]
+        valid_scores_for_norm = scores_np[np.isfinite(scores_np)]
         scores_np = scores_np / (valid_scores_for_norm.max() + 1e-9) if len(valid_scores_for_norm) > 0 and valid_scores_for_norm.max() > 0 else np.zeros_like(scores_np)
 
         tokens_raw = tokenizer.convert_ids_to_tokens(input_ids[0], skip_special_tokens=False)
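Note: the scoring above is gradient-times-input saliency: per token, the L2 norm of gradient * embedding, then max-normalized to [0, 1] using only the finite values, which guards against NaN/inf leaking out of the backward pass. A numpy-only sketch with stand-in shapes (batch=1, 5 tokens, 8 dims):

    import numpy as np

    grads = np.random.default_rng(0).normal(size=(1, 5, 8))
    embeds = np.random.default_rng(1).normal(size=(1, 5, 8))
    scores_np = np.linalg.norm(grads * embeds, axis=2).squeeze(0)

    valid = scores_np[np.isfinite(scores_np)]
    scores_np = scores_np / (valid.max() + 1e-9) if valid.size and valid.max() > 0 else np.zeros_like(scores_np)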
@@ -196,7 +192,7 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
             html_tokens_list.append(f"<span style='font-weight:bold;'>{html.escape(clean_tok_str)}</span>")
             highlighted_text_data.append((clean_tok_str + " ", None))
         else:
-            color = f"rgba(220, 50, 50, {current_score_clipped:.2f})"
+            color = f"rgba(220, 50, 50, {current_score_clipped:.2f})"
             html_tokens_list.append(f"<span style='background-color:{color}; color:white; padding: 1px 3px; margin: 1px; border-radius: 4px; display:inline-block;'>{html.escape(clean_tok_str)}</span>")
             highlighted_text_data.append((clean_tok_str + " ", round(current_score_clipped, 3)))
 
@@ -227,7 +223,7 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
         pca_tokens = [actual_tokens[i] for i in non_special_token_indices]
         if len(pca_tokens) > 0:
             pca_embeddings = actual_input_embeds[non_special_token_indices, :]
-            pca_scores_for_plot = actual_scores_np[non_special_token_indices]
+            pca_scores_for_plot = actual_scores_np[non_special_token_indices]
 
             pca = PCA(n_components=3, random_state=SEED)
             token_embeddings_3d = pca.fit_transform(pca_embeddings)
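Note: PCA is fit on the non-special tokens only, so the [CLS]/[SEP] embeddings don't dominate the projection axes. A minimal sketch with stand-in embeddings (768 dims, as for bert-base-uncased):

    import numpy as np
    from sklearn.decomposition import PCA

    SEED = 0
    embeddings = np.random.default_rng(SEED).normal(size=(12, 768))
    coords_3d = PCA(n_components=3, random_state=SEED).fit_transform(embeddings)
    print(coords_3d.shape)  # (12, 3)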
@@ -245,10 +241,10 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
         print(f"analyze_sentence_for_gradio error: {e}\n{tb_str}")
         empty_df = pd.DataFrame(columns=['token', 'score'])
         empty_fig = create_empty_plotly_figure("Analysis Error")
-
+        # Fix the error return values for the gr.Label output
+        return error_html, [], "Analysis Failed", {"Status":"Error", "Message": str(e)}, [], empty_df, empty_fig
 
 # --- Gradio UI Definition (Translated and Enhanced) ---
-# Using a built-in theme and some CSS for aesthetics
 theme = gr.themes.Monochrome(
     primary_hue=gr.themes.colors.blue,
     secondary_hue=gr.themes.colors.sky,
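Note: this second fix mirrors the first: the `except` handler now returns the same seven-element shape as the success path, surfacing `str(e)` in the JSON status output. `error_html` must be assigned earlier in the handler, in context lines this hunk does not show.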
@@ -260,14 +256,13 @@ theme = gr.themes.Monochrome(
     button_primary_text_color="white",
 )
 
-
 with gr.Blocks(title="AI Sentence Analyzer XAI 🔍", theme=theme, css=".gradio-container {max-width: 98% !important;}") as demo:
     gr.Markdown("# 🔍 AI Sentence Analyzer XAI: Exploring Model Explanations")
     gr.Markdown("Analyze English sentences to understand BERT model predictions through various XAI visualization techniques. "
                 "Explore token importance and their distribution in the embedding space.")
 
     with gr.Row(equal_height=False):
-        with gr.Column(scale=1, min_width=350):
+        with gr.Column(scale=1, min_width=350):
             with gr.Group():
                 gr.Markdown("### ✍️ Input Sentence & Settings")
                 input_sentence = gr.Textbox(lines=5, label="English Sentence to Analyze", placeholder="Enter the English sentence you want to analyze here...")
@@ -289,10 +284,6 @@ with gr.Blocks(title="AI Sentence Analyzer XAI 🔍", theme=theme, css=".gradio-
                     output_highlighted_text = gr.HighlightedText(
                         label="Token Importance (Score: 0-1)",
                         show_legend=True,
-                        # Color map can be more sophisticated if scores are categorical
-                        # For numerical scores (0-1), Gradio tries to infer intensity.
-                        # Example color map (if scores were categories like "LOW", "MEDIUM", "HIGH"):
-                        # color_map={"LOW": "lightblue", "MEDIUM": "lightgreen", "HIGH": "pink"},
                         combine_adjacent=False
                     )
                 with gr.TabItem("📊 Top-K Bar Plot", id=2):
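Note: with numeric values, `gr.HighlightedText` consumes `(text, value)` pairs, where `None` leaves a token unstyled and a 0-1 float is rendered as highlight intensity; the dropped comments described an alternative categorical `color_map`. A sketch of the value shape (sample data made up):

    highlighted = [("the ", None), ("striker ", 0.91), ("scored ", 0.73)]
    # e.g. gr.HighlightedText(value=highlighted, combine_adjacent=False)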
@@ -301,8 +292,7 @@ with gr.Blocks(title="AI Sentence Analyzer XAI 🔍", theme=theme, css=".gradio-
                         x="token",
                         y="score",
                         tooltip=['token', 'score'],
-                        min_width=300
-                        # title="Top-K Most Important Tokens" # BarPlot may not have a direct title prop
+                        min_width=300
                     )
                 with gr.TabItem("🌐 Token Embeddings 3D PCA (Interactive)", id=3):
                     output_pca_plot = gr.Plot(label="3D PCA of Token Embeddings (Colored by Importance Score)")
@@ -322,7 +312,7 @@ with gr.Blocks(title="AI Sentence Analyzer XAI 🔍", theme=theme, css=".gradio-
             output_pca_plot
         ],
         fn=analyze_sentence_for_gradio,
-        cache_examples=False
+        cache_examples=False
     )
     gr.HTML("<p style='text-align: center; color: #4a5568;'>Explainable AI Demo powered by Gradio & Hugging Face Transformers</p>")
 
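Note: with `cache_examples=False`, clicking an example runs `analyze_sentence_for_gradio` live rather than serving precomputed outputs, so the gradient-based analysis is not executed for every example at startup, at the cost of a short wait on first click.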