Spaces:

kikikara
/

TUFA-Explainable_AI

Sleeping

App Files Community

kikikara committed on Jun 4

Commit

234dafc

verified ·

1 Parent(s): 72b741e

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -38

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import os
 import joblib
 import torch
 import numpy as np
-import html
 from transformers import AutoTokenizer, AutoModel, logging as hf_logging
 import pandas as pd
 import matplotlib
@@ -17,7 +17,7 @@ hf_logging.set_verbosity_error()
 MODEL_NAME = "bert-base-uncased"
 DEVICE     = "cpu"
-SAVE_DIR   = "저장저장1" # This folder name is from your setup
 LAYER_ID   = 4
 SEED       = 0
 CLF_NAME   = "linear"
@@ -127,14 +127,14 @@ def create_empty_plotly_figure(message="N/A"):
         )
     return fig
-# --- Core Analysis Function (returns 7 items for Gradio UI) ---
 def analyze_sentence_for_gradio(sentence_text, top_k_value):
     if not MODELS_LOADED_SUCCESSFULLY:
-        error_html = f"<p style='color:red;'>Initialization Error: {html.escape(MODEL_LOADING_ERROR_MESSAGE)}</p>"
         empty_df = pd.DataFrame(columns=['token', 'score'])
         empty_fig = create_empty_plotly_figure("Model Loading Failed")
         error_label_output = {"Status": "Error", "Message": "Model Loading Failed. Check logs."}
-        return error_html, [], "Model Loading Failed", error_label_output, [], empty_df, empty_fig
     try:
         tokenizer, model = TOKENIZER_GLOBAL, MODEL_GLOBAL
@@ -147,7 +147,7 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
             empty_df = pd.DataFrame(columns=['token', 'score'])
             empty_fig = create_empty_plotly_figure("Invalid Input")
             error_label_output = {"Status": "Error", "Message": "Invalid input, no valid tokens."}
-            return "<p style='color:orange;'>Input Error: No valid tokens found.</p>", [], "Input Error", error_label_output, [], empty_df, empty_fig
         input_embeds_detached = model.embeddings.word_embeddings(input_ids).clone().detach()
         input_embeds_for_grad = input_embeds_detached.clone().requires_grad_(True)
@@ -167,7 +167,7 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
             empty_df = pd.DataFrame(columns=['token', 'score'])
             empty_fig = create_empty_plotly_figure("Gradient Error")
             error_label_output = {"Status": "Error", "Message": "Gradient calculation failed."}
-            return "<p style='color:red;'>Analysis Error: Gradient calculation failed.</p>", [],"Analysis Error", error_label_output, [], empty_df, empty_fig
         grads = input_embeds_for_grad.grad.clone().detach()
         scores = (grads * input_embeds_detached).norm(dim=2).squeeze(0)
@@ -180,7 +180,8 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
         actual_scores_np = scores_np[:len(actual_tokens)]
         actual_input_embeds = input_embeds_detached[0, :len(actual_tokens), :].cpu().numpy()
-        html_tokens_list, highlighted_text_data = [], []
         cls_token_id, sep_token_id = tokenizer.cls_token_id, tokenizer.sep_token_id
         for i, tok_str in enumerate(actual_tokens):
@@ -190,15 +191,10 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
             current_token_id = input_ids[0, i].item()
             if current_token_id == cls_token_id or current_token_id == sep_token_id:
-                html_tokens_list.append(f"<span style='font-weight:bold;'>{html.escape(clean_tok_str)}</span>")
                 highlighted_text_data.append((clean_tok_str + " ", None))
             else:
-                color = f"rgba(220, 50, 50, {current_score_clipped:.2f})"
-                html_tokens_list.append(f"<span style='background-color:{color}; color:white; padding: 1px 3px; margin: 1px; border-radius: 4px; display:inline-block;'>{html.escape(clean_tok_str)}</span>")
                 highlighted_text_data.append((clean_tok_str + " ", round(current_score_clipped, 3)))
-        html_output_str = " ".join(html_tokens_list).replace(" ##", "")
         top_tokens_for_df, top_tokens_for_barplot_list = [], []
         valid_indices = [idx for idx, token_id in enumerate(input_ids[0,:len(actual_tokens)].tolist())
                          if token_id not in [cls_token_id, sep_token_id]]
@@ -230,22 +226,22 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
                 token_embeddings_3d = pca.fit_transform(pca_embeddings)
                 pca_fig = plot_token_pca_3d_plotly(token_embeddings_3d, pca_tokens, pca_scores_for_plot)
-        return (html_output_str, highlighted_text_data,
                 prediction_summary_text, prediction_details_for_label,
                 top_tokens_for_df, barplot_df,
-                pca_fig)
     except Exception as e:
         import traceback
         tb_str = traceback.format_exc()
-        error_html = f"<p style='color:red;'>Analysis Error: {html.escape(str(e))}</p><pre>{html.escape(tb_str)}</pre>"
         print(f"analyze_sentence_for_gradio error: {e}\n{tb_str}")
         empty_df = pd.DataFrame(columns=['token', 'score'])
         empty_fig = create_empty_plotly_figure("Analysis Error")
         error_label_output = {"Status": "Error", "Message": f"Analysis failed: {str(e)}"}
-        return error_html, [], "Analysis Failed", error_label_output, [], empty_df, empty_fig
-# --- Gradio UI Definition (Tabs removed, visualizations shown sequentially or in rows) ---
 theme = gr.themes.Monochrome(
     primary_hue=gr.themes.colors.blue,
     secondary_hue=gr.themes.colors.sky,
@@ -262,7 +258,6 @@ with gr.Blocks(title="AI Sentence Analyzer XAI 🚀", theme=theme, css=".gradio-
     gr.Markdown("Analyze English sentences to understand BERT model predictions through various XAI visualization techniques. "
                 "Explore token importance and their distribution in the embedding space.")
-    # Inputs and Summary Outputs Row
     with gr.Row(equal_height=False):
         with gr.Column(scale=1, min_width=350):
             with gr.Group():
@@ -278,18 +273,13 @@ with gr.Blocks(title="AI Sentence Analyzer XAI 🚀", theme=theme, css=".gradio-
             with gr.Accordion("⭐ Top-K Important Tokens (Table)", open=True):
                 output_top_tokens_df = gr.DataFrame(headers=["Token", "Score"], label="Most Important Tokens",
                                                     row_count=(1,"dynamic"), col_count=(2,"fixed"), interactive=False, wrap=True)
-    gr.Markdown("---") # Separator
-    # Visualization Section Title
     gr.Markdown("## 📊 Detailed Visualizations")
-    # HTML Highlight (Custom) - Full Width
-    with gr.Group():
-        gr.Markdown("### 🎨 HTML Highlight (Custom)")
-        output_html_visualization = gr.HTML(label="Token Importance (Gradient x Input based)")
-    # Highlighted Text (Gradio) - Full Width
-    with gr.Group():
         gr.Markdown("### 🖍️ Highlighted Text (Gradio)")
         output_highlighted_text = gr.HighlightedText(
             label="Token Importance (Score: 0-1)",
@@ -297,9 +287,8 @@ with gr.Blocks(title="AI Sentence Analyzer XAI 🚀", theme=theme, css=".gradio-
             combine_adjacent=False
         )
-    # BarPlot and PCA Plot Side-by-Side
-    with gr.Row():
-        with gr.Column(scale=1, min_width=400): # Adjusted min_width for BarPlot
             with gr.Group():
                 gr.Markdown("### 📊 Top-K Bar Plot")
                 output_top_tokens_barplot = gr.BarPlot(
@@ -307,14 +296,14 @@ with gr.Blocks(title="AI Sentence Analyzer XAI 🚀", theme=theme, css=".gradio-
                     x="token",
                     y="score",
                     tooltip=['token', 'score'],
-                    min_width=300 # BarPlot itself can define min_width
                 )
-        with gr.Column(scale=1, min_width=400): # Adjusted min_width for PCA
             with gr.Group():
                 gr.Markdown("### 🌐 Token Embeddings 3D PCA (Interactive)")
                 output_pca_plot = gr.Plot(label="3D PCA of Token Embeddings (Colored by Importance Score)")
-    gr.Markdown("---") # Separator
     gr.Examples(
         examples=[
@@ -323,8 +312,8 @@ with gr.Blocks(title="AI Sentence Analyzer XAI 🚀", theme=theme, css=".gradio-
             ["I was thoroughly disappointed with the lackluster performance and predictable plot.", 4]
         ],
         inputs=[input_sentence, input_top_k],
-        outputs=[
-            output_html_visualization, output_highlighted_text,
             output_prediction_summary, output_prediction_details,
             output_top_tokens_df, output_top_tokens_barplot,
             output_pca_plot
@@ -337,8 +326,8 @@ with gr.Blocks(title="AI Sentence Analyzer XAI 🚀", theme=theme, css=".gradio-
     submit_button.click(
         fn=analyze_sentence_for_gradio,
         inputs=[input_sentence, input_top_k],
-        outputs=[
-            output_html_visualization, output_highlighted_text,
             output_prediction_summary, output_prediction_details,
             output_top_tokens_df, output_top_tokens_barplot,
             output_pca_plot

 import joblib
 import torch
 import numpy as np
+import html # Kept because html.escape may still be used when building highlighted_text_data
 from transformers import AutoTokenizer, AutoModel, logging as hf_logging
 import pandas as pd
 import matplotlib
 MODEL_NAME = "bert-base-uncased"
 DEVICE     = "cpu"
+SAVE_DIR   = "저장저장1"
 LAYER_ID   = 4
 SEED       = 0
 CLF_NAME   = "linear"
         )
     return fig
+# --- Core Analysis Function (returns 6 items for Gradio UI) ---
 def analyze_sentence_for_gradio(sentence_text, top_k_value):
     if not MODELS_LOADED_SUCCESSFULLY:
+        # HTML output removed, adjust error return
         empty_df = pd.DataFrame(columns=['token', 'score'])
         empty_fig = create_empty_plotly_figure("Model Loading Failed")
         error_label_output = {"Status": "Error", "Message": "Model Loading Failed. Check logs."}
+        return [], "Model Loading Failed", error_label_output, [], empty_df, empty_fig # 6 items
     try:
         tokenizer, model = TOKENIZER_GLOBAL, MODEL_GLOBAL
             empty_df = pd.DataFrame(columns=['token', 'score'])
             empty_fig = create_empty_plotly_figure("Invalid Input")
             error_label_output = {"Status": "Error", "Message": "Invalid input, no valid tokens."}
+            return [], "Input Error", error_label_output, [], empty_df, empty_fig # 6 items
         input_embeds_detached = model.embeddings.word_embeddings(input_ids).clone().detach()
         input_embeds_for_grad = input_embeds_detached.clone().requires_grad_(True)
             empty_df = pd.DataFrame(columns=['token', 'score'])
             empty_fig = create_empty_plotly_figure("Gradient Error")
             error_label_output = {"Status": "Error", "Message": "Gradient calculation failed."}
+            return [],"Analysis Error", error_label_output, [], empty_df, empty_fig # 6 items
         grads = input_embeds_for_grad.grad.clone().detach()
         scores = (grads * input_embeds_detached).norm(dim=2).squeeze(0)
         actual_scores_np = scores_np[:len(actual_tokens)]
         actual_input_embeds = input_embeds_detached[0, :len(actual_tokens), :].cpu().numpy()
+        # HTML generation logic removed
+        highlighted_text_data = []
         cls_token_id, sep_token_id = tokenizer.cls_token_id, tokenizer.sep_token_id
         for i, tok_str in enumerate(actual_tokens):
             current_token_id = input_ids[0, i].item()
             if current_token_id == cls_token_id or current_token_id == sep_token_id:
                 highlighted_text_data.append((clean_tok_str + " ", None))
             else:
                 highlighted_text_data.append((clean_tok_str + " ", round(current_score_clipped, 3)))
         top_tokens_for_df, top_tokens_for_barplot_list = [], []
         valid_indices = [idx for idx, token_id in enumerate(input_ids[0,:len(actual_tokens)].tolist())
                          if token_id not in [cls_token_id, sep_token_id]]
                 token_embeddings_3d = pca.fit_transform(pca_embeddings)
                 pca_fig = plot_token_pca_3d_plotly(token_embeddings_3d, pca_tokens, pca_scores_for_plot)
+        return (highlighted_text_data, # HTML output removed
                 prediction_summary_text, prediction_details_for_label,
                 top_tokens_for_df, barplot_df,
+                pca_fig) # 6 items
     except Exception as e:
         import traceback
         tb_str = traceback.format_exc()
+        # HTML output removed
         print(f"analyze_sentence_for_gradio error: {e}\n{tb_str}")
         empty_df = pd.DataFrame(columns=['token', 'score'])
         empty_fig = create_empty_plotly_figure("Analysis Error")
         error_label_output = {"Status": "Error", "Message": f"Analysis failed: {str(e)}"}
+        return [], "Analysis Failed", error_label_output, [], empty_df, empty_fig # 6 items
+# --- Gradio UI Definition (HTML Highlight Tab removed) ---
 theme = gr.themes.Monochrome(
     primary_hue=gr.themes.colors.blue,
     secondary_hue=gr.themes.colors.sky,
     gr.Markdown("Analyze English sentences to understand BERT model predictions through various XAI visualization techniques. "
                 "Explore token importance and their distribution in the embedding space.")
     with gr.Row(equal_height=False):
         with gr.Column(scale=1, min_width=350):
             with gr.Group():
             with gr.Accordion("⭐ Top-K Important Tokens (Table)", open=True):
                 output_top_tokens_df = gr.DataFrame(headers=["Token", "Score"], label="Most Important Tokens",
                                                     row_count=(1,"dynamic"), col_count=(2,"fixed"), interactive=False, wrap=True)
+    gr.Markdown("---")
     gr.Markdown("## 📊 Detailed Visualizations")
+    # HTML Highlight (Custom) section removed
+    with gr.Group(): # HighlightedText
         gr.Markdown("### 🖍️ Highlighted Text (Gradio)")
         output_highlighted_text = gr.HighlightedText(
             label="Token Importance (Score: 0-1)",
             combine_adjacent=False
         )
+    with gr.Row(): # BarPlot and PCA Plot Side-by-Side
+        with gr.Column(scale=1, min_width=400):
             with gr.Group():
                 gr.Markdown("### 📊 Top-K Bar Plot")
                 output_top_tokens_barplot = gr.BarPlot(
                     x="token",
                     y="score",
                     tooltip=['token', 'score'],
+                    min_width=300
                 )
+        with gr.Column(scale=1, min_width=400):
             with gr.Group():
                 gr.Markdown("### 🌐 Token Embeddings 3D PCA (Interactive)")
                 output_pca_plot = gr.Plot(label="3D PCA of Token Embeddings (Colored by Importance Score)")
+    gr.Markdown("---")
     gr.Examples(
         examples=[
             ["I was thoroughly disappointed with the lackluster performance and predictable plot.", 4]
         ],
         inputs=[input_sentence, input_top_k],
+        outputs=[ # output_html_visualization removed
+            output_highlighted_text,
             output_prediction_summary, output_prediction_details,
             output_top_tokens_df, output_top_tokens_barplot,
             output_pca_plot
     submit_button.click(
         fn=analyze_sentence_for_gradio,
         inputs=[input_sentence, input_top_k],
+        outputs=[ # output_html_visualization removed
+            output_highlighted_text,
             output_prediction_summary, output_prediction_details,
             output_top_tokens_df, output_top_tokens_barplot,
             output_pca_plot