Spaces:

kikikara
/

TUFA-Explainable_AI

Sleeping

App Files Community

kikikara committed on Jun 4

Commit

2b56bba

verified ·

1 Parent(s): 1613a6f

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -21

app.py CHANGED Viewed

@@ -9,7 +9,6 @@ import pandas as pd
 import matplotlib
 matplotlib.use('Agg') # Matplotlib 백엔드 설정 (Gradio와 함께 사용 시 중요)
 import matplotlib.pyplot as plt
-# from mpl_toolkits.mplot3d import Axes3D # 3D 플롯에 Axes3D 명시적 임포트는 최신 matplotlib에서 필수는 아닐 수 있음
 from sklearn.decomposition import PCA
 # --- 기존 설정 및 전역 모델 로드 부분 ---
@@ -55,7 +54,7 @@ try:
     TOKENIZER_GLOBAL = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
     MODEL_GLOBAL     = AutoModel.from_pretrained(
-        MODEL_NAME, output_hidden_states=True, output_attentions=False # 어텐션 불필요
     ).to(DEVICE).eval()
     if hasattr(lda, 'classes_'): CLASS_NAMES_GLOBAL = lda.classes_
@@ -75,14 +74,13 @@ def plot_token_pca_3d(token_embeddings_3d, tokens, scores, title="Token Embeddin
     ax = fig.add_subplot(111, projection='3d')
     num_annotations = min(len(tokens), 15)
-    # 점수가 높은 순으로 정렬하여 어노테이트할 인덱스 선택 (중요도 높은 토큰 위주)
-    # scores가 NumPy 배열이라고 가정
-    if len(scores) > 0:
-        indices_to_annotate = np.argsort(scores)[-num_annotations:]
-    else: # scores가 비어있거나 문제가 있는 경우
         indices_to_annotate = np.array([])
     scatter = ax.scatter(token_embeddings_3d[:, 0], token_embeddings_3d[:, 1], token_embeddings_3d[:, 2],
                          c=scores, cmap="coolwarm_r", s=50, alpha=0.8, depthshade=True)
@@ -110,10 +108,6 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
         ax = fig.add_subplot(111)
         ax.text(0.5, 0.5, message, ha='center', va='center', fontsize=10)
         ax.axis('off')
-        # Gradio가 Figure 객체를 처리하므로, 여기서는 close를 호출하지 않거나
-        # Gradio의 Plot 컴포넌트가 Figure를 어떻게 다루는지 확인 필요.
-        # 일반적으로는 close하지 않고 Figure 객체 자체를 반환합니다.
-        # plt.close(fig) # 일단 주석 처리하여 Gradio가 Figure를 받도록 함
         return fig
     if not MODELS_LOADED_SUCCESSFULLY:
@@ -204,23 +198,20 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
         prediction_summary_text = f"클래스: {predicted_class_label_str}\n확률: {pred_prob_val:.3f}"
         prediction_details_for_label = {"예측 클래스": predicted_class_label_str, "확률": f"{pred_prob_val:.3f}"}
-        pca_fig = create_empty_plot("PCA Plot N/A\n(Not enough non-special tokens for 3D)") # 기본 빈 플롯
         non_special_token_indices = [idx for idx, token_id in enumerate(input_ids[0,:len(actual_tokens)].tolist())
                                      if token_id not in [cls_token_id, sep_token_id]]
         if len(non_special_token_indices) >= 3 :
             pca_tokens = [actual_tokens[i] for i in non_special_token_indices]
-            # non_special_token_indices에 해당하는 임베딩과 점수만 추출
-            if len(pca_tokens) > 0: # pca_tokens가 비어있지 않은지 확인
                 pca_embeddings = actual_input_embeds[non_special_token_indices, :]
                 pca_scores = actual_scores_np[non_special_token_indices]
                 pca = PCA(n_components=3, random_state=SEED)
                 token_embeddings_3d = pca.fit_transform(pca_embeddings)
-                # 이전 그림이 있다면 닫고 새로 그림 (Gradio Plot이 Figure 객체를 직접 받으므로)
-                plt.close(pca_fig)
                 pca_fig = plot_token_pca_3d(token_embeddings_3d, pca_tokens, pca_scores)
-            # else: pca_fig는 이미 위에서 빈 플롯으로 초기화됨
         return (html_output_str, highlighted_text_data,
                 prediction_summary_text, prediction_details_for_label,
@@ -236,7 +227,6 @@ def analyze_sentence_for_gradio(sentence_text, top_k_value):
         empty_fig_placeholder = create_empty_plot("Error during plot generation")
         return error_html, [], "분석 실패", {"오류": str(e)}, [], empty_df, empty_fig_placeholder
 # ────────── Gradio 인터페이스 정의 ──────────
 theme = gr.themes.Glass(primary_hue="blue", secondary_hue="cyan", neutral_hue="sky").set(
     body_background_fill="linear-gradient(to right, #c9d6ff, #e2e2e2)",
@@ -279,7 +269,7 @@ with gr.Blocks(title="AI 문장 분석기 XAI 🚀", theme=theme, css=".gradio-c
                 label="Top-K 토큰 중요도",
                 x="token",
                 y="score",
-                tooltip=['token', 'score'], # 수정된 부분: 문제가 된 파라미터 삭제
                 min_width=300
             )
         with gr.TabItem("🌐 토큰 임베딩 3D PCA", id=3):
@@ -302,7 +292,8 @@ with gr.Blocks(title="AI 문장 분석기 XAI 🚀", theme=theme, css=".gradio-c
         fn=analyze_sentence_for_gradio,
         cache_examples=False
     )
-    gr.Markdown("<p style='text-align: center; color: #666;'>Explainable AI Demo with Gradio & Transformers</p>", unsafe_allow_html=True)
     submit_button.click(
         fn=analyze_sentence_for_gradio,

 import matplotlib
 matplotlib.use('Agg') # Matplotlib 백엔드 설정 (Gradio와 함께 사용 시 중요)
 import matplotlib.pyplot as plt
 from sklearn.decomposition import PCA
 # --- 기존 설정 및 전역 모델 로드 부분 ---
     TOKENIZER_GLOBAL = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
     MODEL_GLOBAL     = AutoModel.from_pretrained(
+        MODEL_NAME, output_hidden_states=True, output_attentions=False
     ).to(DEVICE).eval()
     if hasattr(lda, 'classes_'): CLASS_NAMES_GLOBAL = lda.classes_
     ax = fig.add_subplot(111, projection='3d')
     num_annotations = min(len(tokens), 15)
+    if len(scores) > 0 and len(tokens) > 0: # scores와 tokens가 비어있지 않은지 확인
+        # scores가 NumPy 배열이 아닐 수 있으므로, 리스트인 경우 np.array로 변환
+        scores_np_array = np.array(scores)
+        indices_to_annotate = np.argsort(scores_np_array)[-num_annotations:]
+    else:
         indices_to_annotate = np.array([])
     scatter = ax.scatter(token_embeddings_3d[:, 0], token_embeddings_3d[:, 1], token_embeddings_3d[:, 2],
                          c=scores, cmap="coolwarm_r", s=50, alpha=0.8, depthshade=True)
         ax = fig.add_subplot(111)
         ax.text(0.5, 0.5, message, ha='center', va='center', fontsize=10)
         ax.axis('off')
         return fig
     if not MODELS_LOADED_SUCCESSFULLY:
         prediction_summary_text = f"클래스: {predicted_class_label_str}\n확률: {pred_prob_val:.3f}"
         prediction_details_for_label = {"예측 클래스": predicted_class_label_str, "확률": f"{pred_prob_val:.3f}"}
+        pca_fig = create_empty_plot("PCA Plot N/A\n(Not enough non-special tokens for 3D)")
         non_special_token_indices = [idx for idx, token_id in enumerate(input_ids[0,:len(actual_tokens)].tolist())
                                      if token_id not in [cls_token_id, sep_token_id]]
         if len(non_special_token_indices) >= 3 :
             pca_tokens = [actual_tokens[i] for i in non_special_token_indices]
+            if len(pca_tokens) > 0:
                 pca_embeddings = actual_input_embeds[non_special_token_indices, :]
                 pca_scores = actual_scores_np[non_special_token_indices]
                 pca = PCA(n_components=3, random_state=SEED)
                 token_embeddings_3d = pca.fit_transform(pca_embeddings)
+                # plt.close(pca_fig) # 이전 빈 그림 닫기
                 pca_fig = plot_token_pca_3d(token_embeddings_3d, pca_tokens, pca_scores)
         return (html_output_str, highlighted_text_data,
                 prediction_summary_text, prediction_details_for_label,
         empty_fig_placeholder = create_empty_plot("Error during plot generation")
         return error_html, [], "분석 실패", {"오류": str(e)}, [], empty_df, empty_fig_placeholder
 # ────────── Gradio 인터페이스 정의 ──────────
 theme = gr.themes.Glass(primary_hue="blue", secondary_hue="cyan", neutral_hue="sky").set(
     body_background_fill="linear-gradient(to right, #c9d6ff, #e2e2e2)",
                 label="Top-K 토큰 중요도",
                 x="token",
                 y="score",
+                tooltip=['token', 'score'], # SyntaxError 수정됨
                 min_width=300
             )
         with gr.TabItem("🌐 토큰 임베딩 3D PCA", id=3):
         fn=analyze_sentence_for_gradio,
         cache_examples=False
     )
+    # gr.Markdown을 gr.HTML로 변경하여 HTML 태그 직접 사용
+    gr.HTML("<p style='text-align: center; color: #666;'>Explainable AI Demo with Gradio & Transformers</p>")
     submit_button.click(
         fn=analyze_sentence_for_gradio,