Spaces:

simondh
/

classifieur

Sleeping

simondh committed on Apr 15

Commit

442b8d8

1 Parent(s): d3bdf42

isolate prompts

Files changed (2) hide show

prompts.py CHANGED Viewed

@@ -61,3 +61,18 @@ Example texts:
 Return your answer as a comma-separated list of new category names only.
 """

 Return your answer as a comma-separated list of new category names only.
 """
+# Validation prompt
+VALIDATION_PROMPT = """
+As a validation expert, review the following text classifications and provide feedback.
+For each text, assess whether the assigned category seems appropriate:
+{}
+Provide a brief validation report with:
+1. Overall accuracy assessment (0-100%)
+2. Any potential misclassifications identified
+3. Suggestions for improvement
+Keep your response under 300 words.
+"""

utils.py CHANGED Viewed

@@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
 from sklearn.decomposition import PCA
 from sklearn.feature_extraction.text import TfidfVectorizer
 import tempfile
 def load_data(file_path):
@@ -133,7 +134,7 @@ def validate_results(df, text_columns, client):
         sample_size = min(5, len(df))
         sample_df = df.sample(n=sample_size, random_state=42)
-        # Build validation prompt
         validation_prompts = []
         for _, row in sample_df.iterrows():
             # Combine text from all selected columns
@@ -145,21 +146,8 @@ def validate_results(df, text_columns, client):
                 f"Text: {text}\nAssigned Category: {assigned_category}\nConfidence: {confidence}\n"
             )
-        prompt = """
-        As a validation expert, review the following text classifications and provide feedback.
-        For each text, assess whether the assigned category seems appropriate:
-        {}
-        Provide a brief validation report with:
-        1. Overall accuracy assessment (0-100%)
-        2. Any potential misclassifications identified
-        3. Suggestions for improvement
-        Keep your response under 300 words.
-        """.format(
-            "\n---\n".join(validation_prompts)
-        )
         # Call LLM API
         response = client.chat.completions.create(

 from sklearn.decomposition import PCA
 from sklearn.feature_extraction.text import TfidfVectorizer
 import tempfile
+from prompts import VALIDATION_PROMPT
 def load_data(file_path):
         sample_size = min(5, len(df))
         sample_df = df.sample(n=sample_size, random_state=42)
+        # Build validation prompts
         validation_prompts = []
         for _, row in sample_df.iterrows():
             # Combine text from all selected columns
                 f"Text: {text}\nAssigned Category: {assigned_category}\nConfidence: {confidence}\n"
             )
+        # Use the prompt from prompts.py
+        prompt = VALIDATION_PROMPT.format("\n---\n".join(validation_prompts))
         # Call LLM API
         response = client.chat.completions.create(