"""Prepare the prompt-injection dataset: load, clean, shuffle, report and save.

The input is a header-less text/CSV file with one ``prompt,label`` pair per
line, where the label is ``safe`` or ``unsafe``.
"""

import pandas as pd

INPUT_PATH = "data/injection_prompts.txt"
OUTPUT_PATH = "data/cleaned_injection_prompts.csv"
COLUMNS = ["prompt", "label"]
LABEL_TO_ID = {"safe": 0, "unsafe": 1}


def clean_prompts(df, seed=None):
    """Return a cleaned, shuffled copy of a ``prompt``/``label`` frame.

    Args:
        df: DataFrame with a string ``prompt`` column and a ``label`` column
            holding ``safe`` / ``unsafe`` (case and surrounding whitespace
            are ignored).
        seed: Optional ``random_state`` for the shuffle. ``None`` (default)
            gives a different order on every call.

    Returns:
        A new DataFrame with quotes stripped from the prompts, labels mapped
        to integers (``safe`` -> 0, ``unsafe`` -> 1), rows shuffled and the
        index reset to ``0..n-1``. The input frame is not modified.

    Raises:
        ValueError: If any label is not a key of ``LABEL_TO_ID``.
    """
    cleaned = df.copy()

    # Optional: strip stray double quotes left around the prompt text.
    cleaned["prompt"] = cleaned["prompt"].str.strip('"')

    # Normalise before mapping: a "prompt, unsafe" line yields " unsafe",
    # which a plain .map() would silently turn into NaN.
    normalized = cleaned["label"].astype(str).str.strip().str.lower()
    label_ids = normalized.map(LABEL_TO_ID)

    unknown = label_ids.isna()
    if unknown.any():
        bad_values = sorted(set(cleaned.loc[unknown, "label"].astype(str)))
        raise ValueError(
            f"Unrecognised label(s) {bad_values}; expected one of "
            f"{sorted(LABEL_TO_ID)}"
        )
    cleaned["label"] = label_ids.astype(int)

    # Shuffle the dataset for good measure.
    return cleaned.sample(frac=1, random_state=seed).reset_index(drop=True)


def main():
    """Load the raw prompt file, clean it, print stats and write the CSV."""
    # Load .txt or .csv with prompt,label format (no header row).
    df = pd.read_csv(INPUT_PATH, names=COLUMNS)
    df = clean_prompts(df)

    # Check stats
    print(" Dataset Loaded")
    print(df["label"].value_counts())

    # Preview
    print(df.head())

    # Save to CSV (optional)
    df.to_csv(OUTPUT_PATH, index=False)


if __name__ == "__main__":
    main()