|
import streamlit as st |
|
import sys |
|
import os |
|
from fastapi import FastAPI |
|
from pydantic import BaseModel |
|
import threading |
|
import uvicorn |
|
|
|
|
|
# Make the directory two levels above this file importable (presumably so the
# `app` package imported below can be resolved -- confirm against the repo layout).
# Streamlit re-executes this script on every interaction and `sys.path` is
# process-wide, so guard the append to avoid adding the same entry on each rerun.
_PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"))
if _PROJECT_ROOT not in sys.path:
    sys.path.append(_PROJECT_ROOT)
|
|
|
from app.utils.hf_model_wrapper import classify_prompt |
|
|
|
|
|
|
|
|
|
# FastAPI application that exposes the classifier over HTTP; the route below
# is registered on this instance.
api = FastAPI()


# Request body schema for POST /classify.
# (Documented with comments rather than a docstring so the generated OpenAPI
# schema stays exactly as it was.)
class PromptRequest(BaseModel):
    # The raw prompt text to classify.
    prompt: str
|
|
|
# POST /classify: run the prompt through `classify_prompt` and return its
# label and confidence as a JSON object.
# NOTE(review): `classify_prompt` is called synchronously inside an async
# handler; if it is a blocking model call it will stall the event loop while
# it runs -- confirm whether that is acceptable.
@api.post("/classify")
async def classify_endpoint(data: PromptRequest):
    verdict = classify_prompt(data.prompt)
    predicted_label, score = verdict
    return dict(label=predicted_label, confidence=score)
|
|
|
def run_api():
    """Serve the FastAPI app `api` with uvicorn on 0.0.0.0, port 8000."""
    bind_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(api, **bind_options)
|
|
|
|
|
# Launch the API server in a daemon thread next to the Streamlit UI.
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so an unguarded start() would spawn a fresh server thread per
# rerun, each one trying to bind port 8000 again. Threads live for the whole
# process, so look for an already-running thread with our dedicated name and
# only start a new one when none exists.
_API_THREAD_NAME = "llmguard-api"
if not any(t.name == _API_THREAD_NAME for t in threading.enumerate()):
    threading.Thread(target=run_api, name=_API_THREAD_NAME, daemon=True).start()
|
|
|
|
|
|
|
|
|
# --- Page chrome: browser-tab title, centered layout, heading and intro text ---
# NOTE(review): the dash/emoji characters in the UI strings look mis-encoded;
# they are kept exactly as found -- restore them from the original source.
st.set_page_config(
    layout="centered",
    page_title="LLMGuard β Prompt Moderation",
)
st.title("π‘οΈ LLMGuard β Prompt Moderation Tool")

# Short usage blurb explaining the two labels shown to the user.
_INTRO_MARKDOWN = """
Enter a user prompt below. This tool will classify it using your custom injection detection model.
- **Injection**: Detected as prompt injection attempt
- **Safe**: Normal prompt
"""
st.markdown(_INTRO_MARKDOWN)
|
|
|
|
|
# Per-session list of past moderation results; created once and then kept in
# Streamlit's session state across reruns.
if "history" not in st.session_state:
    st.session_state["history"] = []

# Free-text box holding the prompt to be moderated.
user_input = st.text_area(
    "βοΈ User Prompt",
    height=150,
    placeholder="Enter your prompt here...",
)
|
|
|
|
|
# Classify the entered prompt when the button is pressed, show the result and
# record it at the top of the session history.
if st.button("π Moderate"):
    if user_input.strip():
        label, confidence = classify_prompt(user_input)

        # Normalise the score to a built-in float: `st.progress` only accepts
        # int/float values, and a model wrapper may hand back another numeric
        # scalar type (assumption -- the wrapper is not visible here).
        score = float(confidence)
        # Clamp on both sides; the progress bar rejects floats outside
        # [0.0, 1.0], and min() alone does not guard the lower bound.
        bar_value = max(0.0, min(score, 1.0))

        st.markdown(f"### π§Ύ Result: **{label}**")
        st.progress(bar_value, text=f"Confidence: {score:.2f}")

        # Newest entry first, so the history section lists latest results on top.
        st.session_state.history.insert(0, {
            "prompt": user_input,
            "label": label,
            "confidence": round(score, 3),
        })
    else:
        # Empty or whitespace-only input: nothing to classify.
        st.warning("Please enter a prompt.")
|
|
|
|
|
# Render the moderation history (if any): one collapsible entry per past
# prompt, numbered from 1 in stored order, with the prompt text inside.
past_results = st.session_state.history
if past_results:
    st.markdown("---")
    st.subheader("π Moderation History")
    for position, record in enumerate(past_results, start=1):
        header = f"π Prompt {position}: {record['label']} (Confidence: {record['confidence']})"
        with st.expander(header):
            st.code(record["prompt"], language="text")
|
|