Spaces:

protectai
/

llm-guard-playground

Running

App Files Files Community

asofter commited on Nov 9, 2023

Commit

a6b53fb

1 Parent(s): 8950a27

* ONNX runtime

Browse files

* use llm-guard 0.3.1
* google analytics tracking
* linter to fix code

Files changed (6) hide show

.pre-commit-config.yaml +38 -0
Dockerfile +1 -1
app.py +28 -19
output.py +43 -27
prompt.py +25 -45
requirements.txt +4 -5

.pre-commit-config.yaml ADDED Viewed

	@@ -0,0 +1,38 @@

+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0
+    hooks:
+      - id: check-yaml
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+        types: [ python ]
+      - id: requirements-txt-fixer
+  - repo: https://github.com/psf/black
+    rev: 23.7.0
+    hooks:
+      - id: black
+        args: [ --line-length=100, --exclude="" ]
+  # this is not technically always safe but usually is
+  # use comments `# isort: off` and `# isort: on` to disable/re-enable isort
+  - repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        args: [ --line-length=100, --profile=black ]
+  # this is slightly dangerous because python imports have side effects
+  # and this tool removes unused imports, which may be providing
+  # necessary side effects for the code to run
+  - repo: https://github.com/PyCQA/autoflake
+    rev: v2.2.0
+    hooks:
+      - id: autoflake
+        args:
+          - "--in-place"
+          - "--expand-star-imports"
+          - "--remove-duplicate-keys"
+          - "--remove-unused-variables"
+          - "--remove-all-unused-imports"

Dockerfile CHANGED Viewed

@@ -1,4 +1,4 @@
-FROM python:3.10-slim
 RUN apt-get update && apt-get install -y \
     build-essential \

+FROM python:3.11-slim
 RUN apt-get update && apt-get install -y \
     build-essential \

app.py CHANGED Viewed

@@ -1,16 +1,33 @@
 import logging
-import time
 import traceback
-from datetime import timedelta
 import pandas as pd
 import streamlit as st
 from output import init_settings as init_output_settings
 from output import scan as scan_output
 from prompt import init_settings as init_prompt_settings
 from prompt import scan as scan_prompt
-from llm_guard.vault import Vault
 PROMPT = "prompt"
 OUTPUT = "output"
@@ -48,6 +65,8 @@ if scanner_type == PROMPT:
 elif scanner_type == OUTPUT:
     enabled_scanners, settings = init_output_settings()
 # Main pannel
 with st.expander("About", expanded=False):
     st.info(
@@ -93,32 +112,24 @@ elif scanner_type == OUTPUT:
 st_result_text = None
 st_analysis = None
 st_is_valid = None
-st_time_delta = None
 try:
     with st.form("text_form", clear_on_submit=False):
         submitted = st.form_submit_button("Process")
         if submitted:
-            results_valid = {}
-            results_score = {}
-            start_time = time.monotonic()
             if scanner_type == PROMPT:
-                st_result_text, results_valid, results_score = scan_prompt(
                     vault, enabled_scanners, settings, st_prompt_text, st_fail_fast
                 )
             elif scanner_type == OUTPUT:
-                st_result_text, results_valid, results_score = scan_output(
                     vault, enabled_scanners, settings, st_prompt_text, st_output_text, st_fail_fast
                 )
-            end_time = time.monotonic()
-            st_time_delta = timedelta(seconds=end_time - start_time)
-            st_is_valid = all(results_valid.values())
-            st_analysis = [
-                {"scanner": k, "is valid": results_valid[k], "risk score": results_score[k]}
-                for k in results_valid
-            ]
 except Exception as e:
     logger.error(e)
@@ -127,9 +138,7 @@ except Exception as e:
 # After:
 if st_is_valid is not None:
-    st.subheader(
-        f"Results - {'valid' if st_is_valid else 'invalid'} ({round(st_time_delta.total_seconds())} seconds)"
-    )
     col1, col2 = st.columns(2)

 import logging
 import traceback
 import pandas as pd
 import streamlit as st
+from llm_guard.vault import Vault
+from streamlit.components.v1 import html
 from output import init_settings as init_output_settings
 from output import scan as scan_output
 from prompt import init_settings as init_prompt_settings
 from prompt import scan as scan_prompt
+def add_google_analytics(ga4_id):
+    """
+    Add Google Analytics 4 to a Streamlit app
+    """
+    ga_code = f"""
+    <script async src="https://www.googletagmanager.com/gtag/js?id={ga4_id}"></script>
+    <script>
+      window.dataLayer = window.dataLayer || [];
+      function gtag(){{dataLayer.push(arguments);}}
+      gtag('js', new Date());
+      gtag('config', '{ga4_id}');
+    </script>
+    """
+    html(ga_code)
 PROMPT = "prompt"
 OUTPUT = "output"
 elif scanner_type == OUTPUT:
     enabled_scanners, settings = init_output_settings()
+add_google_analytics("G-0HBVNHEZBW")
 # Main pannel
 with st.expander("About", expanded=False):
     st.info(
 st_result_text = None
 st_analysis = None
 st_is_valid = None
 try:
     with st.form("text_form", clear_on_submit=False):
         submitted = st.form_submit_button("Process")
         if submitted:
+            results = {}
             if scanner_type == PROMPT:
+                st_result_text, results = scan_prompt(
                     vault, enabled_scanners, settings, st_prompt_text, st_fail_fast
                 )
             elif scanner_type == OUTPUT:
+                st_result_text, results = scan_output(
                     vault, enabled_scanners, settings, st_prompt_text, st_output_text, st_fail_fast
                 )
+            st_is_valid = all(item["is_valid"] for item in results)
+            st_analysis = results
 except Exception as e:
     logger.error(e)
 # After:
 if st_is_valid is not None:
+    st.subheader(f"Results - {'valid' if st_is_valid else 'invalid'}")
     col1, col2 = st.columns(2)

output.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import logging
 from typing import Dict, List
 import streamlit as st
-from streamlit_tags import st_tags
 from llm_guard.input_scanners.anonymize import default_entity_types
 from llm_guard.output_scanners import (
     JSON,
@@ -12,11 +12,11 @@ from llm_guard.output_scanners import (
     Bias,
     Code,
     Deanonymize,
     Language,
     LanguageSame,
     MaliciousURLs,
     NoRefusal,
-    Refutation,
     Regex,
     Relevance,
     Sensitive,
@@ -25,6 +25,7 @@ from llm_guard.output_scanners.relevance import all_models as relevance_models
 from llm_guard.output_scanners.sentiment import Sentiment
 from llm_guard.output_scanners.toxicity import Toxicity
 from llm_guard.vault import Vault
 logger = logging.getLogger("llm-guard-playground")
@@ -41,7 +42,7 @@ def init_settings() -> (List, Dict):
         "LanguageSame",
         "MaliciousURLs",
         "NoRefusal",
-        "Refutation",
         "Regex",
         "Relevance",
         "Sensitive",
@@ -163,7 +164,12 @@ def init_settings() -> (List, Dict):
                 help="The minimum number of JSON elements that should be present",
             )
-        settings["JSON"] = {"required_elements": st_json_required_elements}
     if "Language" in st_enabled_scanners:
         st_lan_expander = st.sidebar.expander(
@@ -274,23 +280,23 @@ def init_settings() -> (List, Dict):
         settings["NoRefusal"] = {"threshold": st_no_ref_threshold}
-    if "Refutation" in st_enabled_scanners:
-        st_refu_expander = st.sidebar.expander(
-            "Refutation",
             expanded=False,
         )
-        with st_refu_expander:
-            st_refu_threshold = st.slider(
-                label="Threshold",
                 value=0.5,
                 min_value=0.0,
                 max_value=1.0,
                 step=0.05,
-                key="refu_threshold",
             )
-        settings["Refutation"] = {"threshold": st_refu_threshold}
     if "Regex" in st_enabled_scanners:
         st_regex_expander = st.sidebar.expander(
@@ -359,7 +365,7 @@ def init_settings() -> (List, Dict):
                 key="sensitive_entity_types",
             )
             st.caption(
-                "Check all supported entities: https://microsoft.github.io/presidio/supported_entities/#list-of-supported-entities"
             )
             st_sens_redact = st.checkbox("Redact", value=False, key="sens_redact")
             st_sens_threshold = st.slider(
@@ -434,13 +440,13 @@ def get_scanner(scanner_name: str, vault: Vault, settings: Dict):
         return BanTopics(topics=settings["topics"], threshold=settings["threshold"])
     if scanner_name == "Bias":
-        return Bias(threshold=settings["threshold"])
     if scanner_name == "Deanonymize":
         return Deanonymize(vault=vault)
     if scanner_name == "JSON":
-        return JSON(required_elements=settings["required_elements"])
     if scanner_name == "Language":
         return Language(valid_languages=settings["valid_languages"])
@@ -458,16 +464,16 @@ def get_scanner(scanner_name: str, vault: Vault, settings: Dict):
         elif mode == "denied":
             denied_languages = settings["languages"]
-        return Code(allowed=allowed_languages, denied=denied_languages)
     if scanner_name == "MaliciousURLs":
-        return MaliciousURLs(threshold=settings["threshold"])
     if scanner_name == "NoRefusal":
         return NoRefusal(threshold=settings["threshold"])
-    if scanner_name == "Refutation":
-        return Refutation(threshold=settings["threshold"])
     if scanner_name == "Regex":
         match_type = settings["type"]
@@ -491,13 +497,14 @@ def get_scanner(scanner_name: str, vault: Vault, settings: Dict):
             entity_types=settings["entity_types"],
             redact=settings["redact"],
             threshold=settings["threshold"],
         )
     if scanner_name == "Sentiment":
         return Sentiment(threshold=settings["threshold"])
     if scanner_name == "Toxicity":
-        return Toxicity(threshold=settings["threshold"])
     raise ValueError("Unknown scanner name")
@@ -509,10 +516,9 @@ def scan(
     prompt: str,
     text: str,
     fail_fast: bool = False,
-) -> (str, Dict[str, bool], Dict[str, float]):
     sanitized_output = text
-    results_valid = {}
-    results_score = {}
     status_text = "Scanning prompt..."
     if fail_fast:
@@ -524,13 +530,23 @@ def scan(
             scanner = get_scanner(
                 scanner_name, vault, settings[scanner_name] if scanner_name in settings else {}
             )
             sanitized_output, is_valid, risk_score = scanner.scan(prompt, sanitized_output)
-            results_valid[scanner_name] = is_valid
-            results_score[scanner_name] = risk_score
             if fail_fast and not is_valid:
                 break
         status.update(label="Scanning complete", state="complete", expanded=False)
-    return sanitized_output, results_valid, results_score

 import logging
+import time
+from datetime import timedelta
 from typing import Dict, List
 import streamlit as st
 from llm_guard.input_scanners.anonymize import default_entity_types
 from llm_guard.output_scanners import (
     JSON,
     Bias,
     Code,
     Deanonymize,
+    FactualConsistency,
     Language,
     LanguageSame,
     MaliciousURLs,
     NoRefusal,
     Regex,
     Relevance,
     Sensitive,
 from llm_guard.output_scanners.sentiment import Sentiment
 from llm_guard.output_scanners.toxicity import Toxicity
 from llm_guard.vault import Vault
+from streamlit_tags import st_tags
 logger = logging.getLogger("llm-guard-playground")
         "LanguageSame",
         "MaliciousURLs",
         "NoRefusal",
+        "FactualConsistency",
         "Regex",
         "Relevance",
         "Sensitive",
                 help="The minimum number of JSON elements that should be present",
             )
+            st_json_repair = st.checkbox("Repair", value=False, help="Attempt to repair the JSON")
+        settings["JSON"] = {
+            "required_elements": st_json_required_elements,
+            "repair": st_json_repair,
+        }
     if "Language" in st_enabled_scanners:
         st_lan_expander = st.sidebar.expander(
         settings["NoRefusal"] = {"threshold": st_no_ref_threshold}
+    if "FactualConsistency" in st_enabled_scanners:
+        st_fc_expander = st.sidebar.expander(
+            "FactualConsistency",
             expanded=False,
         )
+        with st_fc_expander:
+            st_fc_minimum_score = st.slider(
+                label="Minimum score",
                 value=0.5,
                 min_value=0.0,
                 max_value=1.0,
                 step=0.05,
+                key="fc_threshold",
             )
+        settings["FactualConsistency"] = {"minimum_score": st_fc_minimum_score}
     if "Regex" in st_enabled_scanners:
         st_regex_expander = st.sidebar.expander(
                 key="sensitive_entity_types",
             )
             st.caption(
+                "Check all supported entities: https://llm-guard.com/input_scanners/anonymize/"
             )
             st_sens_redact = st.checkbox("Redact", value=False, key="sens_redact")
             st_sens_threshold = st.slider(
         return BanTopics(topics=settings["topics"], threshold=settings["threshold"])
     if scanner_name == "Bias":
+        return Bias(threshold=settings["threshold"], use_onnx=True)
     if scanner_name == "Deanonymize":
         return Deanonymize(vault=vault)
     if scanner_name == "JSON":
+        return JSON(required_elements=settings["required_elements"], repair=settings["repair"])
     if scanner_name == "Language":
         return Language(valid_languages=settings["valid_languages"])
         elif mode == "denied":
             denied_languages = settings["languages"]
+        return Code(allowed=allowed_languages, denied=denied_languages, use_onnx=True)
     if scanner_name == "MaliciousURLs":
+        return MaliciousURLs(threshold=settings["threshold"], use_onnx=True)
     if scanner_name == "NoRefusal":
         return NoRefusal(threshold=settings["threshold"])
+    if scanner_name == "FactualConsistency":
+        return FactualConsistency(minimum_score=settings["minimum_score"])
     if scanner_name == "Regex":
         match_type = settings["type"]
             entity_types=settings["entity_types"],
             redact=settings["redact"],
             threshold=settings["threshold"],
+            use_onnx=True,
         )
     if scanner_name == "Sentiment":
         return Sentiment(threshold=settings["threshold"])
     if scanner_name == "Toxicity":
+        return Toxicity(threshold=settings["threshold"], use_onnx=True)
     raise ValueError("Unknown scanner name")
     prompt: str,
     text: str,
     fail_fast: bool = False,
+) -> (str, List[Dict[str, any]]):
     sanitized_output = text
+    results = []
     status_text = "Scanning prompt..."
     if fail_fast:
             scanner = get_scanner(
                 scanner_name, vault, settings[scanner_name] if scanner_name in settings else {}
             )
+            start_time = time.monotonic()
             sanitized_output, is_valid, risk_score = scanner.scan(prompt, sanitized_output)
+            end_time = time.monotonic()
+            results.append(
+                {
+                    "scanner": scanner_name,
+                    "is_valid": is_valid,
+                    "risk_score": risk_score,
+                    "took_sec": round(timedelta(seconds=end_time - start_time).total_seconds(), 2),
+                }
+            )
             if fail_fast and not is_valid:
                 break
         status.update(label="Scanning complete", state="complete", expanded=False)
+    return sanitized_output, results

prompt.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import logging
 from typing import Dict, List
 import streamlit as st
-from streamlit_tags import st_tags
 from llm_guard.input_scanners import (
     Anonymize,
     BanSubstrings,
@@ -11,7 +11,6 @@ from llm_guard.input_scanners import (
     Code,
     Language,
     PromptInjection,
-    PromptInjectionV2,
     Regex,
     Secrets,
     Sentiment,
@@ -19,8 +18,9 @@ from llm_guard.input_scanners import (
     Toxicity,
 )
 from llm_guard.input_scanners.anonymize import default_entity_types
-from llm_guard.input_scanners.anonymize_helpers.analyzer import RECOGNIZER_SPACY_EN_PII_DISTILBERT, RECOGNIZER_SPACY_EN_PII_FAST
 from llm_guard.vault import Vault
 logger = logging.getLogger("llm-guard-playground")
@@ -33,7 +33,6 @@ def init_settings() -> (List, Dict):
         "Code",
         "Language",
         "PromptInjection",
-        "PromptInjectionV2",
         "Regex",
         "Secrets",
         "Sentiment",
@@ -67,7 +66,7 @@ def init_settings() -> (List, Dict):
                 key="anon_entity_types",
             )
             st.caption(
-                "Check all supported entities: https://microsoft.github.io/presidio/supported_entities/#list-of-supported-entities"
             )
             st_anon_hidden_names = st_tags(
                 label="Hidden names to be anonymized",
@@ -101,11 +100,6 @@ def init_settings() -> (List, Dict):
                 step=0.1,
                 key="anon_threshold",
             )
-            st_anon_recognizer = st.selectbox(
-                "Recognizer",
-                [RECOGNIZER_SPACY_EN_PII_DISTILBERT, RECOGNIZER_SPACY_EN_PII_FAST],
-                index=1,
-            )
         settings["Anonymize"] = {
             "entity_types": st_anon_entity_types,
@@ -114,7 +108,6 @@ def init_settings() -> (List, Dict):
             "preamble": st_anon_preamble,
             "use_faker": st_anon_use_faker,
             "threshold": st_anon_threshold,
-            "recognizer": st_anon_recognizer,
         }
     if "BanSubstrings" in st_enabled_scanners:
@@ -286,26 +279,6 @@ def init_settings() -> (List, Dict):
             "threshold": st_pi_threshold,
         }
-    if "PromptInjectionV2" in st_enabled_scanners:
-        st_piv2_expander = st.sidebar.expander(
-            "Prompt Injection V2",
-            expanded=False,
-        )
-        with st_piv2_expander:
-            st_piv2_threshold = st.slider(
-                label="Threshold",
-                value=0.5,
-                min_value=0.0,
-                max_value=1.0,
-                step=0.05,
-                key="prompt_injection_v2_threshold",
-            )
-        settings["PromptInjectionV2"] = {
-            "threshold": st_piv2_threshold,
-        }
     if "Regex" in st_enabled_scanners:
         st_regex_expander = st.sidebar.expander(
             "Regex",
@@ -427,7 +400,7 @@ def get_scanner(scanner_name: str, vault: Vault, settings: Dict):
             preamble=settings["preamble"],
             use_faker=settings["use_faker"],
             threshold=settings["threshold"],
-            recognizer=settings["recognizer"],
         )
     if scanner_name == "BanSubstrings":
@@ -452,16 +425,13 @@ def get_scanner(scanner_name: str, vault: Vault, settings: Dict):
         elif mode == "denied":
             denied_languages = settings["languages"]
-        return Code(allowed=allowed_languages, denied=denied_languages)
     if scanner_name == "Language":
         return Language(valid_languages=settings["valid_languages"])
     if scanner_name == "PromptInjection":
-        return PromptInjection(threshold=settings["threshold"])
-    if scanner_name == "PromptInjectionV2":
-        return PromptInjectionV2(threshold=settings["threshold"])
     if scanner_name == "Regex":
         match_type = settings["type"]
@@ -487,17 +457,16 @@ def get_scanner(scanner_name: str, vault: Vault, settings: Dict):
         return TokenLimit(limit=settings["limit"], encoding_name=settings["encoding_name"])
     if scanner_name == "Toxicity":
-        return Toxicity(threshold=settings["threshold"])
     raise ValueError("Unknown scanner name")
 def scan(
     vault: Vault, enabled_scanners: List[str], settings: Dict, text: str, fail_fast: bool = False
-) -> (str, Dict[str, bool], Dict[str, float]):
     sanitized_prompt = text
-    results_valid = {}
-    results_score = {}
     status_text = "Scanning prompt..."
     if fail_fast:
@@ -507,12 +476,23 @@ def scan(
         for scanner_name in enabled_scanners:
             st.write(f"{scanner_name} scanner...")
             scanner = get_scanner(scanner_name, vault, settings[scanner_name])
             sanitized_prompt, is_valid, risk_score = scanner.scan(sanitized_prompt)
-            results_valid[scanner_name] = is_valid
-            results_score[scanner_name] = risk_score
             if fail_fast and not is_valid:
                 break
         status.update(label="Scanning complete", state="complete", expanded=False)
-    return sanitized_prompt, results_valid, results_score

 import logging
+import time
+from datetime import timedelta
 from typing import Dict, List
 import streamlit as st
 from llm_guard.input_scanners import (
     Anonymize,
     BanSubstrings,
     Code,
     Language,
     PromptInjection,
     Regex,
     Secrets,
     Sentiment,
     Toxicity,
 )
 from llm_guard.input_scanners.anonymize import default_entity_types
+from llm_guard.input_scanners.prompt_injection import ALL_MODELS as PI_ALL_MODELS
 from llm_guard.vault import Vault
+from streamlit_tags import st_tags
 logger = logging.getLogger("llm-guard-playground")
         "Code",
         "Language",
         "PromptInjection",
         "Regex",
         "Secrets",
         "Sentiment",
                 key="anon_entity_types",
             )
             st.caption(
+                "Check all supported entities: https://llm-guard.com/input_scanners/anonymize/"
             )
             st_anon_hidden_names = st_tags(
                 label="Hidden names to be anonymized",
                 step=0.1,
                 key="anon_threshold",
             )
         settings["Anonymize"] = {
             "entity_types": st_anon_entity_types,
             "preamble": st_anon_preamble,
             "use_faker": st_anon_use_faker,
             "threshold": st_anon_threshold,
         }
     if "BanSubstrings" in st_enabled_scanners:
             "threshold": st_pi_threshold,
         }
     if "Regex" in st_enabled_scanners:
         st_regex_expander = st.sidebar.expander(
             "Regex",
             preamble=settings["preamble"],
             use_faker=settings["use_faker"],
             threshold=settings["threshold"],
+            use_onnx=True,
         )
     if scanner_name == "BanSubstrings":
         elif mode == "denied":
             denied_languages = settings["languages"]
+        return Code(allowed=allowed_languages, denied=denied_languages, use_onnx=True)
     if scanner_name == "Language":
         return Language(valid_languages=settings["valid_languages"])
     if scanner_name == "PromptInjection":
+        return PromptInjection(threshold=settings["threshold"], models=PI_ALL_MODELS, use_onnx=True)
     if scanner_name == "Regex":
         match_type = settings["type"]
         return TokenLimit(limit=settings["limit"], encoding_name=settings["encoding_name"])
     if scanner_name == "Toxicity":
+        return Toxicity(threshold=settings["threshold"], use_onnx=True)
     raise ValueError("Unknown scanner name")
 def scan(
     vault: Vault, enabled_scanners: List[str], settings: Dict, text: str, fail_fast: bool = False
+) -> (str, List[Dict[str, any]]):
     sanitized_prompt = text
+    results = []
     status_text = "Scanning prompt..."
     if fail_fast:
         for scanner_name in enabled_scanners:
             st.write(f"{scanner_name} scanner...")
             scanner = get_scanner(scanner_name, vault, settings[scanner_name])
+            start_time = time.monotonic()
             sanitized_prompt, is_valid, risk_score = scanner.scan(sanitized_prompt)
+            end_time = time.monotonic()
+            results.append(
+                {
+                    "scanner": scanner_name,
+                    "is_valid": is_valid,
+                    "risk_score": risk_score,
+                    "took_sec": round(timedelta(seconds=end_time - start_time).total_seconds(), 2),
+                }
+            )
             if fail_fast and not is_valid:
                 break
         status.update(label="Scanning complete", state="complete", expanded=False)
+    return sanitized_prompt, results

requirements.txt CHANGED Viewed

@@ -1,6 +1,5 @@
-https://huggingface.co/beki/en_spacy_pii_distilbert/resolve/main/en_spacy_pii_distilbert-any-py3-none-any.whl
-llm-guard==0.3.0
-pandas==2.1.0
-streamlit==1.27.2
 streamlit-tags==1.2.8
-https://huggingface.co/beki/en_spacy_pii_fast/resolve/main/en_spacy_pii_fast-any-py3-none-any.whl

+llm-guard==0.3.1
+llm-guard[onnxruntime]==0.3.1
+pandas==2.1.2
+streamlit==1.28.1
 streamlit-tags==1.2.8