* upgrade libraries
* ensure all new features of llm guard are supported
- .gitignore +1 -0
- app.py +2 -29
- output.py +121 -37
- prompt.py +82 -33
- requirements.txt +4 -4
.gitignore
CHANGED

@@ -1 +1,2 @@
 venv
+.idea
app.py
CHANGED

@@ -5,31 +5,12 @@ import traceback
 import pandas as pd
 import streamlit as st
 from llm_guard.vault import Vault
-from streamlit.components.v1 import html

 from output import init_settings as init_output_settings
 from output import scan as scan_output
 from prompt import init_settings as init_prompt_settings
 from prompt import scan as scan_prompt

-
-def add_google_analytics(ga4_id):
-    """
-    Add Google Analytics 4 to a Streamlit app
-    """
-    ga_code = f"""
-    <script async src="https://www.googletagmanager.com/gtag/js?id={ga4_id}"></script>
-    <script>
-        window.dataLayer = window.dataLayer || [];
-        function gtag(){{dataLayer.push(arguments);}}
-        gtag('js', new Date());
-        gtag('config', '{ga4_id}');
-    </script>
-    """
-
-    html(ga_code)
-
-
 PROMPT = "prompt"
 OUTPUT = "output"
 vault = Vault()

@@ -39,7 +20,7 @@ st.set_page_config(
     layout="wide",
     initial_sidebar_state="expanded",
     menu_items={
-        "About": "https://
+        "About": "https://llm-guard.com/",
     },
 )

@@ -66,21 +47,13 @@ if scanner_type == PROMPT:
 elif scanner_type == OUTPUT:
     enabled_scanners, settings = init_output_settings()

-add_google_analytics("G-0HBVNHEZBW")
-
 # Main pannel
 st.subheader("Guard Prompt" if scanner_type == PROMPT else "Guard Output")
 with st.expander("About", expanded=False):
     st.info(
         """LLM-Guard is a comprehensive tool designed to fortify the security of Large Language Models (LLMs).
         \n\n[Code](https://github.com/laiyer-ai/llm-guard) |
-        [Documentation](https://
-    )
-
-    st.markdown(
-        "[](https://img.shields.io/pypi/dm/llm-guard.svg)"  # noqa
-        "[](https://opensource.org/licenses/MIT)"
-        ""
+        [Documentation](https://llm-guard.com/)"""
     )

 analyzer_load_state = st.info("Starting LLM Guard...")
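For orientation, here is a minimal sketch of the scanning flow that app.py drives through prompt.py/output.py, written against llm-guard's library API directly. The scanner choices and constructor arguments below are illustrative, not the playground's exact settings:

```python
# Minimal sketch of the prompt-scanning flow, assuming llm-guard's
# top-level helper scan_prompt and two input scanners. The arguments
# here are illustrative defaults, not the playground's configuration.
from llm_guard import scan_prompt
from llm_guard.input_scanners import BanSubstrings, Toxicity

scanners = [
    BanSubstrings(substrings=["forbidden phrase"], redact=True),
    Toxicity(threshold=0.5),
]

# Returns the (possibly redacted) prompt plus per-scanner validity and scores.
sanitized_prompt, results_valid, results_score = scan_prompt(scanners, "user prompt here")
print(sanitized_prompt, results_valid, results_score)
```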
output.py
CHANGED

@@ -6,8 +6,11 @@ from typing import Dict, List
 import streamlit as st
 from llm_guard.input_scanners.anonymize import default_entity_types
 from llm_guard.output_scanners import get_scanner_by_name
+from llm_guard.output_scanners.bias import MatchType as BiasMatchType
 from llm_guard.output_scanners.deanonymize import MatchingStrategy as DeanonymizeMatchingStrategy
+from llm_guard.output_scanners.language import MatchType as LanguageMatchType
 from llm_guard.output_scanners.relevance import all_models as relevance_models
+from llm_guard.output_scanners.toxicity import MatchType as ToxicityMatchType
 from llm_guard.vault import Vault
 from streamlit_tags import st_tags

@@ -16,6 +19,7 @@ logger = logging.getLogger("llm-guard-playground")

 def init_settings() -> (List, Dict):
     all_scanners = [
+        "BanCompetitors",
         "BanSubstrings",
         "BanTopics",
         "Bias",

@@ -26,12 +30,14 @@ def init_settings() -> (List, Dict):
         "LanguageSame",
         "MaliciousURLs",
         "NoRefusal",
+        "ReadingTime",
         "FactualConsistency",
         "Regex",
         "Relevance",
         "Sensitive",
         "Sentiment",
         "Toxicity",
+        "URLReachability",
     ]

     st_enabled_scanners = st.sidebar.multiselect(

@@ -43,6 +49,36 @@ def init_settings() -> (List, Dict):

     settings = {}

+    if "BanCompetitors" in st_enabled_scanners:
+        st_bc_expander = st.sidebar.expander(
+            "Ban Competitors",
+            expanded=False,
+        )
+
+        with st_bc_expander:
+            st_bc_competitors = st_tags(
+                label="List of competitors",
+                text="Type and press enter",
+                value=["openai", "anthropic", "deepmind", "google"],
+                suggestions=[],
+                maxtags=30,
+                key="bc_competitors",
+            )
+
+            st_bc_threshold = st.slider(
+                label="Threshold",
+                value=0.5,
+                min_value=0.0,
+                max_value=1.0,
+                step=0.05,
+                key="ban_competitors_threshold",
+            )
+
+        settings["BanCompetitors"] = {
+            "competitors": st_bc_competitors,
+            "threshold": st_bc_threshold,
+        }
+
     if "BanSubstrings" in st_enabled_scanners:
         st_bs_expander = st.sidebar.expander(
             "Ban Substrings",

@@ -56,10 +92,14 @@ def init_settings() -> (List, Dict):
             height=200,
         ).split("\n")

-        st_bs_match_type = st.selectbox(
-            …
-            …
-        )
+        st_bs_match_type = st.selectbox(
+            "Match type", ["str", "word"], index=0, key="bs_match_type"
+        )
+        st_bs_case_sensitive = st.checkbox(
+            "Case sensitive", value=False, key="bs_case_sensitive"
+        )
+        st_bs_redact = st.checkbox("Redact", value=False, key="bs_redact")
+        st_bs_contains_all = st.checkbox("Contains all", value=False, key="bs_contains_all")

         settings["BanSubstrings"] = {
             "substrings": st_bs_substrings,

@@ -112,7 +152,14 @@ def init_settings() -> (List, Dict):
             key="bias_threshold",
         )

-        settings["Bias"] = {"threshold": st_bias_threshold}
+        st_bias_match_type = st.selectbox(
+            "Match type", [e.value for e in BiasMatchType], index=1, key="bias_match_type"
+        )
+
+        settings["Bias"] = {
+            "threshold": st_bias_threshold,
+            "match_type": BiasMatchType(st_bias_match_type),
+        }

     if "Code" in st_enabled_scanners:
         st_cd_expander = st.sidebar.expander(

@@ -127,16 +174,12 @@ def init_settings() -> (List, Dict):
             default=["python"],
         )

-        st_cd_mode = st.selectbox(…)
-
-        allowed_languages = None
-        denied_languages = None
-        if st_cd_mode == "allowed":
-            allowed_languages = st_cd_languages
-        elif st_cd_mode == "denied":
-            denied_languages = st_cd_languages
+        st_cd_is_blocked = st.checkbox("Is blocked", value=False, key="cd_is_blocked")

-        settings["Code"] = {…}
+        settings["Code"] = {
+            "languages": st_cd_languages,
+            "is_blocked": st_cd_is_blocked,
+        }

     if "Deanonymize" in st_enabled_scanners:
         st_de_expander = st.sidebar.expander(

@@ -170,7 +213,9 @@ def init_settings() -> (List, Dict):
             help="The minimum number of JSON elements that should be present",
         )

-        st_json_repair = st.checkbox(…)
+        st_json_repair = st.checkbox(
+            "Repair", value=False, help="Attempt to repair the JSON", key="json_repair"
+        )

         settings["JSON"] = {
             "required_elements": st_json_required_elements,

@@ -211,8 +256,13 @@ def init_settings() -> (List, Dict):
             default=["en"],
         )

+        st_lan_match_type = st.selectbox(
+            "Match type", [e.value for e in LanguageMatchType], index=1, key="lan_match_type"
+        )
+
         settings["Language"] = {
             "valid_languages": st_lan_valid_language,
+            "match_type": LanguageMatchType(st_lan_match_type),
         }

     if "MaliciousURLs" in st_enabled_scanners:

@@ -251,6 +301,31 @@ def init_settings() -> (List, Dict):

     settings["NoRefusal"] = {"threshold": st_no_ref_threshold}

+    if "ReadingTime" in st_enabled_scanners:
+        st_rt_expander = st.sidebar.expander(
+            "Reading Time",
+            expanded=False,
+        )
+
+        with st_rt_expander:
+            st_rt_max_reading_time = st.slider(
+                label="Max reading time (in minutes)",
+                value=5,
+                min_value=0,
+                max_value=3600,
+                step=5,
+                key="rt_max_reading_time",
+            )
+
+            st_rt_truncate = st.checkbox(
+                "Truncate",
+                value=False,
+                help="Truncate the text to the max reading time",
+                key="rt_truncate",
+            )
+
+        settings["ReadingTime"] = {"max_time": st_rt_max_reading_time, "truncate": st_rt_truncate}
+
     if "FactualConsistency" in st_enabled_scanners:
         st_fc_expander = st.sidebar.expander(
             "FactualConsistency",

@@ -282,28 +357,19 @@ def init_settings() -> (List, Dict):
             height=200,
         ).split("\n")

-        st_regex_type = st.selectbox(
-            "Match type",
-            ["good", "bad"],
-            index=1,
-            help="good: allow only good patterns, bad: ban bad patterns",
-        )
+        st_regex_is_blocked = st.checkbox("Is blocked", value=False, key="regex_is_blocked")

-        st_regex_redact = st.checkbox(
-            "Redact",
-            …
+        st_regex_redact = st.checkbox(
+            "Redact",
+            value=False,
+            help="Replace the matched bad patterns with [REDACTED]",
+            key="regex_redact",
         )

-        good_patterns = None
-        bad_patterns = None
-        if st_regex_type == "good":
-            good_patterns = st_regex_patterns
-        elif st_regex_type == "bad":
-            bad_patterns = st_regex_patterns
-
         settings["Regex"] = {
-            "good_patterns": good_patterns,
-            "bad_patterns": bad_patterns,
-            "redact": st_regex_redact,
+            "patterns": st_regex_patterns,
+            "is_blocked": st_regex_is_blocked,
+            "redact": st_regex_redact,
         }

     if "Relevance" in st_enabled_scanners:

@@ -389,15 +455,33 @@ def init_settings() -> (List, Dict):
     with st_tox_expander:
         st_tox_threshold = st.slider(
             label="Threshold",
-            value=0.
-            min_value
+            value=0.5,
+            min_value=0.0,
             max_value=1.0,
             step=0.05,
             key="toxicity_threshold",
-            help="A negative value (closer to 0 as the label output) indicates toxicity in the text, while a positive logit (closer to 1 as the label output) suggests non-toxicity.",
         )

-        settings["Toxicity"] = {"threshold": st_tox_threshold}
+        st_tox_match_type = st.selectbox(
+            "Match type",
+            [e.value for e in ToxicityMatchType],
+            index=1,
+            key="toxicity_match_type",
+        )
+
+        settings["Toxicity"] = {
+            "threshold": st_tox_threshold,
+            "match_type": ToxicityMatchType(st_tox_match_type),
+        }
+
+    if "URLReachability" in st_enabled_scanners:
+        st_url_expander = st.sidebar.expander(
+            "URL Reachability",
+            expanded=False,
+        )
+
+        if st_url_expander:
+            settings["URLReachability"] = {}

     return st_enabled_scanners, settings
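Downstream, the settings dict built here is presumably fed through get_scanner_by_name (imported at the top of the diff) to construct scanner instances. A hedged sketch of that step; the (name, config) signature is an assumption based on the import, and the config keys mirror the diff above:

```python
# Hedged sketch: turning init_settings()'s dict into scanner instances via
# get_scanner_by_name. The exact signature is assumed from the import above;
# the config keys ("threshold", "max_time", "truncate") are taken from the diff.
from llm_guard.output_scanners import get_scanner_by_name

settings = {
    "Toxicity": {"threshold": 0.5},                     # match_type left to its default
    "ReadingTime": {"max_time": 5, "truncate": False},  # keys as set in the diff
}

scanners = [get_scanner_by_name(name, config) for name, config in settings.items()]
```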
prompt.py
CHANGED

@@ -6,6 +6,9 @@ from typing import Dict, List
 import streamlit as st
 from llm_guard.input_scanners import get_scanner_by_name
 from llm_guard.input_scanners.anonymize import default_entity_types
+from llm_guard.input_scanners.language import MatchType as LanguageMatchType
+from llm_guard.input_scanners.prompt_injection import MatchType as PromptInjectionMatchType
+from llm_guard.input_scanners.toxicity import MatchType as ToxicityMatchType
 from llm_guard.vault import Vault
 from streamlit_tags import st_tags

@@ -15,6 +18,7 @@ logger = logging.getLogger("llm-guard-playground")
 def init_settings() -> (List, Dict):
     all_scanners = [
         "Anonymize",
+        "BanCompetitors",
         "BanSubstrings",
         "BanTopics",
         "Code",

@@ -77,7 +81,10 @@ def init_settings() -> (List, Dict):
             "Preamble", value="Text to prepend to sanitized prompt: "
         )
         st_anon_use_faker = st.checkbox(
-            "Use Faker",
+            "Use Faker",
+            value=False,
+            help="Use Faker library to generate fake data",
+            key="anon_use_faker",
         )
         st_anon_threshold = st.slider(
             label="Threshold",

@@ -97,6 +104,36 @@ def init_settings() -> (List, Dict):
             "threshold": st_anon_threshold,
         }

+    if "BanCompetitors" in st_enabled_scanners:
+        st_bc_expander = st.sidebar.expander(
+            "Ban Competitors",
+            expanded=False,
+        )
+
+        with st_bc_expander:
+            st_bc_competitors = st_tags(
+                label="List of competitors",
+                text="Type and press enter",
+                value=["openai", "anthropic", "deepmind", "google"],
+                suggestions=[],
+                maxtags=30,
+                key="bc_competitors",
+            )
+
+            st_bc_threshold = st.slider(
+                label="Threshold",
+                value=0.5,
+                min_value=0.0,
+                max_value=1.0,
+                step=0.05,
+                key="ban_competitors_threshold",
+            )
+
+        settings["BanCompetitors"] = {
+            "competitors": st_bc_competitors,
+            "threshold": st_bc_threshold,
+        }
+
     if "BanSubstrings" in st_enabled_scanners:
         st_bs_expander = st.sidebar.expander(
             "Ban Substrings",

@@ -110,10 +147,14 @@ def init_settings() -> (List, Dict):
             height=200,
         ).split("\n")

-        st_bs_match_type = st.selectbox(
-            …
-            …
-        )
+        st_bs_match_type = st.selectbox(
+            "Match type", ["str", "word"], index=0, key="bs_match_type"
+        )
+        st_bs_case_sensitive = st.checkbox(
+            "Case sensitive", value=False, key="bs_case_sensitive"
+        )
+        st_bs_redact = st.checkbox("Redact", value=False, key="bs_redact")
+        st_bs_contains_all = st.checkbox("Contains all", value=False, key="bs_contains_all")

         settings["BanSubstrings"] = {
             "substrings": st_bs_substrings,

@@ -166,18 +207,11 @@ def init_settings() -> (List, Dict):
             default=["python"],
         )

-        st_cd_mode = st.selectbox(…)
-
-        allowed_languages = None
-        denied_languages = None
-        if st_cd_mode == "allowed":
-            allowed_languages = st_cd_languages
-        elif st_cd_mode == "denied":
-            denied_languages = st_cd_languages
+        st_cd_is_blocked = st.checkbox("Is blocked", value=False, key="code_is_blocked")

         settings["Code"] = {
-            "allowed": allowed_languages,
-            "denied": denied_languages,
+            "languages": st_cd_languages,
+            "is_blocked": st_cd_is_blocked,
         }

     if "Language" in st_enabled_scanners:

@@ -214,8 +248,16 @@ def init_settings() -> (List, Dict):
             default=["en"],
         )

+        st_lan_match_type = st.selectbox(
+            "Match type",
+            [e.value for e in LanguageMatchType],
+            index=1,
+            key="language_match_type",
+        )
+
         settings["Language"] = {
             "valid_languages": st_lan_valid_language,
+            "match_type": st_lan_match_type,
         }

     if "PromptInjection" in st_enabled_scanners:

@@ -234,8 +276,16 @@ def init_settings() -> (List, Dict):
             key="prompt_injection_threshold",
         )

+        st_pi_match_type = st.selectbox(
+            "Match type",
+            [e.value for e in PromptInjectionMatchType],
+            index=1,
+            key="prompt_injection_match_type",
+        )
+
         settings["PromptInjection"] = {
             "threshold": st_pi_threshold,
+            "match_type": st_pi_match_type,
         }

     if "Regex" in st_enabled_scanners:

@@ -251,28 +301,19 @@ def init_settings() -> (List, Dict):
             height=200,
         ).split("\n")

-        st_regex_type = st.selectbox(
-            "Match type",
-            ["good", "bad"],
-            index=1,
-            help="good: allow only good patterns, bad: ban bad patterns",
-        )
+        st_regex_is_blocked = st.checkbox("Is blocked", value=False, key="regex_is_blocked")

-        st_regex_redact = st.checkbox(
-            "Redact",
-            …
+        st_regex_redact = st.checkbox(
+            "Redact",
+            value=False,
+            help="Replace the matched bad patterns with [REDACTED]",
+            key="regex_redact",
         )

-        good_patterns = None
-        bad_patterns = None
-        if st_regex_type == "good":
-            good_patterns = st_regex_patterns
-        elif st_regex_type == "bad":
-            bad_patterns = st_regex_patterns
-
         settings["Regex"] = {
-            "good_patterns": good_patterns,
-            "bad_patterns": bad_patterns,
-            "redact": st_regex_redact,
+            "patterns": st_regex_patterns,
+            "is_blocked": st_regex_is_blocked,
+            "redact": st_regex_redact,
         }

     if "Secrets" in st_enabled_scanners:

@@ -347,8 +388,16 @@ def init_settings() -> (List, Dict):
             key="toxicity_threshold",
         )

+        st_tox_match_type = st.selectbox(
+            "Match type",
+            [e.value for e in ToxicityMatchType],
+            index=1,
+            key="toxicity_match_type",
+        )
+
         settings["Toxicity"] = {
             "threshold": st_tox_threshold,
+            "match_type": st_tox_match_type,
         }

     return st_enabled_scanners, settings
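A note on the MatchType pattern used throughout both files: each selectbox offers the enum's .value strings, and the chosen string is either passed through as-is (prompt.py) or mapped back to the enum member (output.py, e.g. BiasMatchType(st_bias_match_type)). A small sketch of that round-trip, using an import taken from the diff; the concrete member values depend on the installed llm-guard version:

```python
# Round-trip between an enum's string values (what the selectbox shows)
# and its members (what the scanner expects). Import taken from the diff.
from llm_guard.input_scanners.prompt_injection import MatchType as PromptInjectionMatchType

options = [e.value for e in PromptInjectionMatchType]  # strings offered in the UI
chosen = options[1]                                    # index=1 mirrors the diff's default
match_type = PromptInjectionMatchType(chosen)          # string value -> enum member
print(options, chosen, match_type)
```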
requirements.txt
CHANGED

@@ -1,5 +1,5 @@
-llm-guard==0.3.
-llm-guard[onnxruntime]==0.3.
-pandas==2.
-streamlit==1.
+llm-guard==0.3.7
+llm-guard[onnxruntime]==0.3.7
+pandas==2.2.0
+streamlit==1.30.0
 streamlit-tags==1.2.8
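To confirm an environment actually matches these pins, one option is the standard library's importlib.metadata:

```python
# Quick check that installed versions match the pins above; package names
# are taken from requirements.txt, the rest is standard library.
from importlib.metadata import version

for pkg in ("llm-guard", "pandas", "streamlit", "streamlit-tags"):
    print(pkg, version(pkg))
```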