safety_checker

- app.py  (+34 -2)
- safety_checker.py  (+137 -0)
app.py  CHANGED

@@ -4,7 +4,10 @@ from diffusers import StableDiffusionXLPipeline, EulerDiscreteScheduler
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file
 import spaces
+import os
+from PIL import Image

+SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", "0") == "1"

 # Constants
 base = "stabilityai/stable-diffusion-xl-base-1.0"
@@ -21,6 +24,27 @@ checkpoints = {
 if torch.cuda.is_available():
     pipe = StableDiffusionXLPipeline.from_pretrained(base, torch_dtype=torch.float16, variant="fp16").to("cuda")

+if SAFETY_CHECKER:
+    from safety_checker import StableDiffusionSafetyChecker
+    from transformers import CLIPFeatureExtractor
+
+    safety_checker = StableDiffusionSafetyChecker.from_pretrained(
+        "CompVis/stable-diffusion-safety-checker"
+    ).to("cuda")
+    feature_extractor = CLIPFeatureExtractor.from_pretrained(
+        "openai/clip-vit-base-patch32"
+    )
+
+    def check_nsfw_images(
+        images: list[Image.Image],
+    ) -> tuple[list[Image.Image], list[bool]]:
+        safety_checker_input = feature_extractor(images, return_tensors="pt").to("cuda")
+        has_nsfw_concepts = safety_checker(
+            images=[images],
+            clip_input=safety_checker_input.pixel_values.to("cuda")
+        )
+
+        return images, has_nsfw_concepts

 # Function
 @spaces.GPU(enable_queue=True)
@@ -37,8 +61,16 @@ def generate_image(prompt, ckpt):
         pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")

     pipe.unet.load_state_dict(load_file(hf_hub_download(repo, checkpoint), device="cuda"))
-
-
+    results = pipe(prompt, num_inference_steps=num_inference_steps, guidance_scale=0)
+
+    if SAFETY_CHECKER:
+        images, has_nsfw_concepts = check_nsfw_images(results.images)
+        if any(has_nsfw_concepts):
+            gr.Warning("NSFW content detected.")
+            return Image.new("RGB", (512, 512))
+        return images[0]
+    return results.images[0]
+


 # Gradio Interface
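The checker added above is opt-in: it only loads when the Space is started with the SAFETY_CHECKER environment variable set to "1"; otherwise generation is returned unfiltered. Below is a minimal standalone sketch, not the Space's exact code path (which keeps everything on CUDA inside check_nsfw_images), showing how the pieces fit together. It assumes transformers and Pillow are installed, runs on CPU, and downloads the CompVis checker weights on first use.

# Minimal sketch of the check_nsfw_images() flow: featurize the images with the
# CLIP feature extractor, then ask the safety checker which ones trip a concept.
from PIL import Image
from transformers import CLIPFeatureExtractor
from safety_checker import StableDiffusionSafetyChecker

safety_checker = StableDiffusionSafetyChecker.from_pretrained(
    "CompVis/stable-diffusion-safety-checker"
)
feature_extractor = CLIPFeatureExtractor.from_pretrained("openai/clip-vit-base-patch32")

images = [Image.new("RGB", (512, 512))]  # stand-in for pipe(...).images
inputs = feature_extractor(images, return_tensors="pt")
# forward() in safety_checker.py returns one bool per input image
has_nsfw_concepts = safety_checker(images=images, clip_input=inputs.pixel_values)
print(has_nsfw_concepts)  # a blank image should come back as [False]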
    	
safety_checker.py  ADDED  (+137 lines)

# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import torch
import torch.nn as nn
from transformers import CLIPConfig, CLIPVisionModel, PreTrainedModel


def cosine_distance(image_embeds, text_embeds):
    normalized_image_embeds = nn.functional.normalize(image_embeds)
    normalized_text_embeds = nn.functional.normalize(text_embeds)
    return torch.mm(normalized_image_embeds, normalized_text_embeds.t())


class StableDiffusionSafetyChecker(PreTrainedModel):
    config_class = CLIPConfig

    _no_split_modules = ["CLIPEncoderLayer"]

    def __init__(self, config: CLIPConfig):
        super().__init__(config)

        self.vision_model = CLIPVisionModel(config.vision_config)
        self.visual_projection = nn.Linear(
            config.vision_config.hidden_size, config.projection_dim, bias=False
        )

        self.concept_embeds = nn.Parameter(
            torch.ones(17, config.projection_dim), requires_grad=False
        )
        self.special_care_embeds = nn.Parameter(
            torch.ones(3, config.projection_dim), requires_grad=False
        )

        self.concept_embeds_weights = nn.Parameter(torch.ones(17), requires_grad=False)
        self.special_care_embeds_weights = nn.Parameter(
            torch.ones(3), requires_grad=False
        )

    @torch.no_grad()
    def forward(self, clip_input, images):
        pooled_output = self.vision_model(clip_input)[1]  # pooled_output
        image_embeds = self.visual_projection(pooled_output)

        # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
        special_cos_dist = (
            cosine_distance(image_embeds, self.special_care_embeds)
            .cpu()
            .float()
            .numpy()
        )
        cos_dist = (
            cosine_distance(image_embeds, self.concept_embeds).cpu().float().numpy()
        )

        result = []
        batch_size = image_embeds.shape[0]
        for i in range(batch_size):
            result_img = {
                "special_scores": {},
                "special_care": [],
                "concept_scores": {},
                "bad_concepts": [],
            }

            # increase this value to create a stronger `nsfw` filter
            # at the cost of increasing the possibility of filtering benign images
            adjustment = 0.0

            for concept_idx in range(len(special_cos_dist[0])):
                concept_cos = special_cos_dist[i][concept_idx]
                concept_threshold = self.special_care_embeds_weights[concept_idx].item()
                result_img["special_scores"][concept_idx] = round(
                    concept_cos - concept_threshold + adjustment, 3
                )
                if result_img["special_scores"][concept_idx] > 0:
                    result_img["special_care"].append(
                        {concept_idx, result_img["special_scores"][concept_idx]}
                    )
                    adjustment = 0.01

            for concept_idx in range(len(cos_dist[0])):
                concept_cos = cos_dist[i][concept_idx]
                concept_threshold = self.concept_embeds_weights[concept_idx].item()
                result_img["concept_scores"][concept_idx] = round(
                    concept_cos - concept_threshold + adjustment, 3
                )
                if result_img["concept_scores"][concept_idx] > 0:
                    result_img["bad_concepts"].append(concept_idx)

            result.append(result_img)

        has_nsfw_concepts = [len(res["bad_concepts"]) > 0 for res in result]

        return has_nsfw_concepts

    @torch.no_grad()
    def forward_onnx(self, clip_input: torch.FloatTensor, images: torch.FloatTensor):
        pooled_output = self.vision_model(clip_input)[1]  # pooled_output
        image_embeds = self.visual_projection(pooled_output)

        special_cos_dist = cosine_distance(image_embeds, self.special_care_embeds)
        cos_dist = cosine_distance(image_embeds, self.concept_embeds)

        # increase this value to create a stronger `nsfw` filter
        # at the cost of increasing the possibility of filtering benign images
        adjustment = 0.0

        special_scores = (
            special_cos_dist - self.special_care_embeds_weights + adjustment
        )
        # special_scores = special_scores.round(decimals=3)
        special_care = torch.any(special_scores > 0, dim=1)
        special_adjustment = special_care * 0.01
        special_adjustment = special_adjustment.unsqueeze(1).expand(
            -1, cos_dist.shape[1]
        )

        concept_scores = (cos_dist - self.concept_embeds_weights) + special_adjustment
        # concept_scores = concept_scores.round(decimals=3)
        has_nsfw_concepts = torch.any(concept_scores > 0, dim=1)

        images[has_nsfw_concepts] = 0.0  # black image

        return images, has_nsfw_concepts
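The detection rule in forward() (and its vectorized twin forward_onnx()) is a thresholding scheme: each image embedding is compared by cosine similarity against 17 fixed NSFW concept embeddings and 3 "special care" embeddings; a concept fires when its similarity exceeds the per-concept threshold stored in concept_embeds_weights, and a firing special-care concept tightens all subsequent thresholds by 0.01. A toy numeric sketch of that rule follows; the similarities and thresholds are invented, not the real learned values.

# Toy illustration of the thresholding in forward_onnx():
#   score = cos_sim - threshold + adjustment
# and an image is flagged as soon as any concept score is positive.
import torch

cos_sim = torch.tensor([[0.21, 0.05, 0.30]])    # 1 image x 3 toy concepts
thresholds = torch.tensor([0.25, 0.10, 0.28])   # toy per-concept thresholds
adjustment = 0.0                                 # becomes 0.01 after a special-care hit

concept_scores = cos_sim - thresholds + adjustment
has_nsfw_concepts = torch.any(concept_scores > 0, dim=1)
print(concept_scores)     # tensor([[-0.0400, -0.0500,  0.0200]])
print(has_nsfw_concepts)  # tensor([True]) -> the third toy concept exceeds its threshold

Note the two paths handle flagged outputs differently: forward_onnx() zeroes the offending images in place (images[has_nsfw_concepts] = 0.0), while forward() only returns the boolean flags, and app.py substitutes its own blank 512x512 image when any flag is set.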

