wd-tagger-transformers

Running

File size: 8,166 Bytes

ae039af

from PIL import Image

import torch

from transformers import (
    AutoImageProcessor,
    AutoModelForImageClassification,
)

import gradio as gr
import spaces  # ZERO GPU

# Checkpoints offered in the UI's model dropdown; the first entry is the one
# that is actually loaded below.
MODEL_NAMES = ["p1atdev/wd-swinv2-tagger-v3-hf"]
MODEL_NAME = MODEL_NAMES[0]

# Load the classifier once at import time and move it to the GPU when CUDA is
# available, otherwise leave it on the CPU.
model = AutoModelForImageClassification.from_pretrained(
    MODEL_NAME,
)
model.to("cuda" if torch.cuda.is_available() else "cpu")
# NOTE: trust_remote_code=True is passed here, so the processor may come from
# code shipped in the model repository rather than from transformers itself.
processor = AutoImageProcessor.from_pretrained(MODEL_NAME, trust_remote_code=True)

# ref: https://qiita.com/tregu148/items/fccccbbc47d966dd2fc2
def gradio_copy_text(_text: str) -> None:
    """Notify the user that the prompt was copied.

    This is the Python-side handler of the copy button. It only emits the
    "Copied!" info message; the clipboard write itself is performed in the
    browser by ``COPY_ACTION_JS``.

    Args:
        _text: Value of the component wired as the handler's input (unused).
    """
    gr.Info("Copied!")

# Browser-side handler for the copy button: writes the input text to the
# clipboard unless it is empty or whitespace-only.
COPY_ACTION_JS = """\
(inputs, _outputs) => {
  // inputs is the string value of the input_text
  if (inputs.trim() !== "") {
    navigator.clipboard.writeText(inputs);
  }
}"""

def _people_tag(noun: str, minimum: int = 1, maximum: int = 5):
    return (
        [f"1{noun}"]
        + [f"{num}{noun}s" for num in range(minimum + 1, maximum + 1)]
        + [f"{maximum+1}+{noun}s"]
    )


# Head-count tags (girls / boys / others, plus "no humans"). animagine_prompt
# moves any general tag found in this list to the front of the prompt.
PEOPLE_TAGS = (
    _people_tag("girl") + _people_tag("boy") + _people_tag("other") + ["no humans"]
)
# Rating label -> rating tag(s) appended to the prompt when the output tag
# type is not "e621".
RATING_MAP = {
    "general": "safe",
    "sensitive": "sensitive",
    "questionable": "nsfw",
    "explicit": "explicit, nsfw",
}
# Rating label -> rating tag appended to the prompt when the output tag type
# is "e621". Both "general" and "sensitive" map to "rating_safe".
RATING_MAP_E621 = {
    "general": "rating_safe",
    "sensitive": "rating_safe",
    "questionable": "rating_questionable",
    "explicit": "rating_explicit",
}

# Markdown header rendered at the top of the UI; surrounding blank lines are
# stripped.
DESCRIPTION_MD = """
# WD Tagger with 🤗 transformers
Currently supports the following model(s):
- [p1atdev/wd-swinv2-tagger-v3-hf](https://huggingface.co/p1atdev/wd-swinv2-tagger-v3-hf)

""".strip()


def character_list_to_series_list(character_list, series_file='characterfull.txt'):
    """Return the series tags implied by a list of character tags.

    For each character tag the series is determined as follows:

    * if the tag ends with ")", the text after the last "(" (with ")"
      removed) is used, e.g. ``"saber (fate)"`` gives ``"fate"``;
    * otherwise the tag is looked up in the character -> series table read
      from ``series_file``.

    Every character contributes its series (previously only the last
    character in the list did); duplicates are dropped and the order of first
    appearance is kept.

    Args:
        character_list: Character tags to resolve.
        series_file: Path of the lookup file. Each usable line is a
            ", "-separated record with at least three fields whose last two
            fields are the character name and the series. Parentheses in
            those fields may be backslash-escaped.

    Returns:
        A list of series tags (possibly empty).

    Raises:
        OSError: If ``series_file`` cannot be opened (only when
            ``character_list`` is non-empty).
    """
    def get_series_dict():
        import re

        series_dict = {}
        with open(series_file, 'r', encoding="utf-8") as f:
            for line in f:
                parts = line.strip().split(', ')
                if len(parts) < 3:
                    # Blank or malformed line: skip it instead of reusing the
                    # previous line's name and clobbering its series.
                    continue
                name = parts[-2].replace("\\", "")
                if name.endswith(")"):
                    # "name (series)" -> "name"; only applied when a space
                    # precedes the parenthesis.
                    character_name = "(".join(name.split("(")[:-1])
                    if character_name.endswith(" "):
                        name = character_name[:-1]
                # Drop backslash-escaped parentheses from the series field.
                series_dict[name] = re.sub(r'\\[()]', '', parts[-1])

        return series_dict

    if not character_list:
        # Nothing to resolve; avoid touching the file at all.
        return []

    series_dict = get_series_dict()
    output_series_tag = []
    for tag in character_list:
        series_tag = series_dict.get(tag, "")
        if tag.endswith(")"):
            # A parenthesised suffix on the tag itself wins over the table.
            series_tag = tag.split("(")[-1].replace(")", "")
        if series_tag and series_tag not in output_series_tag:
            output_series_tag.append(series_tag)

    return output_series_tag


def get_e621_dict(path='danbooru_e621.csv'):
    """Load the danbooru -> e621 tag mapping from a comma-separated file.

    Each usable line has the form ``danbooru_tag,e621_tag[,...]``; the first
    field becomes the key and the second the value. Lines with fewer than two
    fields (e.g. blank lines) are skipped instead of raising ``IndexError``.

    Args:
        path: Location of the mapping file.

    Returns:
        A dict mapping the first field of each line to its second field.

    Raises:
        OSError: If ``path`` cannot be opened.
    """
    e621_dict = {}
    with open(path, 'r', encoding="utf-8") as f:
        for line in f:
            parts = line.strip().split(',')
            if len(parts) < 2:
                continue
            e621_dict[parts[0]] = parts[1]

    return e621_dict


def danbooru_to_e621(dtag, e621_dict):
    """Translate a danbooru tag to its e621 equivalent where one is known.

    The tag is scanned for runs of word characters and spaces. Each of the
    first two runs is looked up in ``e621_dict`` (after stripping surrounding
    whitespace and turning underscores into spaces) and replaced by the mapped
    value when a non-empty one exists. Everything else is left untouched.

    Args:
        dtag: The danbooru tag text.
        e621_dict: Mapping from danbooru tag (space-separated form) to e621 tag.

    Returns:
        The tag with up to two runs replaced.
    """
    import re

    def d_to_e(match):
        original = match.group(0)
        # Fall back to the matched text when there is no (non-empty) mapping.
        return e621_dict.get(original.strip().replace("_", " "), "") or original

    # ``count`` is passed by keyword: the positional form is deprecated since
    # Python 3.13.
    return re.sub(r'[\w ]+', d_to_e, dtag, count=2)

def postprocess_results(
    results: dict[str, float], general_threshold: float, character_threshold: float
):
    """Split scored labels into rating, character and general groups.

    Labels are visited from highest to lowest score, so each returned dict is
    ordered by descending score. The ``rating:`` / ``character:`` markers are
    removed from the keys. Ratings are never filtered; character and general
    entries are kept only when their score reaches the matching threshold.

    Args:
        results: Mapping of label to score.
        general_threshold: Minimum score (inclusive) for a general tag.
        character_threshold: Minimum score (inclusive) for a character tag.

    Returns:
        A ``(rating, character, general)`` tuple of dicts.
    """
    rating_marker = "rating:"
    character_marker = "character:"

    rating, all_characters, all_general = {}, {}, {}

    ranked = sorted(results.items(), key=lambda pair: pair[1], reverse=True)
    for label, score in ranked:
        if label.startswith(rating_marker):
            rating[label.replace(rating_marker, "")] = score
        elif label.startswith(character_marker):
            all_characters[label.replace(character_marker, "")] = score
        else:
            all_general[label] = score

    # Thresholds are applied after grouping; ratings pass through unfiltered.
    character = {
        name: score
        for name, score in all_characters.items()
        if score >= character_threshold
    }
    general = {
        name: score for name, score in all_general.items() if score >= general_threshold
    }

    return rating, character, general


def animagine_prompt(rating: list[str], character: list[str], general: list[str], tag_type):
    """Assemble the final comma-separated prompt from the predicted tags.

    The prompt is ordered as: people-count tags, character tags, series tags
    derived from the characters, the remaining general tags, and finally the
    rating tag for the first entry of ``rating``.

    Args:
        rating: Rating labels; only the first one is used.
        character: Character tags, emitted unchanged.
        general: General tags. With ``tag_type == "e621"`` each one is first
            passed through ``danbooru_to_e621``.
        tag_type: ``"e621"`` selects e621 rating tags and tag conversion; any
            other value keeps the danbooru vocabulary.

    Returns:
        The prompt as a single ``", "``-joined string.

    Raises:
        IndexError: If ``rating`` is empty.
        KeyError: If ``rating[0]`` is not a key of the rating map in use.
    """
    use_e621 = tag_type == "e621"
    rating_map = RATING_MAP_E621 if use_e621 else RATING_MAP
    rating_tag = rating_map[rating[0]]

    # The mapping file is only needed for e621 output, so it is not read (and
    # cannot fail) when the danbooru vocabulary is requested.
    e621_dict = get_e621_dict() if use_e621 else None

    people_tags: list[str] = []
    other_tags: list[str] = []
    for tag in general:
        if use_e621:
            tag = danbooru_to_e621(tag, e621_dict)
        # The people check runs on the (possibly converted) tag.
        if tag in PEOPLE_TAGS:
            people_tags.append(tag)
        else:
            other_tags.append(tag)

    output_series_tag = character_list_to_series_list(character)

    all_tags = people_tags + character + output_series_tag + other_tags + [rating_tag]

    return ", ".join(all_tags)


@spaces.GPU(enable_queue=True)
def predict_tags(
    image: Image.Image, general_threshold: float = 0.3, character_threshold: float = 0.8, tag_type = "danbooru"
):
    """Tag an image and build the prompt shown in the UI.

    Args:
        image: Input image as delivered by the image component.
        general_threshold: Minimum score for a general tag to be kept.
        character_threshold: Minimum score for a character tag to be kept.
        tag_type: ``"danbooru"`` or ``"e621"``; forwarded to
            ``animagine_prompt``.

    Returns:
        A 5-tuple ``(rating, character, general, prompt, update)``: the three
        label -> score dicts from ``postprocess_results``, the prompt string,
        and a ``gr.update`` that makes the copy button interactive.

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        # Start was pressed without an image; report it clearly instead of
        # failing inside the image processor.
        raise gr.Error("Please provide an input image.")

    inputs = processor.preprocess(image, return_tensors="pt")

    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs.to(model.device, model.dtype))

    # Sigmoid turns the logits of the first (only) batch item into independent
    # per-label probabilities; convert them to Python floats in one transfer.
    probabilities = torch.sigmoid(outputs.logits[0]).float().tolist()

    results = {
        model.config.id2label[i]: probability
        for i, probability in enumerate(probabilities)
    }

    # rating, character, general
    rating, character, general = postprocess_results(
        results, general_threshold, character_threshold
    )

    prompt = animagine_prompt(
        list(rating.keys()), list(character.keys()), list(general.keys()), tag_type
    )

    return rating, character, general, prompt, gr.update(interactive=True,)


def demo() -> gr.Blocks:
    """Build the Gradio Blocks UI and wire up its event handlers.

    The layout has two columns: inputs (image, thresholds, tag-type selector,
    model dropdown, start button) on the left and outputs (prompt with copy
    button, rating / character / general labels) on the right.

    Returns:
        The assembled ``gr.Blocks`` app; the caller is responsible for
        launching it.
    """
    with gr.Blocks() as ui:
        gr.Markdown(DESCRIPTION_MD)

        with gr.Row():
            # Left column: inputs.
            with gr.Column():
                input_image = gr.Image(label="Input image", type="pil")

                with gr.Group():
                    general_threshold = gr.Slider(
                        label="Threshold",
                        minimum=0.0,
                        maximum=1.0,
                        value=0.3,
                        step=0.01,
                        interactive=True,
                    )
                    character_threshold = gr.Slider(
                        label="Character threshold",
                        minimum=0.0,
                        maximum=1.0,
                        value=0.8,
                        step=0.01,
                        interactive=True,
                    )
                    tag_type = gr.Radio(
                        label="Output tag conversion",
                        info="danbooru for Animagine, e621 for Pony.",
                        choices=["danbooru", "e621"],
                        value="danbooru",
                    )

                # Displayed only: this dropdown is not passed to any handler
                # below, so changing it has no effect on prediction.
                _model_radio = gr.Dropdown(
                    choices=MODEL_NAMES,
                    label="Model",
                    value=MODEL_NAMES[0],
                    interactive=True,
                )

                start_btn = gr.Button(value="Start", variant="primary")

            # Right column: outputs.
            with gr.Column():

                with gr.Group():
                    prompt_text = gr.TextArea(label="Prompt", interactive=False)
                    # Starts disabled; predict_tags returns an update that
                    # enables it once a prompt exists.
                    copy_btn = gr.Button(value="Copy to clipboard", interactive=False)

                rating_tags_label = gr.Label(label="Rating tags")
                character_tags_label = gr.Label(label="Character tags")
                general_tags_label = gr.Label(label="General tags")

        # The outputs list must stay in the same order as the tuple returned
        # by predict_tags.
        start_btn.click(
            predict_tags,
            inputs=[input_image, general_threshold, character_threshold, tag_type],
            outputs=[
                rating_tags_label,
                character_tags_label,
                general_tags_label,
                prompt_text,
                copy_btn,
            ],
        )
        # The JS snippet writes to the clipboard in the browser; the Python
        # handler only shows the "Copied!" message.
        copy_btn.click(gradio_copy_text, inputs=[prompt_text], js=COPY_ACTION_JS)

    return ui

# When run as a script: build the UI, enable the request queue and start the
# server.
if __name__ == "__main__":
    demo().queue().launch()