trojblue committed
Commit 20d8f48 · 1 Parent(s): 132d7fb

update app.py

Files changed (2):
  1. app.py +153 -46
  2. handler.py +64 -13
app.py CHANGED
@@ -88,8 +88,15 @@ def run_inference(
     url: str,
     general_threshold: float,
     character_threshold: float,
+    mode_val: str,
+    topk_general_val: int,
+    topk_character_val: int,
+    include_scores_val: bool,
+    underscore_mode_val: bool,
 ):
-    if source_choice == "Upload image":
+    # Determine which input to use based on which Run button invoked the function.
+    # We'll pass a string flag via source_choice: either "url" or "image".
+    if source_choice == "image":
         if image is None:
             raise gr.Error("Please upload an image.")
         inputs = image
@@ -98,13 +105,15 @@ def run_inference(
             raise gr.Error("Please provide an image URL.")
         inputs = {"url": url.strip()}

-    data = {
-        "inputs": inputs,
-        "parameters": {
-            "general_threshold": float(general_threshold),
-            "character_threshold": float(character_threshold),
-        },
+    params = {
+        "general_threshold": float(general_threshold),
+        "character_threshold": float(character_threshold),
+        "mode": mode_val,
+        "topk_general": int(topk_general_val),
+        "topk_character": int(topk_character_val),
+        "include_scores": bool(include_scores_val),
     }
+    data = {"inputs": inputs, "parameters": params}

     started = time.time()
     try:
@@ -113,20 +122,62 @@ def run_inference(
         raise gr.Error(f"Inference error: {e}") from e
     latency = round(time.time() - started, 4)

-    features = ", ".join(sorted(out.get("feature", []))) or "—"
-    characters = ", ".join(sorted(out.get("character", []))) or "—"
-    ips = ", ".join(out.get("ip", [])) or "—"
+    # Individual outputs
+    if underscore_mode_val:
+        characters = " ".join(out.get("character", [])) or "—"
+        ips = " ".join(out.get("ip", [])) or "—"
+        features = " ".join(out.get("feature", [])) or "—"
+    elif include_scores_val:
+        gen_scores = out.get("feature_scores", {})
+        char_scores = out.get("character_scores", {})
+        characters = ", ".join(
+            f"{k.replace('_', ' ')} ({char_scores[k]:.2f})" for k in sorted(char_scores, key=char_scores.get, reverse=True)
+        ) or "—"
+        ips = ", ".join(tag.replace("_", " ") for tag in out.get("ip", [])) or "—"
+        features = ", ".join(
+            f"{k.replace('_', ' ')} ({gen_scores[k]:.2f})" for k in sorted(gen_scores, key=gen_scores.get, reverse=True)
+        ) or "—"
+    else:
+        characters = ", ".join(sorted(t.replace("_", " ") for t in out.get("character", []))) or "—"
+        ips = ", ".join(tag.replace("_", " ") for tag in out.get("ip", [])) or "—"
+        features = ", ".join(sorted(t.replace("_", " ") for t in out.get("feature", []))) or "—"
+
+    # Combined output: probability-descending if scores available; else character, IP, general
+    if underscore_mode_val:
+        combined = " ".join(out.get("character", []) + out.get("ip", []) + out.get("feature", [])) or "—"
+    else:
+        char_scores = out.get("character_scores") or {}
+        gen_scores = out.get("feature_scores") or {}
+        if include_scores_val and (char_scores or gen_scores):
+            # Build (tag, score) pairs
+            char_pairs = [(k, float(char_scores.get(k, 0.0))) for k in out.get("character", [])]
+            ip_pairs = [(k, 1.0) for k in out.get("ip", [])]  # IP has no score; treat equally
+            gen_pairs = [(k, float(gen_scores.get(k, 0.0))) for k in out.get("feature", [])]
+            all_pairs = char_pairs + ip_pairs + gen_pairs
+            all_pairs.sort(key=lambda t: t[1], reverse=True)
+            combined = ", ".join(
+                [f"{k.replace('_', ' ')} ({score:.2f})" if (k in char_scores or k in gen_scores) else k.replace('_', ' ') for k, score in all_pairs]
+            ) or "—"
+        else:
+            combined = ", ".join(
+                list(sorted(t.replace("_", " ") for t in out.get("character", []))) +
+                [tag.replace("_", " ") for tag in out.get("ip", [])] +
+                list(sorted(t.replace("_", " ") for t in out.get("feature", [])))
+            ) or "—"

     meta = {
         "device": handler.device,
         "latency_s_total": latency,
         **out.get("_timings", {}),
+        "params": out.get("_params", {}),
     }

-    return features, characters, ips, meta, out
+    return features, characters, ips, combined, meta, out
+

+theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="violet", radius_size="lg",)

-with gr.Blocks(title="PixAI Tagger v0.9 — Demo", fill_height=True) as demo:
+with gr.Blocks(title="PixAI Tagger v0.9 — Demo", fill_height=True, theme=theme, analytics_enabled=False) as demo:
     gr.Markdown(
         """
         # PixAI Tagger v0.9 — Gradio Demo
@@ -140,19 +191,41 @@ with gr.Blocks(title="PixAI Tagger v0.9 — Demo", fill_height=True) as demo:
         """
     )
     with gr.Row():
-        gr.Markdown(f"**{DEVICE_LABEL}**")
+        gr.Markdown(f"**{DEVICE_LABEL}** — adjust thresholds or switch to Top-K mode.")

-    with gr.Row():
-        source_choice = gr.Radio(
-            choices=["Upload image", "From URL"],
-            value="Upload image",
-            label="Image source",
+    with gr.Accordion("Settings", open=False):
+        mode = gr.Radio(
+            choices=["threshold", "topk"], value="threshold", label="Mode"
         )
+        with gr.Group(visible=True) as threshold_group:
+            general_threshold = gr.Slider(
+                minimum=0.0, maximum=1.0, step=0.01, value=0.30, label="General threshold"
+            )
+            character_threshold = gr.Slider(
+                minimum=0.0, maximum=1.0, step=0.01, value=0.85, label="Character threshold"
+            )
+        with gr.Group(visible=False) as topk_group:
+            topk_general = gr.Slider(
+                minimum=0, maximum=100, step=1, value=25, label="Top-K general"
+            )
+            topk_character = gr.Slider(
+                minimum=0, maximum=100, step=1, value=10, label="Top-K character"
+            )
+        include_scores = gr.Checkbox(value=False, label="Include scores in output")
+        underscore_mode = gr.Checkbox(value=False, label="Underscore-separated output")
+
+        def toggle_mode(selected):
+            return (
+                gr.update(visible=(selected == "threshold")),
+                gr.update(visible=(selected == "topk")),
+            )
+
+        mode.change(toggle_mode, inputs=[mode], outputs=[threshold_group, topk_group])

     with gr.Row(variant="panel"):
         with gr.Column(scale=2):
-            image = gr.Image(label="Upload image", type="pil", visible=True, height="500px")
-            url = gr.Textbox(label="Image URL", placeholder="https://…", visible=False)
+            image = gr.Image(label="Upload image", type="pil", visible=True, height="420px")
+            url = gr.Textbox(label="Image URL", placeholder="https://…", visible=True)

             def toggle_inputs(choice):
                 return (
@@ -160,48 +233,82 @@ with gr.Blocks(title="PixAI Tagger v0.9 — Demo", fill_height=True) as demo:
                     gr.update(visible=(choice == "From URL")),
                 )

-            source_choice.change(toggle_inputs, [source_choice], [image, url])

-        with gr.Column(scale=1):
-            general_threshold = gr.Slider(
-                minimum=0.0, maximum=1.0, step=0.01, value=0.30, label="General threshold"
-            )
-            character_threshold = gr.Slider(
-                minimum=0.0, maximum=1.0, step=0.01, value=0.85, label="Character threshold"
-            )
-            run_btn = gr.Button("Run", variant="primary")
-            clear_btn = gr.Button("Clear")

-    with gr.Row():
-        with gr.Column():
-            gr.Markdown("### Predicted Tags")
-            features_out = gr.Textbox(label="General tags", lines=4)
-            characters_out = gr.Textbox(label="Character tags", lines=4)
-            ip_out = gr.Textbox(label="IP tags", lines=2)
+        with gr.Column(scale=3):
+            # No source choice; show both inputs and two run buttons
+            with gr.Row():
+                run_image_btn = gr.Button("Run from image", variant="primary")
+                run_url_btn = gr.Button("Run from URL")
+                clear_btn = gr.Button("Clear")

+            gr.Markdown("### Combined Output (character → IP → general)")
+            combined_out = gr.Textbox(label="Combined tags", lines=10,)
+            copy_combined = gr.Button("Copy combined")
+
+    with gr.Row():
         with gr.Column():
-            gr.Markdown("### Metadata & Raw Output")
-            meta_out = gr.JSON(label="Timings/Device")
-            raw_out = gr.JSON(label="Raw JSON")
+            gr.Markdown("### Character / General / IP")
+            with gr.Row():
+                with gr.Column():
+                    characters_out = gr.Textbox(label="Character tags", lines=5,)
+                with gr.Column():
+                    features_out = gr.Textbox(label="General tags", lines=5,)
+                with gr.Column():
+                    ip_out = gr.Textbox(label="IP tags", lines=5,)
+            with gr.Row():
+                copy_characters = gr.Button("Copy character")
+                copy_features = gr.Button("Copy general")
+                copy_ip = gr.Button("Copy IP")
+
+    with gr.Accordion("Metadata & Raw Output", open=False):
+        with gr.Row():
+            with gr.Column():
+                meta_out = gr.JSON(label="Timings/Device")
+                raw_out = gr.JSON(label="Raw JSON")
+                copy_raw = gr.Button("Copy raw JSON")

     examples = gr.Examples(
         label="Examples (URL mode)",
         examples=[
-            ["From URL", None, "https://cdn.donmai.us/sample/50/b7/__komeiji_koishi_touhou_drawn_by_cui_ying__sample-50b7006f16e0144d5b5db44cadc2d22f.jpg", 0.30, 0.85],
+            [None, "https://cdn.donmai.us/sample/50/b7/__komeiji_koishi_touhou_drawn_by_cui_ying__sample-50b7006f16e0144d5b5db44cadc2d22f.jpg", 0.30, 0.85, "threshold", 25, 10, False, False],
         ],
-        inputs=[source_choice, image, url, general_threshold, character_threshold],
+        inputs=[image, url, general_threshold, character_threshold, mode, topk_general, topk_character, include_scores, underscore_mode],
         cache_examples=False,
     )

     def clear():
-        return (None, "", 0.30, 0.85, "", "", "", {}, {})
+        return (None, "", 0.30, 0.85, "", "", "", "", {}, {})
+
+    # Bind buttons separately with a flag for source
+    run_url_btn.click(
+        run_inference,
+        inputs=[
+            gr.State("url"), image, url,
+            general_threshold, character_threshold,
+            mode, topk_general, topk_character, include_scores, underscore_mode,
+        ],
+        outputs=[features_out, characters_out, ip_out, combined_out, meta_out, raw_out],
+        api_name="predict_url",
+    )

-    run_btn.click(
+    run_image_btn.click(
         run_inference,
-        inputs=[source_choice, image, url, general_threshold, character_threshold],
-        outputs=[features_out, characters_out, ip_out, meta_out, raw_out],
-        api_name="predict",
+        inputs=[
+            gr.State("image"), image, url,
+            general_threshold, character_threshold,
+            mode, topk_general, topk_character, include_scores, underscore_mode,
+        ],
+        outputs=[features_out, characters_out, ip_out, combined_out, meta_out, raw_out],
+        api_name="predict_image",
    )
+
+    # Copy buttons
+    copy_combined.click(lambda x: x, inputs=[combined_out], outputs=[combined_out])
+    copy_characters.click(lambda x: x, inputs=[characters_out], outputs=[characters_out])
+    copy_features.click(lambda x: x, inputs=[features_out], outputs=[features_out])
+    copy_ip.click(lambda x: x, inputs=[ip_out], outputs=[ip_out])
+    copy_raw.click(lambda x: x, inputs=[raw_out], outputs=[raw_out])
     clear_btn.click(
         clear,
         inputs=None,
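The payload assembled by the updated run_inference mirrors what EndpointHandler consumes, so the new options can also be exercised directly against a deployed endpoint. Below is a minimal sketch of such a request; the endpoint URL, token, and image URL are placeholders, and only the "inputs"/"parameters" shape and the response keys come from the diffs in this commit.

import requests

# Hypothetical endpoint URL and token; only the payload shape and response keys
# are taken from the diffs in this commit.
ENDPOINT_URL = "https://<your-inference-endpoint>"
HF_TOKEN = "hf_xxx"

payload = {
    "inputs": {"url": "https://example.com/image.jpg"},
    "parameters": {
        "general_threshold": 0.30,
        "character_threshold": 0.85,
        "mode": "topk",           # "threshold" (default) or "topk"
        "topk_general": 25,
        "topk_character": 10,
        "include_scores": True,   # adds feature_scores / character_scores to the response
    },
}

resp = requests.post(
    ENDPOINT_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"},
    json=payload,
    timeout=60,
)
out = resp.json()
print(out["character"], out["ip"], out["feature"])
print(out.get("_timings"), out.get("_params"))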
handler.py CHANGED
@@ -167,6 +167,11 @@ class EndpointHandler:
         character_threshold = parameters.pop(
             "character_threshold", self.default_character_threshold
         )
+        # Optional behavior controls
+        mode = parameters.pop("mode", "threshold")  # "threshold" | "topk"
+        include_scores = bool(parameters.pop("include_scores", False))
+        topk_general = int(parameters.pop("topk_general", 25))
+        topk_character = int(parameters.pop("topk_character", 10))

         inference_start_time = time.time()
         with torch.inference_mode():
@@ -181,18 +186,37 @@
             # Run model on GPU
             probs = self.model(image_tensor)[0]  # Get probs for the single image

-            # Perform thresholding directly on the GPU
-            general_mask = probs[: self.gen_tag_count] > general_threshold
-            character_mask = probs[self.gen_tag_count :] > character_threshold
+            if mode == "topk":
+                # Select top-k by category, independent of thresholds
+                gen_slice = probs[: self.gen_tag_count]
+                char_slice = probs[self.gen_tag_count :]
+                k_gen = max(0, min(int(topk_general), self.gen_tag_count))
+                k_char = max(0, min(int(topk_character), self.character_tag_count))
+                gen_scores, gen_idx = (torch.tensor([]), torch.tensor([], dtype=torch.long))
+                char_scores, char_idx = (torch.tensor([]), torch.tensor([], dtype=torch.long))
+                if k_gen > 0:
+                    gen_scores, gen_idx = torch.topk(gen_slice, k_gen)
+                if k_char > 0:
+                    char_scores, char_idx = torch.topk(char_slice, k_char)
+                    char_idx = char_idx + self.gen_tag_count
+
+                # Merge for unified post-processing
+                combined_indices = torch.cat((gen_idx, char_idx)).cpu()
+                combined_scores = torch.cat((gen_scores, char_scores)).cpu()
+            else:
+                # Perform thresholding directly on the GPU
+                general_mask = probs[: self.gen_tag_count] > general_threshold
+                character_mask = probs[self.gen_tag_count :] > character_threshold

-            # Get the indices of positive tags on the GPU
-            general_indices = general_mask.nonzero(as_tuple=True)[0]
-            character_indices = (
-                character_mask.nonzero(as_tuple=True)[0] + self.gen_tag_count
-            )
+                # Get the indices of positive tags on the GPU
+                general_indices = general_mask.nonzero(as_tuple=True)[0]
+                character_indices = (
+                    character_mask.nonzero(as_tuple=True)[0] + self.gen_tag_count
+                )

-            # Combine indices and move the small result tensor to the CPU
-            combined_indices = torch.cat((general_indices, character_indices)).cpu()
+                # Combine indices and move the small result tensor to the CPU
+                combined_indices = torch.cat((general_indices, character_indices)).cpu()
+                combined_scores = probs[combined_indices].detach().float().cpu()

         inference_time = time.time() - inference_start_time

@@ -200,15 +224,23 @@

         cur_gen_tags = []
         cur_char_tags = []
+        gen_scores_out: dict[str, float] = {}
+        char_scores_out: dict[str, float] = {}

         # Use the efficient pre-computed map for lookups
-        for i in combined_indices:
-            idx = i.item()
+        for pos, i in enumerate(combined_indices):
+            idx = int(i.item())
             tag = self.index_to_tag_map[idx]
             if idx < self.gen_tag_count:
                 cur_gen_tags.append(tag)
+                if include_scores:
+                    score = float(combined_scores[pos].item())
+                    gen_scores_out[tag] = score
             else:
                 cur_char_tags.append(tag)
+                if include_scores:
+                    score = float(combined_scores[pos].item())
+                    char_scores_out[tag] = score

         ip_tags = []
         for tag in cur_char_tags:
@@ -221,8 +253,27 @@
             f"Timing - Fetch: {fetch_time:.3f}s, Inference: {inference_time:.3f}s, Post-process: {post_process_time:.3f}s, Total: {fetch_time + inference_time + post_process_time:.3f}s"
         )

-        return {
+        out: dict[str, Any] = {
             "feature": cur_gen_tags,
             "character": cur_char_tags,
             "ip": ip_tags,
+            "_timings": {
+                "fetch_s": round(fetch_time, 4),
+                "inference_s": round(inference_time, 4),
+                "post_process_s": round(post_process_time, 4),
+                "total_s": round(fetch_time + inference_time + post_process_time, 4),
+            },
+            "_params": {
+                "mode": mode,
+                "general_threshold": general_threshold,
+                "character_threshold": character_threshold,
+                "topk_general": topk_general,
+                "topk_character": topk_character,
+            },
         }
+
+        if include_scores:
+            out["feature_scores"] = gen_scores_out
+            out["character_scores"] = char_scores_out
+
+        return out
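To see the new "topk" branch of EndpointHandler in isolation, here is a minimal sketch of the same selection logic on a dummy probability vector. The tag counts and random probabilities are made up for illustration; the slicing, torch.topk calls, and index offset mirror the code added above.

import torch

# Illustrative sizes only; in the handler these come from the loaded tag maps.
gen_tag_count = 6
character_tag_count = 4
probs = torch.rand(gen_tag_count + character_tag_count)

topk_general, topk_character = 3, 2

gen_slice = probs[:gen_tag_count]
char_slice = probs[gen_tag_count:]

# Clamp k to the number of available tags in each category.
k_gen = max(0, min(topk_general, gen_tag_count))
k_char = max(0, min(topk_character, character_tag_count))

gen_scores, gen_idx = torch.topk(gen_slice, k_gen)
char_scores, char_idx = torch.topk(char_slice, k_char)
char_idx = char_idx + gen_tag_count  # shift back into the global tag index space

combined_indices = torch.cat((gen_idx, char_idx)).cpu()
combined_scores = torch.cat((gen_scores, char_scores)).cpu()
print(combined_indices.tolist())
print([round(s, 3) for s in combined_scores.tolist()])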