trojblue committed
Commit 20d8f48 · 1 Parent(s): 132d7fb

update app.py

Files changed (2):
  1. app.py +153 -46
  2. handler.py +64 -13
app.py CHANGED
@@ -88,8 +88,15 @@ def run_inference(
     url: str,
     general_threshold: float,
     character_threshold: float,
+    mode_val: str,
+    topk_general_val: int,
+    topk_character_val: int,
+    include_scores_val: bool,
+    underscore_mode_val: bool,
 ):
-    if source_choice == "Upload image":
+    # Determine which input to use based on which Run button invoked the function.
+    # We'll pass a string flag via source_choice: either "url" or "image".
+    if source_choice == "image":
         if image is None:
             raise gr.Error("Please upload an image.")
         inputs = image
@@ -98,13 +105,15 @@ def run_inference(
             raise gr.Error("Please provide an image URL.")
         inputs = {"url": url.strip()}

-    data = {
-        "inputs": inputs,
-        "parameters": {
-            "general_threshold": float(general_threshold),
-            "character_threshold": float(character_threshold),
-        },
+    params = {
+        "general_threshold": float(general_threshold),
+        "character_threshold": float(character_threshold),
+        "mode": mode_val,
+        "topk_general": int(topk_general_val),
+        "topk_character": int(topk_character_val),
+        "include_scores": bool(include_scores_val),
     }
+    data = {"inputs": inputs, "parameters": params}

     started = time.time()
     try:
@@ -113,20 +122,62 @@ def run_inference(
         raise gr.Error(f"Inference error: {e}") from e
     latency = round(time.time() - started, 4)

-    features = ", ".join(sorted(out.get("feature", []))) or "—"
-    characters = ", ".join(sorted(out.get("character", []))) or "—"
-    ips = ", ".join(out.get("ip", [])) or "—"
+    # Individual outputs
+    if underscore_mode_val:
+        characters = " ".join(out.get("character", [])) or "—"
+        ips = " ".join(out.get("ip", [])) or "—"
+        features = " ".join(out.get("feature", [])) or "—"
+    elif include_scores_val:
+        gen_scores = out.get("feature_scores", {})
+        char_scores = out.get("character_scores", {})
+        characters = ", ".join(
+            f"{k.replace('_', ' ')} ({char_scores[k]:.2f})" for k in sorted(char_scores, key=char_scores.get, reverse=True)
+        ) or "—"
+        ips = ", ".join(tag.replace("_", " ") for tag in out.get("ip", [])) or "—"
+        features = ", ".join(
+            f"{k.replace('_', ' ')} ({gen_scores[k]:.2f})" for k in sorted(gen_scores, key=gen_scores.get, reverse=True)
+        ) or "—"
+    else:
+        characters = ", ".join(sorted(t.replace("_", " ") for t in out.get("character", []))) or "—"
+        ips = ", ".join(tag.replace("_", " ") for tag in out.get("ip", [])) or "—"
+        features = ", ".join(sorted(t.replace("_", " ") for t in out.get("feature", []))) or "—"
+
+    # Combined output: probability-descending if scores available; else character, IP, general
+    if underscore_mode_val:
+        combined = " ".join(out.get("character", []) + out.get("ip", []) + out.get("feature", [])) or "—"
+    else:
+        char_scores = out.get("character_scores") or {}
+        gen_scores = out.get("feature_scores") or {}
+        if include_scores_val and (char_scores or gen_scores):
+            # Build (tag, score) pairs
+            char_pairs = [(k, float(char_scores.get(k, 0.0))) for k in out.get("character", [])]
+            ip_pairs = [(k, 1.0) for k in out.get("ip", [])]  # IP has no score; treat equally
+            gen_pairs = [(k, float(gen_scores.get(k, 0.0))) for k in out.get("feature", [])]
+            all_pairs = char_pairs + ip_pairs + gen_pairs
+            all_pairs.sort(key=lambda t: t[1], reverse=True)
+            combined = ", ".join(
+                [f"{k.replace('_', ' ')} ({score:.2f})" if (k in char_scores or k in gen_scores) else k.replace('_', ' ') for k, score in all_pairs]
+            ) or "—"
+        else:
+            combined = ", ".join(
+                list(sorted(t.replace("_", " ") for t in out.get("character", []))) +
+                [tag.replace("_", " ") for tag in out.get("ip", [])] +
+                list(sorted(t.replace("_", " ") for t in out.get("feature", [])))
+            ) or "—"

     meta = {
         "device": handler.device,
         "latency_s_total": latency,
         **out.get("_timings", {}),
+        "params": out.get("_params", {}),
     }

-    return features, characters, ips, meta, out
+    return features, characters, ips, combined, meta, out
+

+theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="violet", radius_size="lg",)

-with gr.Blocks(title="PixAI Tagger v0.9 — Demo", fill_height=True) as demo:
+with gr.Blocks(title="PixAI Tagger v0.9 — Demo", fill_height=True, theme=theme, analytics_enabled=False) as demo:
     gr.Markdown(
         """
         # PixAI Tagger v0.9 — Gradio Demo
@@ -140,19 +191,41 @@ with gr.Blocks(title="PixAI Tagger v0.9 — Demo", fill_height=True) as demo:
         """
     )
     with gr.Row():
-        gr.Markdown(f"**{DEVICE_LABEL}**")
+        gr.Markdown(f"**{DEVICE_LABEL}** — adjust thresholds or switch to Top-K mode.")

-    with gr.Row():
-        source_choice = gr.Radio(
-            choices=["Upload image", "From URL"],
-            value="Upload image",
-            label="Image source",
+    with gr.Accordion("Settings", open=False):
+        mode = gr.Radio(
+            choices=["threshold", "topk"], value="threshold", label="Mode"
         )
+        with gr.Group(visible=True) as threshold_group:
+            general_threshold = gr.Slider(
+                minimum=0.0, maximum=1.0, step=0.01, value=0.30, label="General threshold"
+            )
+            character_threshold = gr.Slider(
+                minimum=0.0, maximum=1.0, step=0.01, value=0.85, label="Character threshold"
+            )
+        with gr.Group(visible=False) as topk_group:
+            topk_general = gr.Slider(
+                minimum=0, maximum=100, step=1, value=25, label="Top-K general"
+            )
+            topk_character = gr.Slider(
+                minimum=0, maximum=100, step=1, value=10, label="Top-K character"
+            )
+        include_scores = gr.Checkbox(value=False, label="Include scores in output")
+        underscore_mode = gr.Checkbox(value=False, label="Underscore-separated output")
+
+        def toggle_mode(selected):
+            return (
+                gr.update(visible=(selected == "threshold")),
+                gr.update(visible=(selected == "topk")),
+            )
+
+        mode.change(toggle_mode, inputs=[mode], outputs=[threshold_group, topk_group])

     with gr.Row(variant="panel"):
         with gr.Column(scale=2):
-            image = gr.Image(label="Upload image", type="pil", visible=True, height="500px")
-            url = gr.Textbox(label="Image URL", placeholder="https://…", visible=False)
+            image = gr.Image(label="Upload image", type="pil", visible=True, height="420px")
+            url = gr.Textbox(label="Image URL", placeholder="https://…", visible=True)

             def toggle_inputs(choice):
                 return (
@@ -160,48 +233,82 @@ with gr.Blocks(title="PixAI Tagger v0.9 — Demo", fill_height=True) as demo:
                     gr.update(visible=(choice == "From URL")),
                 )

-            source_choice.change(toggle_inputs, [source_choice], [image, url])

-        with gr.Column(scale=1):
-            general_threshold = gr.Slider(
-                minimum=0.0, maximum=1.0, step=0.01, value=0.30, label="General threshold"
-            )
-            character_threshold = gr.Slider(
-                minimum=0.0, maximum=1.0, step=0.01, value=0.85, label="Character threshold"
-            )
-            run_btn = gr.Button("Run", variant="primary")
-            clear_btn = gr.Button("Clear")

-    with gr.Row():
-        with gr.Column():
-            gr.Markdown("### Predicted Tags")
-            features_out = gr.Textbox(label="General tags", lines=4)
-            characters_out = gr.Textbox(label="Character tags", lines=4)
-            ip_out = gr.Textbox(label="IP tags", lines=2)
+        with gr.Column(scale=3):
+            # No source choice; show both inputs and two run buttons
+            with gr.Row():
+                run_image_btn = gr.Button("Run from image", variant="primary")
+                run_url_btn = gr.Button("Run from URL")
+                clear_btn = gr.Button("Clear")

+            gr.Markdown("### Combined Output (character → IP → general)")
+            combined_out = gr.Textbox(label="Combined tags", lines=10,)
+            copy_combined = gr.Button("Copy combined")
+
+    with gr.Row():
         with gr.Column():
-            gr.Markdown("### Metadata & Raw Output")
-            meta_out = gr.JSON(label="Timings/Device")
-            raw_out = gr.JSON(label="Raw JSON")
+            gr.Markdown("### Character / General / IP")
+            with gr.Row():
+                with gr.Column():
+                    characters_out = gr.Textbox(label="Character tags", lines=5,)
+                with gr.Column():
+                    features_out = gr.Textbox(label="General tags", lines=5,)
+                with gr.Column():
+                    ip_out = gr.Textbox(label="IP tags", lines=5,)
+            with gr.Row():
+                copy_characters = gr.Button("Copy character")
+                copy_features = gr.Button("Copy general")
+                copy_ip = gr.Button("Copy IP")
+
+    with gr.Accordion("Metadata & Raw Output", open=False):
+        with gr.Row():
+            with gr.Column():
+                meta_out = gr.JSON(label="Timings/Device")
+                raw_out = gr.JSON(label="Raw JSON")
+                copy_raw = gr.Button("Copy raw JSON")

     examples = gr.Examples(
         label="Examples (URL mode)",
         examples=[
-            ["From URL", None, "https://cdn.donmai.us/sample/50/b7/__komeiji_koishi_touhou_drawn_by_cui_ying__sample-50b7006f16e0144d5b5db44cadc2d22f.jpg", 0.30, 0.85],
+            [None, "https://cdn.donmai.us/sample/50/b7/__komeiji_koishi_touhou_drawn_by_cui_ying__sample-50b7006f16e0144d5b5db44cadc2d22f.jpg", 0.30, 0.85, "threshold", 25, 10, False, False],
         ],
-        inputs=[source_choice, image, url, general_threshold, character_threshold],
+        inputs=[image, url, general_threshold, character_threshold, mode, topk_general, topk_character, include_scores, underscore_mode],
         cache_examples=False,
     )

     def clear():
-        return (None, "", 0.30, 0.85, "", "", "", {}, {})
+        return (None, "", 0.30, 0.85, "", "", "", "", {}, {})
+
+    # Bind buttons separately with a flag for source
+    run_url_btn.click(
+        run_inference,
+        inputs=[
+            gr.State("url"), image, url,
+            general_threshold, character_threshold,
+            mode, topk_general, topk_character, include_scores, underscore_mode,
+        ],
+        outputs=[features_out, characters_out, ip_out, combined_out, meta_out, raw_out],
+        api_name="predict_url",
+    )

-    run_btn.click(
+    run_image_btn.click(
         run_inference,
-        inputs=[source_choice, image, url, general_threshold, character_threshold],
-        outputs=[features_out, characters_out, ip_out, meta_out, raw_out],
-        api_name="predict",
+        inputs=[
+            gr.State("image"), image, url,
+            general_threshold, character_threshold,
+            mode, topk_general, topk_character, include_scores, underscore_mode,
+        ],
+        outputs=[features_out, characters_out, ip_out, combined_out, meta_out, raw_out],
+        api_name="predict_image",
    )
+
+    # Copy buttons
+    copy_combined.click(lambda x: x, inputs=[combined_out], outputs=[combined_out])
+    copy_characters.click(lambda x: x, inputs=[characters_out], outputs=[characters_out])
+    copy_features.click(lambda x: x, inputs=[features_out], outputs=[features_out])
+    copy_ip.click(lambda x: x, inputs=[ip_out], outputs=[ip_out])
+    copy_raw.click(lambda x: x, inputs=[raw_out], outputs=[raw_out])
     clear_btn.click(
         clear,
         inputs=None,
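The payload assembled by the updated run_inference mirrors what EndpointHandler consumes, so the new options can also be exercised directly against a deployed endpoint. Below is a minimal sketch of such a request; the endpoint URL, token, and image URL are placeholders, and only the "inputs"/"parameters" shape and the response keys come from the diffs in this commit.

import requests

# Hypothetical endpoint URL and token; only the payload shape and response keys
# are taken from the diffs in this commit.
ENDPOINT_URL = "https://<your-inference-endpoint>"
HF_TOKEN = "hf_xxx"

payload = {
    "inputs": {"url": "https://example.com/image.jpg"},
    "parameters": {
        "general_threshold": 0.30,
        "character_threshold": 0.85,
        "mode": "topk",           # "threshold" (default) or "topk"
        "topk_general": 25,
        "topk_character": 10,
        "include_scores": True,   # adds feature_scores / character_scores to the response
    },
}

resp = requests.post(
    ENDPOINT_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"},
    json=payload,
    timeout=60,
)
out = resp.json()
print(out["character"], out["ip"], out["feature"])
print(out.get("_timings"), out.get("_params"))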
handler.py CHANGED
@@ -167,6 +167,11 @@ class EndpointHandler:
         character_threshold = parameters.pop(
             "character_threshold", self.default_character_threshold
         )
+        # Optional behavior controls
+        mode = parameters.pop("mode", "threshold")  # "threshold" | "topk"
+        include_scores = bool(parameters.pop("include_scores", False))
+        topk_general = int(parameters.pop("topk_general", 25))
+        topk_character = int(parameters.pop("topk_character", 10))

         inference_start_time = time.time()
         with torch.inference_mode():
@@ -181,18 +186,37 @@
             # Run model on GPU
             probs = self.model(image_tensor)[0]  # Get probs for the single image

-            # Perform thresholding directly on the GPU
-            general_mask = probs[: self.gen_tag_count] > general_threshold
-            character_mask = probs[self.gen_tag_count :] > character_threshold
+            if mode == "topk":
+                # Select top-k by category, independent of thresholds
+                gen_slice = probs[: self.gen_tag_count]
+                char_slice = probs[self.gen_tag_count :]
+                k_gen = max(0, min(int(topk_general), self.gen_tag_count))
+                k_char = max(0, min(int(topk_character), self.character_tag_count))
+                gen_scores, gen_idx = (torch.tensor([]), torch.tensor([], dtype=torch.long))
+                char_scores, char_idx = (torch.tensor([]), torch.tensor([], dtype=torch.long))
+                if k_gen > 0:
+                    gen_scores, gen_idx = torch.topk(gen_slice, k_gen)
+                if k_char > 0:
+                    char_scores, char_idx = torch.topk(char_slice, k_char)
+                    char_idx = char_idx + self.gen_tag_count
+
+                # Merge for unified post-processing
+                combined_indices = torch.cat((gen_idx, char_idx)).cpu()
+                combined_scores = torch.cat((gen_scores, char_scores)).cpu()
+            else:
+                # Perform thresholding directly on the GPU
+                general_mask = probs[: self.gen_tag_count] > general_threshold
+                character_mask = probs[self.gen_tag_count :] > character_threshold

-            # Get the indices of positive tags on the GPU
-            general_indices = general_mask.nonzero(as_tuple=True)[0]
-            character_indices = (
-                character_mask.nonzero(as_tuple=True)[0] + self.gen_tag_count
-            )
+                # Get the indices of positive tags on the GPU
+                general_indices = general_mask.nonzero(as_tuple=True)[0]
+                character_indices = (
+                    character_mask.nonzero(as_tuple=True)[0] + self.gen_tag_count
+                )

-            # Combine indices and move the small result tensor to the CPU
-            combined_indices = torch.cat((general_indices, character_indices)).cpu()
+                # Combine indices and move the small result tensor to the CPU
+                combined_indices = torch.cat((general_indices, character_indices)).cpu()
+                combined_scores = probs[combined_indices].detach().float().cpu()

         inference_time = time.time() - inference_start_time

@@ -200,15 +224,23 @@

         cur_gen_tags = []
         cur_char_tags = []
+        gen_scores_out: dict[str, float] = {}
+        char_scores_out: dict[str, float] = {}

         # Use the efficient pre-computed map for lookups
-        for i in combined_indices:
-            idx = i.item()
+        for pos, i in enumerate(combined_indices):
+            idx = int(i.item())
             tag = self.index_to_tag_map[idx]
             if idx < self.gen_tag_count:
                 cur_gen_tags.append(tag)
+                if include_scores:
+                    score = float(combined_scores[pos].item())
+                    gen_scores_out[tag] = score
             else:
                 cur_char_tags.append(tag)
+                if include_scores:
+                    score = float(combined_scores[pos].item())
+                    char_scores_out[tag] = score

         ip_tags = []
         for tag in cur_char_tags:
@@ -221,8 +253,27 @@
             f"Timing - Fetch: {fetch_time:.3f}s, Inference: {inference_time:.3f}s, Post-process: {post_process_time:.3f}s, Total: {fetch_time + inference_time + post_process_time:.3f}s"
         )

-        return {
+        out: dict[str, Any] = {
             "feature": cur_gen_tags,
             "character": cur_char_tags,
             "ip": ip_tags,
+            "_timings": {
+                "fetch_s": round(fetch_time, 4),
+                "inference_s": round(inference_time, 4),
+                "post_process_s": round(post_process_time, 4),
+                "total_s": round(fetch_time + inference_time + post_process_time, 4),
+            },
+            "_params": {
+                "mode": mode,
+                "general_threshold": general_threshold,
+                "character_threshold": character_threshold,
+                "topk_general": topk_general,
+                "topk_character": topk_character,
+            },
         }
+
+        if include_scores:
+            out["feature_scores"] = gen_scores_out
+            out["character_scores"] = char_scores_out
+
+        return out
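To see the new "topk" branch of EndpointHandler in isolation, here is a minimal sketch of the same selection logic on a dummy probability vector. The tag counts and random probabilities are made up for illustration; the slicing, torch.topk calls, and index offset mirror the code added above.

import torch

# Illustrative sizes only; in the handler these come from the loaded tag maps.
gen_tag_count = 6
character_tag_count = 4
probs = torch.rand(gen_tag_count + character_tag_count)

topk_general, topk_character = 3, 2

gen_slice = probs[:gen_tag_count]
char_slice = probs[gen_tag_count:]

# Clamp k to the number of available tags in each category.
k_gen = max(0, min(topk_general, gen_tag_count))
k_char = max(0, min(topk_character, character_tag_count))

gen_scores, gen_idx = torch.topk(gen_slice, k_gen)
char_scores, char_idx = torch.topk(char_slice, k_char)
char_idx = char_idx + gen_tag_count  # shift back into the global tag index space

combined_indices = torch.cat((gen_idx, char_idx)).cpu()
combined_scores = torch.cat((gen_scores, char_scores)).cpu()
print(combined_indices.tolist())
print([round(s, 3) for s in combined_scores.tolist()])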