Spaces:
Running
Running
fancyfeast
committed on
Commit
·
b72bef3
1
Parent(s):
df9e86f
Prepare images correctly
Browse files
app.py
CHANGED
|
@@ -4,15 +4,45 @@ import huggingface_hub
|
|
| 4 |
from PIL import Image
|
| 5 |
import torch.amp.autocast_mode
|
| 6 |
from pathlib import Path
|
|
|
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
MODEL_REPO = "fancyfeast/joytag"
|
| 10 |
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
@torch.no_grad()
|
| 13 |
def predict(image: Image.Image):
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
tag_preds = preds['tags'].sigmoid().cpu()
|
| 17 |
|
| 18 |
return {top_tags[i]: tag_preds[i] for i in range(len(top_tags))}
|
|
|
|
| 4 |
from PIL import Image
|
| 5 |
import torch.amp.autocast_mode
|
| 6 |
from pathlib import Path
|
| 7 |
+
import torch
|
| 8 |
+
import torchvision.transforms.functional as TVF
|
| 9 |
|
| 10 |
|
| 11 |
MODEL_REPO = "fancyfeast/joytag"
|
| 12 |
|
| 13 |
|
| 14 |
+
def prepare_image(image: Image.Image, target_size: int) -> torch.Tensor:
    """Pad *image* to a white square, resize it to *target_size*, and return a
    normalized CHW float tensor ready for the model.

    The image is centered on a square canvas whose side equals its longest
    dimension, bicubic-resized when the side differs from the target, scaled
    to [0, 1], and normalized with CLIP-style mean/std.
    """
    width, height = image.size
    side = max(width, height)

    # Center the original image on a white square canvas.
    canvas = Image.new('RGB', (side, side), (255, 255, 255))
    canvas.paste(image, ((side - width) // 2, (side - height) // 2))

    # Only resize when the padded square is not already the target size.
    if side != target_size:
        canvas = canvas.resize((target_size, target_size), Image.BICUBIC)

    # To float tensor in [0, 1], then CLIP-style normalization.
    tensor = TVF.pil_to_tensor(canvas) / 255.0
    tensor = TVF.normalize(tensor, mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])

    return tensor
|
| 35 |
+
|
| 36 |
+
|
| 37 |
@torch.no_grad()
def predict(image: Image.Image):
    """Run the tag model on one PIL image and return a {tag: score} mapping.

    Preprocesses the image to the model's input size, runs inference under
    CPU autocast, and returns sigmoid probabilities for each of `top_tags`.
    """
    # Preprocess and add a leading batch dimension.
    pixels = prepare_image(image, model.image_size)
    batch = {'image': pixels.unsqueeze(0)}

    # Inference under CPU autocast.
    with torch.amp.autocast_mode.autocast('cpu', enabled=True):
        preds = model(batch)
    scores = preds['tags'].sigmoid().cpu()

    return {tag: scores[i] for i, tag in enumerate(top_tags)}
|