Spaces: CityAesthetics Demo
Commit: 9ac551a
Parent(s): a23f9da
Committed by: City
Commit message: Sync with GitHub

Files changed:
- README.md (+2 -4)
- app.py → demo_score_gradio.py (+56 -113)
- inference.py (+102 -0)
- model.py (+44 -44)
- requirements.txt (+3 -3)
README.md CHANGED

@@ -1,14 +1,12 @@
 ---
 title: CityAesthetics Demo
-emoji:
+emoji: ✨
 colorFrom: blue
 colorTo: yellow
 sdk: gradio
 sdk_version: 3.23.0
-app_file:
+app_file: demo_score_gradio.py
 models: [city96/CityAesthetics]
 pinned: false
 license: apache-2.0
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py → demo_score_gradio.py RENAMED

@@ -1,113 +1,56 @@

demo_score_gradio.py after this commit:

import os
import gradio as gr

from inference import CityAestheticsMultiModelPipeline, get_model_path

TOKEN = os.environ.get("HFS_TOKEN")
HFREPO = "City96/CityAesthetics"
MODELS = [
    "CityAesthetics-Anime-v1.8",
]
article = """\
# About

This is the live demo for the CityAesthetics class of predictors.

For more information, you can check out the [Huggingface Hub](https://huggingface.co/city96/CityAesthetics) or [GitHub page](https://github.com/city96/CityClassifiers).

## CityAesthetics-Anime

This flavor is optimized for scoring anime images with at least one subject present.

### Intentional biases:

- Completely negative towards real life photos (ideal score of 0%)
- Strongly Negative towards text (subtitles, memes, etc) and manga panels
- Fairly negative towards 3D and to some extent 2.5D images
- Negative towards western cartoons and stylized images (chibi, parody)

### Expected output scores:

- Non-anime images should always score below 20%
- Sketches/rough lineart/oekaki get around 20-40%
- Flat shading/TV anime gets around 40-50%
- Above 50% is mostly scored based on my personal style preferences

### Issues:

- Tends to filter male characters.
- Requires at least 1 subject, won't work for scenery/landscapes.
- Noticeable positive bias towards anime characters with animal ears.
- Hit-or-miss with AI generated images due to style/quality not being correlated.
"""

pipeline = CityAestheticsMultiModelPipeline(
    [get_model_path(x, HFREPO, TOKEN) for x in MODELS],
)
gr.Interface(
    fn = pipeline,
    title = "CityAesthetics demo",
    article = article,
    inputs = gr.Image(label="Input image", type="pil"),
    outputs = gr.Label(label="Model prediction", show_label=False),
    examples = "./examples" if os.path.isdir("./examples") else None,
    allow_flagging = "never",
    analytics_enabled = False,
).launch()
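The demo wires the multi-model pipeline into gr.Interface, but the scoring path can also be exercised without the web UI. A minimal sketch, assuming the same model list as above and a placeholder local image path ("example.png" is hypothetical); any PIL image works as input:

import os
from PIL import Image

from inference import CityAestheticsMultiModelPipeline, get_model_path

# Same setup as demo_score_gradio.py, minus the Gradio interface.
TOKEN = os.environ.get("HFS_TOKEN")  # optional token for the HF Hub fallback
HFREPO = "City96/CityAesthetics"
MODELS = ["CityAesthetics-Anime-v1.8"]

pipeline = CityAestheticsMultiModelPipeline(
    [get_model_path(x, HFREPO, TOKEN) for x in MODELS],
)

# "example.png" is a placeholder; the pipeline accepts any PIL RGB image.
image = Image.open("example.png").convert("RGB")
scores = pipeline(image)  # dict of {model name: score in [0.0;1.0]}
print(scores)             # illustrative output: {"CityAesthetics-Anime-v1.8": 0.47}

This is the same dict that gr.Label receives in the demo, which is why the interface can show one labeled score per loaded model.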
inference.py ADDED

@@ -0,0 +1,102 @@

import os
import torch
from safetensors.torch import load_file
from huggingface_hub import hf_hub_download
from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection

from model import AestheticPredictorModel

class CityAestheticsPipeline:
    """
    Demo model pipeline for [image=>score] prediction
    Accepts a single model path on initialization.
    Resulting object can be called directly with a PIL image as the input
    Returns a single float value with the predicted score [0.0;1.0].
    """
    clip_ver = "openai/clip-vit-large-patch14"
    def __init__(self, model_path, device="cpu", clip_dtype=torch.float32):
        self.device = device
        self.clip_dtype = clip_dtype
        self._init_clip()
        self.model = self._load_model(model_path)
        print("CityAesthetics: Pipeline init ok") # debug

    def __call__(self, raw):
        emb = self.get_clip_emb(raw)
        return self.get_model_pred(self.model, emb)

    def get_model_pred(self, model, emb):
        with torch.no_grad():
            pred = model(emb)
        return float(pred.detach().cpu().squeeze(0))

    def get_clip_emb(self, raw):
        img = self.proc(
            images = raw,
            return_tensors = "pt"
        )["pixel_values"].to(self.clip_dtype).to(self.device)
        with torch.no_grad():
            emb = self.clip(pixel_values=img)
        return emb["image_embeds"].detach().to(torch.float32)

    def _init_clip(self):
        self.proc = CLIPImageProcessor.from_pretrained(self.clip_ver)
        self.clip = CLIPVisionModelWithProjection.from_pretrained(
            self.clip_ver,
            device_map = self.device,
            torch_dtype = self.clip_dtype,
        )

    def _load_model(self, path):
        sd = load_file(path)
        assert tuple(sd["up.0.weight"].shape) == (1024, 768) # only allow CLIP ver
        model = AestheticPredictorModel()
        model.eval()
        model.load_state_dict(sd)
        model.to(self.device)
        return model

class CityAestheticsMultiModelPipeline(CityAestheticsPipeline):
    """
    Demo multi-model pipeline for [image=>score] prediction
    Accepts a list of model paths on initialization.
    Resulting object can be called directly with a PIL image as the input.
    Returns a dict with the model name as key and the score [0.0;1.0] as a value.
    """
    def __init__(self, model_paths, device="cpu", clip_dtype=torch.float32):
        self.device = device
        self.clip_dtype = clip_dtype
        self._init_clip()
        self.models = {}
        for path in model_paths:
            name = os.path.splitext(os.path.basename(path))[0]
            self.models[name] = self._load_model(path)
        print("CityAesthetics: Pipeline init ok") # debug

    def __call__(self, raw):
        emb = self.get_clip_emb(raw)
        out = {}
        for name, model in self.models.items():
            pred = model(emb)
            out[name] = self.get_model_pred(model, emb)
        return out

def get_model_path(name, repo, token=True):
    """
    Returns local model path or falls back to HF hub if required.
    """
    fname = f"{name}.safetensors"

    # local path: [models/AesPred-Anime-v1.8.safetensors]
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"models")
    if os.path.isfile(os.path.join(path, fname)):
        print("CityAesthetics: Using local model")
        return os.path.join(path, fname)

    # huggingface hub fallback
    print("CityAesthetics: Using HF Hub model")
    return str(hf_hub_download(
        token = token,
        repo_id = repo,
        filename = fname,
    ))
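The single-model CityAestheticsPipeline follows the same pattern as the multi-model class but, per its docstring, returns a bare float rather than a dict. A minimal sketch; the model path and image path below are placeholder assumptions (in the demo the path comes from get_model_path):

from PIL import Image

from inference import CityAestheticsPipeline

# Placeholder path to a downloaded .safetensors checkpoint.
model_path = "models/CityAesthetics-Anime-v1.8.safetensors"

pipeline = CityAestheticsPipeline(model_path)           # CPU + float32 by default
score = pipeline(Image.open("example.png").convert("RGB"))
print(f"score: {score:.3f}")                            # single float in [0.0;1.0]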
model.py CHANGED

@@ -1,44 +1,44 @@

model.py after this commit:

import torch
import torch.nn as nn

class ResBlock(nn.Module):
    """Linear block with residuals"""
    def __init__(self, ch):
        super().__init__()
        self.join = nn.ReLU()
        self.long = nn.Sequential(
            nn.Linear(ch, ch),
            nn.LeakyReLU(0.1),
            nn.Linear(ch, ch),
            nn.LeakyReLU(0.1),
            nn.Linear(ch, ch),
        )
    def forward(self, x):
        return self.join(self.long(x) + x)

class AestheticPredictorModel(nn.Module):
    """
    Main predictor class. Original:
    https://github.com/city96/CityClassifiers/blob/main/model.py
    """
    def __init__(self, features=768, hidden=1024):
        super().__init__()
        self.features = features
        self.hidden = hidden
        self.up = nn.Sequential(
            nn.Linear(self.features, self.hidden),
            ResBlock(ch=self.hidden),
        )
        self.down = nn.Sequential(
            nn.Linear(self.hidden, 128),
            nn.Linear(128, 64),
            nn.Dropout(0.1),
            nn.LeakyReLU(),
            nn.Linear(64, 32),
            nn.Linear(32, 1),
            nn.Tanh(),
        )
    def forward(self, x):
        y = self.up(x)
        z = self.down(y)
        return (z+1.0)/2.0
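As a quick sanity check on the architecture: the predictor takes a 768-dimensional CLIP ViT-L/14 image embedding, and because the final Tanh output is rescaled via (z+1.0)/2.0, the score always lands in [0.0;1.0]. A minimal sketch with a random embedding (untrained weights, so the value itself is meaningless):

import torch
from model import AestheticPredictorModel

model = AestheticPredictorModel()   # features=768, hidden=1024
model.eval()                        # disable the Dropout layer

emb = torch.randn(1, 768)           # stand-in for a CLIP image embedding
with torch.no_grad():
    score = model(emb)

print(score.shape)                  # torch.Size([1, 1])
assert 0.0 <= float(score) <= 1.0   # Tanh output rescaled to [0, 1]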
requirements.txt CHANGED

@@ -1,3 +1,3 @@

requirements.txt after this commit:

torch==2.1.0
safetensors==0.4.0
transformers==4.35.0