narugo1992 committed
Commit e6b5e51 · 1 Parent(s): 66aead2

dev(narugo): save it

Files changed (2):
  1. encode.py +59 -0
  2. monochrome.py +26 -45
encode.py ADDED
@@ -0,0 +1,59 @@
+ import numpy as np
+
+ from image import load_image, ImageTyping
+
+ __all__ = [
+     'rgb_encode',
+ ]
+
+ _DEFAULT_ORDER = 'HWC'
+
+
+ def _get_hwc_map(order_: str):
+     return tuple(_DEFAULT_ORDER.index(c) for c in order_.upper())
+
+
+ def rgb_encode(image: ImageTyping, order_: str = 'CHW', use_float: bool = True) -> np.ndarray:
+     """
+     Overview:
+         Encode image as RGB channels.
+
+     :param image: Image to be encoded.
+     :param order_: Order of encoding, default is ``CHW``.
+     :param use_float: Use float to represent the channels, default is ``True``. ``np.uint8`` will be used when ``False``.
+     :return: Encoded RGB image.
+
+     Examples::
+         >>> from PIL import Image
+         >>> from encode import rgb_encode
+         >>>
+         >>> image = Image.open('custom_image.jpg')
+         >>> image
+         <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1606x1870 at 0x7F9EC37389D0>
+         >>>
+         >>> data = rgb_encode(image)
+         >>> data.shape, data.dtype
+         ((3, 1870, 1606), dtype('float32'))
+         >>> data = rgb_encode(image, order_='CHW')
+         >>> data.shape, data.dtype
+         ((3, 1870, 1606), dtype('float32'))
+         >>> data = rgb_encode(image, order_='WHC')
+         >>> data.shape, data.dtype
+         ((1606, 1870, 3), dtype('float32'))
+         >>> data = rgb_encode(image, use_float=False)
+         >>> data.shape, data.dtype
+         ((3, 1870, 1606), dtype('uint8'))
+
+     .. note::
+         The result of :func:`rgb_encode` is the same as that of \
+         ``torchvision.transforms.functional.to_tensor`` when the given ``image`` is in RGB mode.
+     """
+     image = load_image(image, mode='RGB')
+     array = np.asarray(image)
+     array = np.transpose(array, _get_hwc_map(order_))
+     if use_float:
+         array = (array / 255.0).astype(np.float32)
+         assert array.dtype == np.float32
+     else:
+         assert array.dtype == np.uint8
+     return array
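Not part of the commit itself, but a minimal usage sketch of the new rgb_encode helper. It assumes encode.py is importable from the working directory and that 'custom_image.jpg' is a placeholder for any local image; the optional torchvision comparison only illustrates the equivalence claimed in the docstring note.

import numpy as np
from PIL import Image

from encode import rgb_encode

# Placeholder path; any local RGB image works here.
image = Image.open('custom_image.jpg').convert('RGB')

# Default CHW float encoding, channel values scaled into [0, 1].
chw = rgb_encode(image)
assert chw.shape == (3, image.height, image.width)
assert chw.dtype == np.float32

# Raw uint8 channels, no scaling.
raw = rgb_encode(image, use_float=False)
assert raw.dtype == np.uint8

# Optional cross-check of the docstring note; skipped when torchvision is absent.
try:
    from torchvision.transforms.functional import to_tensor
    assert np.allclose(chw, to_tensor(image).numpy(), atol=1e-6)
except ImportError:
    pass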
monochrome.py CHANGED
@@ -1,24 +1,25 @@
  from functools import lru_cache
- from typing import Optional, Mapping
+ from typing import Optional, Tuple

  import numpy as np
- from PIL import Image, ImageFilter
+ from PIL import Image
+ from PIL.Image import Resampling
  from huggingface_hub import hf_hub_download
- from scipy import signal

+ from encode import rgb_encode
  from image import ImageTyping, load_image
  from onnxruntime_ import open_onnx_model

  __all__ = [
      'get_monochrome_score',
+     'is_monochrome',
  ]

- _DEFAULT_MONOCHROME_CKPT = 'monochrome-resnet18-safe2-450.onnx'
+ # _DEFAULT_MONOCHROME_CKPT = 'monochrome-resnet18-safe2-450.onnx'
  _MONOCHROME_CKPTS = [
-     'monochrome-resnet18-safe2-450.onnx',
-     'monochrome-resnet18-480.onnx',
-     'monochrome-alexnet-480.onnx',
+     'monochrome-caformer_safe2-80.onnx',
  ]
+ _DEFAULT_MONOCHROME_CKPT = _MONOCHROME_CKPTS[0]


  @lru_cache()
@@ -29,49 +30,29 @@ def _monochrome_validate_model(ckpt):
      ))


- def np_hist(x, a_min: float = 0.0, a_max: float = 1.0, bins: int = 256):
-     x = np.asarray(x)
-     edges = np.linspace(a_min, a_max, bins + 1)
-     cnt, _ = np.histogram(x, bins=edges)
-     return cnt / cnt.sum()
+ def _2d_encode(image: Image.Image, size: Tuple[int, int] = (384, 384),
+                normalize: Optional[Tuple[float, float]] = (0.5, 0.5)):
+     if image.mode != 'RGB':
+         image = image.convert('RGB')
+     image = image.resize(size, Resampling.BILINEAR)
+     data = rgb_encode(image, order_='CHW')

+     if normalize is not None:
+         mean_, std_ = normalize
+         mean = np.asarray([mean_]).reshape((-1, 1, 1))
+         std = np.asarray([std_]).reshape((-1, 1, 1))
+         data = (data - mean) / std

- def butterworth_filter(r, fc):
-     w = fc / (len(r) / 2)  # Normalize the frequency
-     b, a = signal.butter(5, w, 'low')
-     return np.clip(signal.filtfilt(b, a, r), a_min=0.0, a_max=1.0)
+     return data


- def _hsv_encode(image: Image.Image, feature_bins: int = 180, mf: Optional[int] = 5,
-                 maxpixels: int = 20000, fc: Optional[int] = 75, normalize: bool = True):
-     if image.width * image.height > maxpixels:
-         r = (image.width * image.height / maxpixels) ** 0.5
-         new_width, new_height = map(lambda x: int(round(x / r)), image.size)
-         image = image.resize((new_width, new_height))
-
-     if mf is not None:
-         image = image.filter(ImageFilter.MedianFilter(mf))
-     image = image.convert('HSV')
-
-     data = (np.transpose(np.asarray(image), (2, 0, 1)) / 255.0).astype(np.float32)
-     channels = [np_hist(data[i], bins=feature_bins) for i in range(3)]
-     if fc is not None:
-         channels = [butterworth_filter(ch, fc) for ch in channels]
-
-     dist = np.stack(channels)
-     assert dist.shape == (3, feature_bins)
-
-     if normalize:
-         mean = np.mean(dist, axis=1, keepdims=True)
-         std = np.std(dist, axis=1, keepdims=True, ddof=1)
-         dist = (dist - mean) / std
-
-     return dist
-
-
- def get_monochrome_score(image: ImageTyping, ckpt: str = _DEFAULT_MONOCHROME_CKPT) -> Mapping[str, float]:
+ def get_monochrome_score(image: ImageTyping, ckpt: str = _DEFAULT_MONOCHROME_CKPT) -> float:
      image = load_image(image, mode='RGB')
-     input_data = _hsv_encode(image).astype(np.float32)
+     input_data = _2d_encode(image).astype(np.float32)
      input_data = np.stack([input_data])
      output_data, = _monochrome_validate_model(ckpt).run(['output'], {'input': input_data})
      return {name: v.item() for name, v in zip(['normal', 'monochrome'], output_data[0])}
+
+
+ def is_monochrome(image: ImageTyping, threshold: float = 0.5, ckpt: str = _DEFAULT_MONOCHROME_CKPT) -> bool:
+     return get_monochrome_score(image, ckpt) >= threshold
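For orientation (again, not part of the diff), the sketch below spells out what the new _2d_encode preprocessing amounts to: a 384x384 bilinear resize, CHW float encoding via rgb_encode, then normalization with mean 0.5 and std 0.5. The torchvision pipeline is only an assumed equivalent for comparison, and the snippet presumes it runs alongside the repository's encode.py, image.py and onnxruntime_.py modules.

import numpy as np
from PIL import Image

from monochrome import _2d_encode

# Placeholder path; any local image can be used.
image = Image.open('custom_image.jpg')

data = _2d_encode(image)
# 384x384 bilinear resize, CHW channels in [0, 1], then (x - 0.5) / 0.5 -> [-1, 1].
assert data.shape == (3, 384, 384)

# Assumed-equivalent torchvision pipeline; only checked when torchvision is installed.
try:
    import torchvision.transforms as T
    reference = T.Compose([
        T.Resize((384, 384)),
        T.ToTensor(),
        T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])(image.convert('RGB')).numpy()
    assert np.allclose(data, reference, atol=1e-4)
except ImportError:
    pass

With a checkpoint resolved through hf_hub_download, is_monochrome(image) then simply thresholds the score returned by get_monochrome_score at 0.5 by default.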