HaisuGuan committed on
Commit cf75d92 · 1 Parent(s): 332a731

Model code

Files changed (5)
  1. utils/__init__.py +3 -0
  2. utils/logging.py +22 -0
  3. utils/metrics.py +207 -0
  4. utils/optimize.py +13 -0
  5. utils/sampling.py +89 -0
utils/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from utils.logging import *
+ from utils.sampling import *
+ from utils.optimize import *
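
Note that utils/metrics.py is not re-exported by these star imports, so its functions still need an explicit submodule import. A minimal sketch of what callers can do after this change:

    from utils import save_image, get_optimizer, generalized_steps
    from utils.metrics import calculate_psnr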
utils/logging.py ADDED
@@ -0,0 +1,22 @@
+ import torch
+ import os
+ import torchvision.utils as tvu
+
+
+ def save_image(img, file_directory):
+     if not os.path.exists(os.path.dirname(file_directory)):
+         os.makedirs(os.path.dirname(file_directory))
+     tvu.save_image(img, file_directory)
+
+
+ def save_checkpoint(state, filename):
+     if not os.path.exists(os.path.dirname(filename)):
+         os.makedirs(os.path.dirname(filename))
+     torch.save(state, filename + '.pth.tar')
+
+
+ def load_checkpoint(path, device):
+     if device is None:
+         return torch.load(path)
+     else:
+         return torch.load(path, map_location=device)
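
A short usage sketch of these helpers (not part of the commit; the paths and placeholder model below are illustrative):

    import torch
    from utils.logging import save_checkpoint, load_checkpoint, save_image

    model = torch.nn.Linear(4, 4)  # placeholder model for illustration

    # save_checkpoint appends '.pth.tar', so pass the path without an extension;
    # the parent directory is created if it does not exist.
    save_checkpoint({'model': model.state_dict()}, 'ckpts/demo')

    # Pass device=None to keep tensors on the device they were saved from.
    state = load_checkpoint('ckpts/demo.pth.tar', torch.device('cpu'))
    model.load_state_dict(state['model'])

    # save_image also creates the parent directory before writing.
    save_image(torch.rand(1, 3, 64, 64), 'results/sample.png')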
utils/metrics.py ADDED
@@ -0,0 +1,207 @@
+ import cv2
+ import numpy as np
+ import PIL
+
+ def calculate_psnr(img1, img2, test_y_channel=False):
+     """Calculate PSNR (Peak Signal-to-Noise Ratio).
+
+     Ref: https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio
+
+     Args:
+         img1 (ndarray): Images with range [0, 255].
+         img2 (ndarray): Images with range [0, 255].
+         test_y_channel (bool): Test on Y channel of YCbCr. Default: False.
+
+     Returns:
+         float: psnr result.
+     """
+
+     assert img1.shape == img2.shape, (f'Image shapes are different: {img1.shape}, {img2.shape}.')
+     assert img1.shape[2] == 3
+     img1 = img1.astype(np.float64)
+     img2 = img2.astype(np.float64)
+
+     if test_y_channel:
+         img1 = to_y_channel(img1)
+         img2 = to_y_channel(img2)
+
+     mse = np.mean((img1 - img2) ** 2)
+     if mse == 0:
+         return float('inf')
+     return 20. * np.log10(255. / np.sqrt(mse))
+
+
+ def _ssim(img1, img2):
+     """Calculate SSIM (structural similarity) for one-channel images.
+
+     It is called by :func:`calculate_ssim`.
+
+     Args:
+         img1 (ndarray): Images with range [0, 255] with order 'HWC'.
+         img2 (ndarray): Images with range [0, 255] with order 'HWC'.
+
+     Returns:
+         float: ssim result.
+     """
+
+     C1 = (0.01 * 255) ** 2
+     C2 = (0.03 * 255) ** 2
+
+     img1 = img1.astype(np.float64)
+     img2 = img2.astype(np.float64)
+     kernel = cv2.getGaussianKernel(11, 1.5)
+     window = np.outer(kernel, kernel.transpose())
+
+     mu1 = cv2.filter2D(img1, -1, window)[5:-5, 5:-5]
+     mu2 = cv2.filter2D(img2, -1, window)[5:-5, 5:-5]
+     mu1_sq = mu1 ** 2
+     mu2_sq = mu2 ** 2
+     mu1_mu2 = mu1 * mu2
+     sigma1_sq = cv2.filter2D(img1 ** 2, -1, window)[5:-5, 5:-5] - mu1_sq
+     sigma2_sq = cv2.filter2D(img2 ** 2, -1, window)[5:-5, 5:-5] - mu2_sq
+     sigma12 = cv2.filter2D(img1 * img2, -1, window)[5:-5, 5:-5] - mu1_mu2
+
+     ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))
+     return ssim_map.mean()
+
+
+ def calculate_ssim(img1, img2, test_y_channel=False):
+     """Calculate SSIM (structural similarity).
+
+     Ref:
+     Image quality assessment: From error visibility to structural similarity
+
+     The results are the same as those of the officially released MATLAB code in
+     https://ece.uwaterloo.ca/~z70wang/research/ssim/.
+
+     For three-channel images, SSIM is calculated for each channel and then
+     averaged.
+
+     Args:
+         img1 (ndarray): Images with range [0, 255].
+         img2 (ndarray): Images with range [0, 255].
+         test_y_channel (bool): Test on Y channel of YCbCr. Default: False.
+
+     Returns:
+         float: ssim result.
+     """
+
+     assert img1.shape == img2.shape, (f'Image shapes are different: {img1.shape}, {img2.shape}.')
+     assert img1.shape[2] == 3
+     img1 = img1.astype(np.float64)
+     img2 = img2.astype(np.float64)
+
+     if test_y_channel:
+         img1 = to_y_channel(img1)
+         img2 = to_y_channel(img2)
+
+     ssims = []
+     for i in range(img1.shape[2]):
+         ssims.append(_ssim(img1[..., i], img2[..., i]))
+     return np.array(ssims).mean()
+
+
+ def to_y_channel(img):
+     """Change to Y channel of YCbCr.
+
+     Args:
+         img (ndarray): Images with range [0, 255].
+
+     Returns:
+         (ndarray): Images with range [0, 255] (float type) without rounding.
+     """
+     img = img.astype(np.float32) / 255.
+     if img.ndim == 3 and img.shape[2] == 3:
+         img = bgr2ycbcr(img, y_only=True)
+         img = img[..., None]
+     return img * 255.
+
+
+ def _convert_input_type_range(img):
+     """Convert the type and range of the input image.
+
+     It converts the input image to np.float32 type and range of [0, 1].
+     It is mainly used for pre-processing the input image in colorspace
+     conversion functions such as rgb2ycbcr and ycbcr2rgb.
+
+     Args:
+         img (ndarray): The input image. It accepts:
+             1. np.uint8 type with range [0, 255];
+             2. np.float32 type with range [0, 1].
+
+     Returns:
+         (ndarray): The converted image with type of np.float32 and range of
+             [0, 1].
+     """
+     img_type = img.dtype
+     img = img.astype(np.float32)
+     if img_type == np.float32:
+         pass
+     elif img_type == np.uint8:
+         img /= 255.
+     else:
+         raise TypeError('The img type should be np.float32 or np.uint8, ' f'but got {img_type}')
+     return img
+
+
+ def _convert_output_type_range(img, dst_type):
+     """Convert the type and range of the image according to dst_type.
+
+     It converts the image to the desired type and range. If `dst_type` is
+     np.uint8, images will be converted to np.uint8 type with range [0, 255].
+     If `dst_type` is np.float32, it converts the image to np.float32 type
+     with range [0, 1].
+     It is mainly used for post-processing images in colorspace conversion
+     functions such as rgb2ycbcr and ycbcr2rgb.
+
+     Args:
+         img (ndarray): The image to be converted with np.float32 type and
+             range [0, 255].
+         dst_type (np.uint8 | np.float32): If dst_type is np.uint8, it
+             converts the image to np.uint8 type with range [0, 255]. If
+             dst_type is np.float32, it converts the image to np.float32 type
+             with range [0, 1].
+
+     Returns:
+         (ndarray): The converted image with desired type and range.
+     """
+     if dst_type not in (np.uint8, np.float32):
+         raise TypeError('The dst_type should be np.float32 or np.uint8, ' f'but got {dst_type}')
+     if dst_type == np.uint8:
+         img = img.round()
+     else:
+         img /= 255.
+     return img.astype(dst_type)
+
+
+ def bgr2ycbcr(img, y_only=False):
+     """Convert a BGR image to YCbCr image.
+
+     The BGR version of rgb2ycbcr.
+     It implements the ITU-R BT.601 conversion for standard-definition
+     television. See more details in
+     https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
+
+     It differs from a similar function in cv2.cvtColor: `BGR <-> YCrCb`.
+     In OpenCV, it implements a JPEG conversion. See more details in
+     https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
+
+     Args:
+         img (ndarray): The input image. It accepts:
+             1. np.uint8 type with range [0, 255];
+             2. np.float32 type with range [0, 1].
+         y_only (bool): Whether to only return Y channel. Default: False.
+
+     Returns:
+         ndarray: The converted YCbCr image. The output image has the same type
+             and range as the input image.
+     """
+     img_type = img.dtype
+     img = _convert_input_type_range(img)
+     if y_only:
+         out_img = np.dot(img, [24.966, 128.553, 65.481]) + 16.0
+     else:
+         out_img = np.matmul(
+             img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786], [65.481, -37.797, 112.0]]) + [16, 128, 128]
+     out_img = _convert_output_type_range(out_img, img_type)
+     return out_img
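
A quick sanity check of the metrics (a sketch, not part of the commit; the random pair below stands in for a restored/ground-truth pair, and the Y-channel path assumes cv2-style BGR channel order):

    import numpy as np
    from utils.metrics import calculate_psnr, calculate_ssim

    rng = np.random.default_rng(0)
    gt = rng.integers(0, 256, size=(64, 64, 3)).astype(np.uint8)   # HWC, [0, 255]
    noisy = np.clip(gt + rng.normal(0, 5, gt.shape), 0, 255)

    print('PSNR:', calculate_psnr(gt, noisy))   # higher is better, in dB
    print('SSIM:', calculate_ssim(gt, noisy))   # in [0, 1], 1 means identical
    print('PSNR (Y):', calculate_psnr(gt, noisy, test_y_channel=True))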
utils/optimize.py ADDED
@@ -0,0 +1,13 @@
+ import torch.optim as optim
+
+
+ def get_optimizer(config, parameters):
+     if config.optim.optimizer == 'Adam':
+         return optim.Adam(parameters, lr=config.optim.lr, weight_decay=config.optim.weight_decay,
+                           betas=(0.9, 0.999), amsgrad=config.optim.amsgrad, eps=config.optim.eps)
+     elif config.optim.optimizer == 'RMSProp':
+         return optim.RMSprop(parameters, lr=config.optim.lr, weight_decay=config.optim.weight_decay)
+     elif config.optim.optimizer == 'SGD':
+         return optim.SGD(parameters, lr=config.optim.lr, momentum=0.9)
+     else:
+         raise NotImplementedError('Optimizer {} not understood.'.format(config.optim.optimizer))
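
get_optimizer reads everything from config.optim, so a minimal compatible config can be sketched with types.SimpleNamespace (the hyperparameter values here are illustrative, not taken from the commit):

    import types
    import torch
    from utils.optimize import get_optimizer

    config = types.SimpleNamespace(optim=types.SimpleNamespace(
        optimizer='Adam',    # one of 'Adam', 'RMSProp', 'SGD'
        lr=2e-4,
        weight_decay=0.0,
        amsgrad=False,       # only read for Adam
        eps=1e-8,            # only read for Adam
    ))

    model = torch.nn.Linear(4, 4)
    optimizer = get_optimizer(config, model.parameters())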
utils/sampling.py ADDED
@@ -0,0 +1,89 @@
+ import torch
+ from torchvision.transforms.functional import crop
+
+
+ def compute_alpha(beta, t):
+     # Prepend a zero beta so that t = -1 indexes alpha = 1 (the clean-image step).
+     beta = torch.cat([torch.zeros(1).to(beta.device), beta], dim=0)
+     a = (1 - beta).cumprod(dim=0).index_select(0, t + 1).view(-1, 1, 1, 1)
+     return a
+
+
+ def data_transform(X):
+     # [0, 1] -> [-1, 1]
+     return 2 * X - 1.0
+
+
+ def inverse_data_transform(X):
+     # [-1, 1] -> [0, 1], clamped
+     return torch.clamp((X + 1.0) / 2.0, 0.0, 1.0)
+
+
+ def generalized_steps(x, x_cond, seq, model, b, eta=0., device=None):
+     with torch.no_grad():
+         n = x.size(0)
+         seq_next = [-1] + list(seq[:-1])
+         x0_preds = []
+         xs = [x]
+         for i, j in zip(reversed(seq), reversed(seq_next)):
+             t = (torch.ones(n) * i).to(x.device)
+             next_t = (torch.ones(n) * j).to(x.device)
+             at = compute_alpha(b, t.long())
+             at_next = compute_alpha(b, next_t.long())
+             xt = xs[-1].to(device)
+
+             et = model(torch.cat([x_cond, xt], dim=1), t)
+             x0_t = (xt - et * (1 - at).sqrt()) / at.sqrt()
+             x0_preds.append(x0_t.to(device))
+
+             c1 = eta * ((1 - at / at_next) * (1 - at_next) / (1 - at)).sqrt()
+             c2 = ((1 - at_next) - c1 ** 2).sqrt()
+             xt_next = at_next.sqrt() * x0_t + c1 * torch.randn_like(x) + c2 * et
+             xs.append(xt_next.to(device))
+     return xs, x0_preds
+
+
+ def generalized_steps_overlapping(x, x_cond, seq, model, b, eta=0., corners=None, p_size=None,
+                                   manual_batching=True, device=None):
+     with torch.no_grad():
+         n = x.size(0)
+         seq_next = [-1] + list(seq[:-1])
+         x0_preds = []
+         xs = [x]
+
+         # Count how many overlapping patches cover each pixel, for averaging.
+         x_grid_mask = torch.zeros_like(x_cond, device=x.device)
+         for (hi, wi) in corners:
+             x_grid_mask[:, :, hi:hi + p_size, wi:wi + p_size] += 1
+
+         for i, j in zip(reversed(seq), reversed(seq_next)):
+             t = (torch.ones(n) * i).to(x.device)
+             next_t = (torch.ones(n) * j).to(x.device)
+             at = compute_alpha(b, t.long())
+             at_next = compute_alpha(b, next_t.long())
+             xt = xs[-1].to(device)
+             et_output = torch.zeros_like(x_cond, device=x.device)
+
+             if manual_batching:
+                 # Stack all patches along the batch dimension and run the model
+                 # in chunks (this path assumes batch size n == 1).
+                 manual_batching_size = 64
+                 xt_patch = torch.cat([crop(xt, hi, wi, p_size, p_size) for (hi, wi) in corners], dim=0)
+                 x_cond_patch = torch.cat([data_transform(crop(x_cond, hi, wi, p_size, p_size))
+                                           for (hi, wi) in corners], dim=0)
+                 for k in range(0, len(corners), manual_batching_size):
+                     outputs = model(torch.cat([x_cond_patch[k:k + manual_batching_size],
+                                                xt_patch[k:k + manual_batching_size]], dim=1), t)
+                     for idx, (hi, wi) in enumerate(corners[k:k + manual_batching_size]):
+                         et_output[0, :, hi:hi + p_size, wi:wi + p_size] += outputs[idx]
+             else:
+                 for (hi, wi) in corners:
+                     xt_patch = crop(xt, hi, wi, p_size, p_size)
+                     x_cond_patch = crop(x_cond, hi, wi, p_size, p_size)
+                     x_cond_patch = data_transform(x_cond_patch)
+                     et_output[:, :, hi:hi + p_size, wi:wi + p_size] += model(
+                         torch.cat([x_cond_patch, xt_patch], dim=1), t)
+
+             # Average the per-patch noise estimates over the overlap counts.
+             et = torch.div(et_output, x_grid_mask)
+             x0_t = (xt - et * (1 - at).sqrt()) / at.sqrt()
+             x0_preds.append(x0_t.to(device))
+
+             c1 = eta * ((1 - at / at_next) * (1 - at_next) / (1 - at)).sqrt()
+             c2 = ((1 - at_next) - c1 ** 2).sqrt()
+             xt_next = at_next.sqrt() * x0_t + c1 * torch.randn_like(x) + c2 * et
+             xs.append(xt_next.to(device))
+     return xs, x0_preds
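
A compact sketch of driving generalized_steps end to end (the toy denoiser, beta schedule, and step sequence below are placeholders; the real project wires these from its own model and config). With eta=0 the update is deterministic DDIM sampling, xt_next = sqrt(at_next) * x0_t + sqrt(1 - at_next) * et; following the patch-based variant above, the conditioning image is mapped to [-1, 1] with data_transform before being fed to the model:

    import torch
    from utils.sampling import data_transform, generalized_steps, inverse_data_transform

    class ToyDenoiser(torch.nn.Module):
        # Takes x_cond concatenated with x_t (6 channels) plus the timestep t,
        # and predicts the noise for the 3 image channels.
        def __init__(self):
            super().__init__()
            self.net = torch.nn.Conv2d(6, 3, kernel_size=3, padding=1)

        def forward(self, x, t):
            return self.net(x)

    device = torch.device('cpu')
    num_timesteps = 100
    betas = torch.linspace(1e-4, 2e-2, num_timesteps)  # illustrative linear schedule

    model = ToyDenoiser().to(device)
    x = torch.randn(1, 3, 32, 32, device=device)                       # initial noise x_T
    x_cond = data_transform(torch.rand(1, 3, 32, 32, device=device))   # conditioning image in [-1, 1]
    seq = range(0, num_timesteps, 10)                                  # subsampled timestep sequence

    xs, x0_preds = generalized_steps(x, x_cond, seq, model, betas, eta=0., device=device)
    restored = inverse_data_transform(xs[-1])                          # map back to [0, 1]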