Yi Xie
committed on
Commit
·
aa5fd42
1
Parent(s):
b770ee6
Add 2xLexicaRRDBNet and 2xLexicaRRDBNet_Sharp
Browse files- app.py +2 -0
- converter.py +48 -7
- known_models.yaml +24 -0
app.py
CHANGED
@@ -84,6 +84,8 @@ def convert(input_model):
|
|
84 |
command += ['--num-blocks', str(model['blocks'])]
|
85 |
if 'convs' in model:
|
86 |
command += ['--num-convs', str(model['convs'])]
|
|
|
|
|
87 |
command += [file]
|
88 |
logger.debug('Command: %s', command)
|
89 |
process = subprocess.Popen(command, stdout=subprocess.PIPE)
|
|
|
84 |
command += ['--num-blocks', str(model['blocks'])]
|
85 |
if 'convs' in model:
|
86 |
command += ['--num-convs', str(model['convs'])]
|
87 |
+
if 'shuffle-factor' in model:
|
88 |
+
command += ['--shuffle-factor', str(model['shuffle-factor'])]
|
89 |
command += [file]
|
90 |
logger.debug('Command: %s', command)
|
91 |
process = subprocess.Popen(command, stdout=subprocess.PIPE)
|
converter.py
CHANGED
@@ -38,6 +38,7 @@ optional_args.add_argument('--has-cuda', action='store_true', help='Input model
|
|
38 |
optional_args.add_argument('--num-features', type=int, help='Override number of features for (Real-)ESRGAN model')
|
39 |
optional_args.add_argument('--num-blocks', type=int, help='Override number of blocks for (Real-)ESRGAN model')
|
40 |
optional_args.add_argument('--num-convs', type=int, help='Override number of conv layers for Real-ESRGAN Compact model')
|
|
|
41 |
optional_args.add_argument('--input-size', type=int, default=256, help='Input size (both width and height), default to 256')
|
42 |
optional_args.add_argument('--shrink-size', type=int, default=20, help='Shrink size (applied to all 4 sides on input), default to 20')
|
43 |
optional_args.add_argument('--description', type=str, required=False, help='Description of the model, supports Markdown')
|
@@ -88,13 +89,20 @@ else:
|
|
88 |
logger.info('Using torch device cpu, please be patient')
|
89 |
|
90 |
logger.info('Creating model architecture')
|
91 |
-
|
|
|
|
|
92 |
if args.monochrome:
|
93 |
-
|
|
|
|
|
|
|
|
|
94 |
|
95 |
num_features = 64
|
96 |
num_blocks = 23
|
97 |
num_convs = 16
|
|
|
98 |
|
99 |
if args.type == 'esrgan_old_lite':
|
100 |
num_features = 32
|
@@ -110,17 +118,17 @@ if args.num_convs is not None:
|
|
110 |
if args.type == 'esrgan_old' or args.type == 'esrgan_old_lite':
|
111 |
from esrgan_old import architecture
|
112 |
torch_model = architecture.RRDB_Net(
|
113 |
-
|
114 |
act_type='leakyrelu', mode='CNA', res_scale=1, upsample_mode='upconv')
|
115 |
elif args.type == 'real_esrgan':
|
116 |
from basicsr.archs.rrdbnet_arch import RRDBNet
|
117 |
-
torch_model = RRDBNet(num_in_ch=
|
118 |
elif args.type == 'real_esrgan_compact':
|
119 |
from basicsr.archs.srvgg_arch import SRVGGNetCompact
|
120 |
-
torch_model = SRVGGNetCompact(num_in_ch=
|
121 |
elif args.type == 'esrgan_plus':
|
122 |
from esrgan_plus.codes.models.modules.architecture import RRDBNet
|
123 |
-
torch_model = RRDBNet(in_nc=
|
124 |
else:
|
125 |
logger.fatal('Unknown model type: %s', args.type)
|
126 |
sys.exit(-1)
|
@@ -151,6 +159,35 @@ if args.monochrome:
|
|
151 |
return x
|
152 |
torch_model = MonochromeWrapper(torch_model)
|
153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
logger.info('Tracing model, will take a long time and a lot of RAM')
|
155 |
torch_model.eval()
|
156 |
torch_model = torch_model.to(device)
|
@@ -170,10 +207,14 @@ logger.info('Converting to Core ML')
|
|
170 |
input_shape = [1, 3, args.input_size, args.input_size]
|
171 |
output_size = args.input_size * args.scale
|
172 |
output_shape = [1, 3, output_size, output_size]
|
|
|
|
|
|
|
173 |
model = ct.convert(
|
174 |
traced_model,
|
175 |
convert_to="mlprogram",
|
176 |
-
inputs=[ct.TensorType(shape=input_shape)]
|
|
|
177 |
)
|
178 |
model_name = args.filename.split('/')[-1].split('.')[0]
|
179 |
mlmodel_file = args.out_dir + '/' + model_name + '.mlpackage'
|
|
|
38 |
optional_args.add_argument('--num-features', type=int, help='Override number of features for (Real-)ESRGAN model')
|
39 |
optional_args.add_argument('--num-blocks', type=int, help='Override number of blocks for (Real-)ESRGAN model')
|
40 |
optional_args.add_argument('--num-convs', type=int, help='Override number of conv layers for Real-ESRGAN Compact model')
|
41 |
+
optional_args.add_argument('--shuffle-factor', type=int, help='Shuffle input channels in ESRGAN model')
|
42 |
optional_args.add_argument('--input-size', type=int, default=256, help='Input size (both width and height), default to 256')
|
43 |
optional_args.add_argument('--shrink-size', type=int, default=20, help='Shrink size (applied to all 4 sides on input), default to 20')
|
44 |
optional_args.add_argument('--description', type=str, required=False, help='Description of the model, supports Markdown')
|
|
|
89 |
logger.info('Using torch device cpu, please be patient')
|
90 |
|
91 |
logger.info('Creating model architecture')
|
92 |
+
in_channels = 3
|
93 |
+
out_channels = 3
|
94 |
+
model_scale = args.scale
|
95 |
if args.monochrome:
|
96 |
+
in_channels = 1
|
97 |
+
out_channels = 1
|
98 |
+
if args.shuffle_factor:
|
99 |
+
in_channels *= args.shuffle_factor * args.shuffle_factor
|
100 |
+
model_scale *= args.shuffle_factor
|
101 |
|
102 |
num_features = 64
|
103 |
num_blocks = 23
|
104 |
num_convs = 16
|
105 |
+
shuffle_factor = None
|
106 |
|
107 |
if args.type == 'esrgan_old_lite':
|
108 |
num_features = 32
|
|
|
118 |
if args.type == 'esrgan_old' or args.type == 'esrgan_old_lite':
|
119 |
from esrgan_old import architecture
|
120 |
torch_model = architecture.RRDB_Net(
|
121 |
+
in_channels, out_channels, num_features, num_blocks, gc=32, upscale=model_scale, norm_type=None,
|
122 |
act_type='leakyrelu', mode='CNA', res_scale=1, upsample_mode='upconv')
|
123 |
elif args.type == 'real_esrgan':
|
124 |
from basicsr.archs.rrdbnet_arch import RRDBNet
|
125 |
+
torch_model = RRDBNet(num_in_ch=in_channels, num_out_ch=out_channels, num_feat=num_features, num_block=num_blocks, num_grow_ch=32, scale=args.scale)
|
126 |
elif args.type == 'real_esrgan_compact':
|
127 |
from basicsr.archs.srvgg_arch import SRVGGNetCompact
|
128 |
+
torch_model = SRVGGNetCompact(num_in_ch=in_channels, num_out_ch=out_channels, num_feat=num_features, num_conv=num_convs, upscale=args.scale, act_type='prelu')
|
129 |
elif args.type == 'esrgan_plus':
|
130 |
from esrgan_plus.codes.models.modules.architecture import RRDBNet
|
131 |
+
torch_model = RRDBNet(in_nc=in_channels, out_nc=out_channels, nf=num_features, nb=num_blocks, gc=32, upscale=args.scale)
|
132 |
else:
|
133 |
logger.fatal('Unknown model type: %s', args.type)
|
134 |
sys.exit(-1)
|
|
|
159 |
return x
|
160 |
torch_model = MonochromeWrapper(torch_model)
|
161 |
|
162 |
+
if args.shuffle_factor:
|
163 |
+
from torch import nn
|
164 |
+
# Source: https://github.com/chaiNNer-org/spandrel/blob/cb2f03459819ce114c52e328b7ac9bb2812f205a/libs/spandrel/spandrel/architectures/__arch_helpers/padding.py
|
165 |
+
def pad_to_multiple(
|
166 |
+
tensor: torch.Tensor,
|
167 |
+
multiple: int,
|
168 |
+
*,
|
169 |
+
mode: str,
|
170 |
+
value: float = 0.0,
|
171 |
+
) -> torch.Tensor:
|
172 |
+
_, _, h, w = tensor.size()
|
173 |
+
pad_h = (multiple - h % multiple) % multiple
|
174 |
+
pad_w = (multiple - w % multiple) % multiple
|
175 |
+
if pad_h or pad_w:
|
176 |
+
return nn.pad(tensor, (0, pad_w, 0, pad_h), mode, value)
|
177 |
+
return tensor
|
178 |
+
|
179 |
+
class ShuffleWrapper(nn.Module):
|
180 |
+
def __init__(self, model: nn.Module):
|
181 |
+
super(ShuffleWrapper, self).__init__()
|
182 |
+
self.model = model
|
183 |
+
def forward(self, x: torch.Tensor):
|
184 |
+
_, _, h, w = x.size()
|
185 |
+
x = pad_to_multiple(x, args.shuffle_factor, mode="reflect")
|
186 |
+
x = torch.pixel_unshuffle(x, downscale_factor=args.shuffle_factor)
|
187 |
+
x = self.model(x)
|
188 |
+
return x[:, :, : h * model_scale, : w * model_scale]
|
189 |
+
torch_model = ShuffleWrapper(torch_model)
|
190 |
+
|
191 |
logger.info('Tracing model, will take a long time and a lot of RAM')
|
192 |
torch_model.eval()
|
193 |
torch_model = torch_model.to(device)
|
|
|
207 |
input_shape = [1, 3, args.input_size, args.input_size]
|
208 |
output_size = args.input_size * args.scale
|
209 |
output_shape = [1, 3, output_size, output_size]
|
210 |
+
minimum_deployment_target = None
|
211 |
+
if args.shuffle_factor:
|
212 |
+
minimum_deployment_target = ct.target.iOS16
|
213 |
model = ct.convert(
|
214 |
traced_model,
|
215 |
convert_to="mlprogram",
|
216 |
+
inputs=[ct.TensorType(shape=input_shape)],
|
217 |
+
minimum_deployment_target=minimum_deployment_target
|
218 |
)
|
219 |
model_name = args.filename.split('/')[-1].split('.')[0]
|
220 |
mlmodel_file = args.out_dir + '/' + model_name + '.mlpackage'
|
known_models.yaml
CHANGED
@@ -340,4 +340,28 @@ models:
|
|
340 |
description: "Category: Oversharpening Purpose: Denoise Pretrained: 1st attempt on random sharpening with the same dataset at 200000 iterations, which was trained on non-random desharp model, total ~600000 iterations on 3 models.\n\n\n\nMade for rare particular cases when the image was destroyed by applying noise, i.e. game textures or any badly exported photos. If your image does not have any oversharpening, it won't hurt them, leaving as is. In theory, this model knows when to activate and when to skip, also can successfully remove artifacts if only some parts of the image are oversharpened, for example in image consisting of several combined images, 1 of them with sharpen noise."
|
341 |
author: "Loinne"
|
342 |
source: "[OpenModelDB](https://openmodeldb.info/models/1x-DeSharpen)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
343 |
license: "[CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)"
|
|
|
340 |
description: "Category: Oversharpening Purpose: Denoise Pretrained: 1st attempt on random sharpening with the same dataset at 200000 iterations, which was trained on non-random desharp model, total ~600000 iterations on 3 models.\n\n\n\nMade for rare particular cases when the image was destroyed by applying noise, i.e. game textures or any badly exported photos. If your image does not have any oversharpening, it won't hurt them, leaving as is. In theory, this model knows when to activate and when to skip, also can successfully remove artifacts if only some parts of the image are oversharpened, for example in image consisting of several combined images, 1 of them with sharpen noise."
|
341 |
author: "Loinne"
|
342 |
source: "[OpenModelDB](https://openmodeldb.info/models/1x-DeSharpen)"
|
343 |
+
license: "[CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)"
|
344 |
+
- name: "2xLexicaRRDBNet"
|
345 |
+
type: esrgan_old
|
346 |
+
file: "./torch_models/2xLexicaRRDBNet.pth"
|
347 |
+
sourceLink: https://github.com/Phhofm/models/raw/main/2xLexicaRRDBNet/2xLexicaRRDBNet.pth
|
348 |
+
sha256: 47c4ecdb06717b13e16da3000485cce72e378e18b7b7ee2e1020562283f7ac31
|
349 |
+
scale: 2
|
350 |
+
shuffle-factor: 2
|
351 |
+
cuda: true
|
352 |
+
description: "Upscaling AI generated images"
|
353 |
+
author: "[Philip Hofmann](https://github.com/Phhofm)"
|
354 |
+
source: "[GitHub Phhofm/models](https://github.com/Phhofm/models)"
|
355 |
+
license: "[CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)"
|
356 |
+
- name: "2xLexicaRRDBNet Sharp"
|
357 |
+
type: esrgan_old
|
358 |
+
file: "./torch_models/2xLexicaRRDBNet_Sharp.pth"
|
359 |
+
sourceLink: https://github.com/Phhofm/models/raw/main/2xLexicaRRDBNet/2xLexicaRRDBNet_Sharp.pth
|
360 |
+
sha256: 11e7bec714d8d15e686e336fe7095bd952e21ee8308551a00ea4df2caa374ac7
|
361 |
+
scale: 2
|
362 |
+
shuffle-factor: 2
|
363 |
+
cuda: true
|
364 |
+
description: "Upscaling AI generated images - a bit sharper than 2xLexicaRRDBNet"
|
365 |
+
author: "[Philip Hofmann](https://github.com/Phhofm)"
|
366 |
+
source: "[GitHub Phhofm/models](https://github.com/Phhofm/models)"
|
367 |
license: "[CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)"
|