import torch
import torch.nn as nn
import numpy as np
import itertools


class LosslessLatentDecoder(nn.Module):
    def __init__(self, in_channels, latent_depth, dtype=torch.float32, trainable=False):
        super().__init__()
        self.latent_depth = latent_depth
        self.in_channels = in_channels
        self.out_channels = in_channels // (latent_depth * latent_depth)
        kernel = torch.from_numpy(self.build_kernel(in_channels, latent_depth)).to(dtype=dtype)
        if trainable:
            self.kernel = nn.Parameter(kernel)
        else:
            # Register as a buffer so .to(device) and state_dict() handle it with the module.
            self.register_buffer("kernel", kernel)

    def build_kernel(self, in_channels, latent_depth):
        # Ported from my old TensorFlow code.
        # A TensorFlow conv_transpose kernel is (height, width, out_channels, in_channels);
        # a PyTorch conv_transpose kernel is (in_channels, out_channels, height, width).
        out_channels = self.out_channels
        # kernel_shape = [latent_depth, latent_depth, out_channels, in_channels]  # tensorflow
        kernel_shape = [in_channels, out_channels, latent_depth, latent_depth]  # pytorch
        kernel = np.zeros(kernel_shape, np.float32)
        # Build the kernel so that each latent_depth x latent_depth pixel block gets
        # each of its pixels from a separate input channel.
        for c in range(out_channels):
            i = 0
            for x, y in itertools.product(range(latent_depth), repeat=2):
                # kernel[y, x, c, c * latent_depth * latent_depth + i] = 1  # tensorflow
                kernel[c * latent_depth * latent_depth + i, c, y, x] = 1.0  # pytorch
                i += 1
        return kernel
    def forward(self, x):
        # Cast a copy to the input dtype instead of reassigning self.kernel,
        # which would fail when the kernel is an nn.Parameter.
        kernel = self.kernel if self.kernel.dtype == x.dtype else self.kernel.to(dtype=x.dtype)
        # Deconvolve the input tensor with the kernel (depth-to-space).
        return nn.functional.conv_transpose2d(x, kernel, stride=self.latent_depth, padding=0, groups=1)
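
# Illustrative sketch (an addition, not part of the original class): the decoder's
# strided transposed convolution is a pure depth-to-space rearrangement, analogous
# to nn.functional.pixel_shuffle except that the channels within each
# latent_depth * latent_depth block are ordered by (x, y) rather than (y, x).
def _demo_decoder_depth_to_space():
    dec = LosslessLatentDecoder(in_channels=12, latent_depth=2)
    z = torch.rand(1, 12, 4, 4)
    out = dec(z)  # 12 channels fold back into 3 channels at 2x the resolution
    assert list(out.shape) == [1, 3, 8, 8]
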
class LosslessLatentEncoder(nn.Module):
    def __init__(self, in_channels, latent_depth, dtype=torch.float32, trainable=False):
        super().__init__()
        self.latent_depth = latent_depth
        self.in_channels = in_channels
        self.out_channels = in_channels * (latent_depth * latent_depth)
        kernel = torch.from_numpy(self.build_kernel(in_channels, latent_depth)).to(dtype=dtype)
        if trainable:
            self.kernel = nn.Parameter(kernel)
        else:
            # Register as a buffer so .to(device) and state_dict() handle it with the module.
            self.register_buffer("kernel", kernel)

    def build_kernel(self, in_channels, latent_depth):
        # Ported from my old TensorFlow code.
        # A TensorFlow conv kernel is (height, width, in_channels, out_channels);
        # a PyTorch conv kernel is (out_channels, in_channels, height, width).
        out_channels = self.out_channels
        # kernel_shape = [latent_depth, latent_depth, in_channels, out_channels]  # tensorflow
        kernel_shape = [out_channels, in_channels, latent_depth, latent_depth]  # pytorch
        kernel = np.zeros(kernel_shape, np.float32)
        # Build the kernel so that each latent_depth x latent_depth pixel block sends
        # each of its pixels to a separate output channel.
        for c in range(in_channels):
            i = 0
            for x, y in itertools.product(range(latent_depth), repeat=2):
                # kernel[y, x, c, c * latent_depth * latent_depth + i] = 1  # tensorflow
                kernel[c * latent_depth * latent_depth + i, c, y, x] = 1.0  # pytorch
                i += 1
        return kernel
    def forward(self, x):
        # Cast a copy to the input dtype instead of reassigning self.kernel,
        # which would fail when the kernel is an nn.Parameter.
        kernel = self.kernel if self.kernel.dtype == x.dtype else self.kernel.to(dtype=x.dtype)
        # Convolve the input tensor with the kernel (space-to-depth).
        return nn.functional.conv2d(x, kernel, stride=self.latent_depth, padding=0, groups=1)
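
# Illustrative sketch (an addition, assuming PyTorch >= 1.8 for pixel_unshuffle):
# the encoder is a strided-convolution implementation of space-to-depth. It matches
# nn.functional.pixel_unshuffle up to the channel ordering within each
# latent_depth * latent_depth block, because i above runs over (x, y) while
# pixel_unshuffle orders the within-block channels by (y, x).
def _demo_encoder_space_to_depth():
    d = 2
    enc = LosslessLatentEncoder(in_channels=3, latent_depth=d)
    x = torch.rand(1, 3, 8, 8)
    z = enc(x)
    ref = nn.functional.pixel_unshuffle(x, d)
    # Swap pixel_unshuffle's (y, x) block ordering to the encoder's (x, y) ordering.
    ref = ref.reshape(1, 3, d, d, 4, 4).transpose(2, 3).reshape(1, 3 * d * d, 4, 4)
    assert torch.equal(z, ref)
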
class LosslessLatentVAE(nn.Module):
    def __init__(self, in_channels, latent_depth, dtype=torch.float32, trainable=False):
        super().__init__()
        self.latent_depth = latent_depth
        self.in_channels = in_channels
        self.encoder = LosslessLatentEncoder(in_channels, latent_depth, dtype=dtype, trainable=trainable)
        encoder_out_channels = self.encoder.out_channels
        self.decoder = LosslessLatentDecoder(encoder_out_channels, latent_depth, dtype=dtype, trainable=trainable)

    def forward(self, x):
        latent = self.encoder(x)
        out = self.decoder(latent)
        return out

    def encode(self, x):
        return self.encoder(x)

    def decode(self, x):
        return self.decoder(x)
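
# Sanity-check sketch (illustrative addition): both kernels are 0/1 permutation
# matrices, so encode followed by decode reproduces the input exactly; on CPU
# the round trip is bit-exact.
def _demo_lossless_roundtrip():
    vae = LosslessLatentVAE(in_channels=3, latent_depth=4)
    x = torch.rand(2, 3, 32, 32)
    assert torch.equal(vae.decode(vae.encode(x)), x)
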
# test it
if __name__ == '__main__':
    import os
    from PIL import Image
    import torchvision.transforms as transforms

    user_path = os.path.expanduser('~')
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dtype = torch.float32
    input_path = os.path.join(user_path, "Pictures/sample_2_512.png")
    output_path = os.path.join(user_path, "Pictures/sample_2_512_llvae.png")
    img = Image.open(input_path).convert("RGB")  # force 3 channels to match in_channels=3
    img_tensor = transforms.ToTensor()(img)
    img_tensor = img_tensor.unsqueeze(0).to(device=device, dtype=dtype)
    print("input_shape: ", list(img_tensor.shape))
    vae = LosslessLatentVAE(in_channels=3, latent_depth=8, dtype=dtype).to(device=device, dtype=dtype)
    latent = vae.encode(img_tensor)
    print("latent_shape: ", list(latent.shape))
    out_tensor = vae.decode(latent)
    print("out_shape: ", list(out_tensor.shape))
    mse_loss = nn.MSELoss()
    mse = mse_loss(img_tensor, out_tensor)
    print("roundtrip_loss: ", mse.item())
    # Move to CPU before converting; older torchvision versions require it.
    out_img = transforms.ToPILImage()(out_tensor.squeeze(0).cpu())
    out_img.save(output_path)
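    # Note (illustrative addition): with these 0/1 permutation kernels the round
    # trip is bit-exact on CPU, so roundtrip_loss should print 0.0 there. On CUDA,
    # TF32 convolutions (enabled by default on Ampere and newer GPUs) can introduce
    # a tiny nonzero error.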