刘虹雨 committed ab06a25 (Parent: 7ead217): update code
.gitignore
CHANGED
@@ -5,7 +5,6 @@ output*
 logs*
 taming*
 samples*
-datasets*
 asset*
 temp_samples*
 wandb*
DiT_VAE/diffusion/data/datasets/TriplaneData.py
ADDED
@@ -0,0 +1,141 @@
import os
import random
from PIL import Image
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import AutoImageProcessor
from DiT_VAE.diffusion.data.builder import DATASETS
from omegaconf import OmegaConf
from torchvision import transforms
from transformers import CLIPImageProcessor
import io
import zipfile
import numpy
import json


def to_rgb_image(maybe_rgba: Image.Image):
    if maybe_rgba.mode == 'RGB':
        return maybe_rgba
    elif maybe_rgba.mode == 'RGBA':
        # composite the RGBA image onto a neutral gray (value 127) background
        rgba = maybe_rgba
        img = numpy.random.randint(127, 128, size=[rgba.size[1], rgba.size[0], 3], dtype=numpy.uint8)
        img = Image.fromarray(img, 'RGB')
        img.paste(rgba, mask=rgba.getchannel('A'))
        return img
    else:
        raise ValueError("Unsupported image type.", maybe_rgba.mode)


@DATASETS.register_module()
class TriplaneData(Dataset):
    def __init__(self,
                 data_base_dir,
                 model_names,
                 data_json_file,
                 dino_path,
                 i_drop_rate=0.1,
                 image_size=256,
                 **kwargs):
        self.dict_data_image = json.load(open(data_json_file))  # {'image_name': pose}
        self.data_base_dir = data_base_dir
        self.dino_img_processor = AutoImageProcessor.from_pretrained(dino_path)
        self.size = image_size
        self.data_list = list(self.dict_data_image.keys())
        self.zip_file_dict = {}
        config_gan_model = OmegaConf.load(model_names)
        all_models = config_gan_model['gan_models'].keys()
        # keep one open ZipFile handle per GAN-model archive so samples can be read on demand
        for model_name in all_models:
            zipfile_path = os.path.join(self.data_base_dir, model_name + '.zip')
            zipfile_load = zipfile.ZipFile(zipfile_path)
            self.zip_file_dict[model_name] = zipfile_load
        self.transform = transforms.Compose([
            transforms.Resize(self.size, interpolation=transforms.InterpolationMode.BILINEAR),
            transforms.CenterCrop(self.size),
            transforms.ToTensor(),
            transforms.Normalize([0.5], [0.5]),
        ])
        self.clip_image_processor = CLIPImageProcessor()
        self.i_drop_rate = i_drop_rate

    def getdata(self, idx):
        data_name = self.data_list[idx]
        data_model_name = self.dict_data_image[data_name]['model_name']
        zipfile_loaded = self.zip_file_dict[data_model_name]
        # zipfile_path = os.path.join(self.data_base_dir, data_model_name)
        # zipfile_loaded = zipfile.ZipFile(zipfile_path)
        with zipfile_loaded.open(self.dict_data_image[data_name]['z_dir'], 'r') as f:
            buffer = io.BytesIO(f.read())
            data_z = torch.load(buffer)

        with zipfile_loaded.open(self.dict_data_image[data_name]['vert_dir'], 'r') as f:
            buffer = io.BytesIO(f.read())
            data_vert = torch.load(buffer)

        with zipfile_loaded.open(self.dict_data_image[data_name]['img_dir'], 'r') as f:
            raw_image = to_rgb_image(Image.open(f))
            dino_img = self.dino_img_processor(images=raw_image, return_tensors="pt").pixel_values
            image = self.transform(raw_image.convert("RGB"))
            clip_image = self.clip_image_processor(images=raw_image, return_tensors="pt").pixel_values
        # randomly flag this sample's image embedding to be dropped (probability i_drop_rate)
        drop_image_embed = 0
        rand_num = random.random()
        if rand_num < self.i_drop_rate:
            drop_image_embed = 1
        return {
            "raw_image": raw_image,
            "dino_img": dino_img,
            "image": image,
            "clip_image": clip_image.clone(),
            "data_z": data_z,
            "data_vert": data_vert,
            "data_model_name": data_model_name,
            "drop_image_embed": drop_image_embed,
        }

    #
    # img_path = self.img_samples[index]
    # npz_path = self.txt_feat_samples[index]
    # npy_path = self.vae_feat_samples[index]
    # prompt = self.prompt_samples[index]
    # data_info = {
    #     'img_hw': torch.tensor([torch.tensor(self.resolution), torch.tensor(self.resolution)], dtype=torch.float32),
    #     'aspect_ratio': torch.tensor(1.)
    # }
    #
    # img = self.loader(npy_path) if self.load_vae_feat else self.loader(img_path)
    # txt_info = np.load(npz_path)
    # txt_fea = torch.from_numpy(txt_info['caption_feature'])  # 1xTx4096
    # attention_mask = torch.ones(1, 1, txt_fea.shape[1])  # 1x1xT
    # if 'attention_mask' in txt_info.keys():
    #     attention_mask = torch.from_numpy(txt_info['attention_mask'])[None]
    # if txt_fea.shape[1] != self.max_lenth:
    #     txt_fea = torch.cat([txt_fea, txt_fea[:, -1:].repeat(1, self.max_lenth-txt_fea.shape[1], 1)], dim=1)
    #     attention_mask = torch.cat([attention_mask, torch.zeros(1, 1, self.max_lenth-attention_mask.shape[-1])], dim=-1)
    #
    # if self.transform:
    #     img = self.transform(img)
    #
    # data_info['prompt'] = prompt
    # return img, txt_fea, attention_mask, data_info

    def __getitem__(self, idx):
        # retry a handful of times on corrupt samples, falling back to a random index
        for _ in range(20):
            try:
                return self.getdata(idx)
            except Exception as e:
                print(f"Error details: {str(e)}")
                idx = np.random.randint(len(self))
        raise RuntimeError('Too many bad data.')

    def __len__(self):
        return len(self.data_list)

    def __getattr__(self, name):
        if name == "set_epoch":
            return lambda epoch: None
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
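For orientation, here is a minimal usage sketch of the dataset added above. It is not part of the commit: every path, the GAN-model YAML, the JSON manifest, and the DINO checkpoint name are placeholders assumed purely for illustration; the real values come from the repo's configs.

# Hypothetical usage sketch (not part of this commit).
from DiT_VAE.diffusion.data.datasets import TriplaneData

dataset = TriplaneData(
    data_base_dir='data/triplane_zips',        # assumed dir with one <model_name>.zip per YAML entry
    model_names='configs/gan_models.yaml',     # assumed OmegaConf file with a top-level 'gan_models' mapping
    data_json_file='data/manifest.json',       # assumed {name: {'model_name', 'z_dir', 'vert_dir', 'img_dir', ...}}
    dino_path='facebook/dinov2-base',          # any AutoImageProcessor-compatible checkpoint
    i_drop_rate=0.1,
    image_size=256,
)

sample = dataset[0]
print(sample['image'].shape)        # torch.Size([3, 256, 256])
print(sample['dino_img'].shape)     # (1, 3, H, W) as returned by the DINO processor
print(sample['drop_image_embed'])   # 1 with probability i_drop_rate, else 0

Note that each sample carries a PIL raw_image, so a custom collate_fn would be needed before batching with a default DataLoader; the __getattr__ shim also turns set_epoch into a no-op so samplers that call it do not fail.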
DiT_VAE/diffusion/data/datasets/__init__.py
ADDED
@@ -0,0 +1,2 @@
from .TriplaneData import TriplaneData
from .utils import *
DiT_VAE/diffusion/data/datasets/utils.py
ADDED
@@ -0,0 +1,84 @@
ASPECT_RATIO_1024 = {
    '0.25': [512., 2048.], '0.26': [512., 1984.], '0.27': [512., 1920.], '0.28': [512., 1856.],
    '0.32': [576., 1792.], '0.33': [576., 1728.], '0.35': [576., 1664.], '0.4': [640., 1600.],
    '0.42': [640., 1536.], '0.48': [704., 1472.], '0.5': [704., 1408.], '0.52': [704., 1344.],
    '0.57': [768., 1344.], '0.6': [768., 1280.], '0.68': [832., 1216.], '0.72': [832., 1152.],
    '0.78': [896., 1152.], '0.82': [896., 1088.], '0.88': [960., 1088.], '0.94': [960., 1024.],
    '1.0': [1024., 1024.], '1.07': [1024., 960.], '1.13': [1088., 960.], '1.21': [1088., 896.],
    '1.29': [1152., 896.], '1.38': [1152., 832.], '1.46': [1216., 832.], '1.67': [1280., 768.],
    '1.75': [1344., 768.], '2.0': [1408., 704.], '2.09': [1472., 704.], '2.4': [1536., 640.],
    '2.5': [1600., 640.], '2.89': [1664., 576.], '3.0': [1728., 576.], '3.11': [1792., 576.],
    '3.62': [1856., 512.], '3.75': [1920., 512.], '3.88': [1984., 512.], '4.0': [2048., 512.],
}

ASPECT_RATIO_512 = {
    '0.25': [256.0, 1024.0], '0.26': [256.0, 992.0], '0.27': [256.0, 960.0], '0.28': [256.0, 928.0],
    '0.32': [288.0, 896.0], '0.33': [288.0, 864.0], '0.35': [288.0, 832.0], '0.4': [320.0, 800.0],
    '0.42': [320.0, 768.0], '0.48': [352.0, 736.0], '0.5': [352.0, 704.0], '0.52': [352.0, 672.0],
    '0.57': [384.0, 672.0], '0.6': [384.0, 640.0], '0.68': [416.0, 608.0], '0.72': [416.0, 576.0],
    '0.78': [448.0, 576.0], '0.82': [448.0, 544.0], '0.88': [480.0, 544.0], '0.94': [480.0, 512.0],
    '1.0': [512.0, 512.0], '1.07': [512.0, 480.0], '1.13': [544.0, 480.0], '1.21': [544.0, 448.0],
    '1.29': [576.0, 448.0], '1.38': [576.0, 416.0], '1.46': [608.0, 416.0], '1.67': [640.0, 384.0],
    '1.75': [672.0, 384.0], '2.0': [704.0, 352.0], '2.09': [736.0, 352.0], '2.4': [768.0, 320.0],
    '2.5': [800.0, 320.0], '2.89': [832.0, 288.0], '3.0': [864.0, 288.0], '3.11': [896.0, 288.0],
    '3.62': [928.0, 256.0], '3.75': [960.0, 256.0], '3.88': [992.0, 256.0], '4.0': [1024.0, 256.0]
}

ASPECT_RATIO_256 = {
    '0.25': [128.0, 512.0], '0.26': [128.0, 496.0], '0.27': [128.0, 480.0], '0.28': [128.0, 464.0],
    '0.32': [144.0, 448.0], '0.33': [144.0, 432.0], '0.35': [144.0, 416.0], '0.4': [160.0, 400.0],
    '0.42': [160.0, 384.0], '0.48': [176.0, 368.0], '0.5': [176.0, 352.0], '0.52': [176.0, 336.0],
    '0.57': [192.0, 336.0], '0.6': [192.0, 320.0], '0.68': [208.0, 304.0], '0.72': [208.0, 288.0],
    '0.78': [224.0, 288.0], '0.82': [224.0, 272.0], '0.88': [240.0, 272.0], '0.94': [240.0, 256.0],
    '1.0': [256.0, 256.0], '1.07': [256.0, 240.0], '1.13': [272.0, 240.0], '1.21': [272.0, 224.0],
    '1.29': [288.0, 224.0], '1.38': [288.0, 208.0], '1.46': [304.0, 208.0], '1.67': [320.0, 192.0],
    '1.75': [336.0, 192.0], '2.0': [352.0, 176.0], '2.09': [368.0, 176.0], '2.4': [384.0, 160.0],
    '2.5': [400.0, 160.0], '2.89': [416.0, 144.0], '3.0': [432.0, 144.0], '3.11': [448.0, 144.0],
    '3.62': [464.0, 128.0], '3.75': [480.0, 128.0], '3.88': [496.0, 128.0], '4.0': [512.0, 128.0]
}

ASPECT_RATIO_256_TEST = {
    '0.25': [128.0, 512.0], '0.28': [128.0, 464.0],
    '0.32': [144.0, 448.0], '0.33': [144.0, 432.0], '0.35': [144.0, 416.0], '0.4': [160.0, 400.0],
    '0.42': [160.0, 384.0], '0.48': [176.0, 368.0], '0.5': [176.0, 352.0], '0.52': [176.0, 336.0],
    '0.57': [192.0, 336.0], '0.6': [192.0, 320.0], '0.68': [208.0, 304.0], '0.72': [208.0, 288.0],
    '0.78': [224.0, 288.0], '0.82': [224.0, 272.0], '0.88': [240.0, 272.0], '0.94': [240.0, 256.0],
    '1.0': [256.0, 256.0], '1.07': [256.0, 240.0], '1.13': [272.0, 240.0], '1.21': [272.0, 224.0],
    '1.29': [288.0, 224.0], '1.38': [288.0, 208.0], '1.46': [304.0, 208.0], '1.67': [320.0, 192.0],
    '1.75': [336.0, 192.0], '2.0': [352.0, 176.0], '2.09': [368.0, 176.0], '2.4': [384.0, 160.0],
    '2.5': [400.0, 160.0], '3.0': [432.0, 144.0],
    '4.0': [512.0, 128.0]
}

ASPECT_RATIO_512_TEST = {
    '0.25': [256.0, 1024.0], '0.28': [256.0, 928.0],
    '0.32': [288.0, 896.0], '0.33': [288.0, 864.0], '0.35': [288.0, 832.0], '0.4': [320.0, 800.0],
    '0.42': [320.0, 768.0], '0.48': [352.0, 736.0], '0.5': [352.0, 704.0], '0.52': [352.0, 672.0],
    '0.57': [384.0, 672.0], '0.6': [384.0, 640.0], '0.68': [416.0, 608.0], '0.72': [416.0, 576.0],
    '0.78': [448.0, 576.0], '0.82': [448.0, 544.0], '0.88': [480.0, 544.0], '0.94': [480.0, 512.0],
    '1.0': [512.0, 512.0], '1.07': [512.0, 480.0], '1.13': [544.0, 480.0], '1.21': [544.0, 448.0],
    '1.29': [576.0, 448.0], '1.38': [576.0, 416.0], '1.46': [608.0, 416.0], '1.67': [640.0, 384.0],
    '1.75': [672.0, 384.0], '2.0': [704.0, 352.0], '2.09': [736.0, 352.0], '2.4': [768.0, 320.0],
    '2.5': [800.0, 320.0], '3.0': [864.0, 288.0],
    '4.0': [1024.0, 256.0]
}

ASPECT_RATIO_1024_TEST = {
    '0.25': [512., 2048.], '0.28': [512., 1856.],
    '0.32': [576., 1792.], '0.33': [576., 1728.], '0.35': [576., 1664.], '0.4': [640., 1600.],
    '0.42': [640., 1536.], '0.48': [704., 1472.], '0.5': [704., 1408.], '0.52': [704., 1344.],
    '0.57': [768., 1344.], '0.6': [768., 1280.], '0.68': [832., 1216.], '0.72': [832., 1152.],
    '0.78': [896., 1152.], '0.82': [896., 1088.], '0.88': [960., 1088.], '0.94': [960., 1024.],
    '1.0': [1024., 1024.], '1.07': [1024., 960.], '1.13': [1088., 960.], '1.21': [1088., 896.],
    '1.29': [1152., 896.], '1.38': [1152., 832.], '1.46': [1216., 832.], '1.67': [1280., 768.],
    '1.75': [1344., 768.], '2.0': [1408., 704.], '2.09': [1472., 704.], '2.4': [1536., 640.],
    '2.5': [1600., 640.], '3.0': [1728., 576.],
    '4.0': [2048., 512.],
}


def get_chunks(lst, n):
    for i in range(0, len(lst), n):
        yield lst[i:i + n]
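For orientation, a short hedged sketch of how these lookup tables and get_chunks are typically consumed: each table maps a height/width ratio (as a string key) to a [height, width] resolution bucket. closest_bucket is an illustrative helper invented here, not something defined in this commit.

# Hypothetical example (not part of this commit): snap an image's aspect ratio to the
# nearest predefined bucket, then split a work list into fixed-size chunks.
def closest_bucket(height, width, ratios=ASPECT_RATIO_512):
    aspect = height / width
    key = min(ratios, key=lambda r: abs(float(r) - aspect))
    return ratios[key]  # [bucket_height, bucket_width]

print(closest_bucket(720, 1280))              # 0.5625 -> [384.0, 672.0]
print(list(get_chunks(list(range(10)), 4)))   # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]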