Spaces:

Mariam-Elz
/

Model-Demo

Runtime error

App Files Files Community

Mariam-Elz committed on Feb 15

Commit

6f85658

verified ·

1 Parent(s): b7eef6f

Upload pipelines.py with huggingface_hub

Browse files

Files changed (1) hide show

pipelines.py +212 -170

pipelines.py CHANGED Viewed

@@ -1,170 +1,212 @@
-import torch
-from libs.base_utils import do_resize_content
-from imagedream.ldm.util import (
-    instantiate_from_config,
-    get_obj_from_str,
-)
-from omegaconf import OmegaConf
-from PIL import Image
-import numpy as np
-class TwoStagePipeline(object):
-    """Two-stage image generation pipeline.
-
-    Stage 1 is conditioned on a single pixel image and generates multi-view
-    pixel images; stage 2 is conditioned on the stage-1 multi-view images and
-    generates the final images.
-    """
-    def __init__(
-        self,
-        stage1_model_config,
-        stage2_model_config,
-        stage1_sampler_config,
-        stage2_sampler_config,
-        device="cuda",
-        dtype=torch.float16,
-        resize_rate=1,
-    ) -> None:
-        """
-        only for two stage generate process.
-        - the first stage is conditioned on a single pixel image and generates multi-view pixel images, based on the v2pp config
-        - the second stage is conditioned on the multi-view pixel images generated by the first stage and generates the final image, based on the stage2-test config
-
-        Args:
-            stage1_model_config / stage2_model_config: objects exposing ``config``
-                (loaded with OmegaConf; its ``model`` node is instantiated) and
-                ``resume`` (checkpoint loaded with ``torch.load``).
-            stage1_sampler_config / stage2_sampler_config: objects exposing
-                ``target`` (resolved with ``get_obj_from_str``) and ``params``
-                (extra keyword arguments for the sampler constructor).
-            device: device both models are moved to.
-            dtype: dtype both models are cast to.
-            resize_rate: scale factor passed to ``do_resize_content`` in ``__call__``.
-        """
-        self.resize_rate = resize_rate
-        # NOTE(review): depending on the torch version / ``weights_only`` setting,
-        # torch.load may unpickle arbitrary objects; only load trusted checkpoints.
-        self.stage1_model = instantiate_from_config(OmegaConf.load(stage1_model_config.config).model)
-        self.stage1_model.load_state_dict(torch.load(stage1_model_config.resume, map_location="cpu"), strict=False)
-        self.stage1_model = self.stage1_model.to(device).to(dtype)
-        self.stage2_model = instantiate_from_config(OmegaConf.load(stage2_model_config.config).model)
-        sd = torch.load(stage2_model_config.resume, map_location="cpu")
-        self.stage2_model.load_state_dict(sd, strict=False)
-        self.stage2_model = self.stage2_model.to(device).to(dtype)
-        self.stage1_model.device = device
-        self.stage2_model.device = device
-        self.device = device
-        self.dtype = dtype
-        self.stage1_sampler = get_obj_from_str(stage1_sampler_config.target)(
-            self.stage1_model, device=device, dtype=dtype, **stage1_sampler_config.params
-        )
-        self.stage2_sampler = get_obj_from_str(stage2_sampler_config.target)(
-            self.stage2_model, device=device, dtype=dtype, **stage2_sampler_config.params
-        )
-    @staticmethod
-    def _to_rgb(pixel_img):
-        """Open ``pixel_img`` if it is a path and return it as an RGB PIL image."""
-        if isinstance(pixel_img, str):
-            pixel_img = Image.open(pixel_img)
-        if not isinstance(pixel_img, Image.Image):
-            # A bare ``raise`` here would only produce
-            # "RuntimeError: No active exception to reraise".
-            raise TypeError(
-                "pixel_img must be a file path or a PIL image, got {}".format(
-                    type(pixel_img).__name__
-                )
-            )
-        if pixel_img.mode == "RGBA":
-            # Composite onto a fully transparent canvas before dropping alpha.
-            background = Image.new('RGBA', pixel_img.size, (0, 0, 0, 0))
-            return Image.alpha_composite(background, pixel_img).convert("RGB")
-        return pixel_img.convert("RGB")
-    def stage1_sample(
-        self,
-        pixel_img,
-        prompt="3D assets",
-        neg_texts="uniform low no texture ugly, boring, bad anatomy, blurry, pixelated,  obscure, unnatural colors, poor lighting, dull, and unclear.",
-        step=50,
-        scale=5,
-        ddim_eta=0.0,
-    ):
-        """Run the stage-1 sampler on ``pixel_img``.
-
-        Args:
-            pixel_img: file path or PIL image; converted to RGB first.
-            prompt: text prompt forwarded to the sampler.
-            neg_texts: negative prompt used to build the unconditional conditioning.
-            step, scale, ddim_eta: forwarded to the sampler's ``i2i`` call.
-
-        Returns:
-            List of PIL images built from the sampler output, with the entry at
-            the sampler's ``ref_position`` removed.
-
-        Raises:
-            TypeError: if ``pixel_img`` is neither a path nor a PIL image.
-        """
-        pixel_img = self._to_rgb(pixel_img)
-        uc = self.stage1_sampler.model.get_learned_conditioning([neg_texts]).to(self.device)
-        stage1_images = self.stage1_sampler.i2i(
-            self.stage1_sampler.model,
-            self.stage1_sampler.size,
-            prompt,
-            uc=uc,
-            sampler=self.stage1_sampler.sampler,
-            ip=pixel_img,
-            step=step,
-            scale=scale,
-            batch_size=self.stage1_sampler.batch_size,
-            ddim_eta=ddim_eta,
-            dtype=self.stage1_sampler.dtype,
-            device=self.stage1_sampler.device,
-            camera=self.stage1_sampler.camera,
-            num_frames=self.stage1_sampler.num_frames,
-            pixel_control=(self.stage1_sampler.mode == "pixel"),
-            transform=self.stage1_sampler.image_transform,
-            offset_noise=self.stage1_sampler.offset_noise,
-        )
-        stage1_images = [Image.fromarray(img) for img in stage1_images]
-        stage1_images.pop(self.stage1_sampler.ref_position)
-        return stage1_images
-    def stage2_sample(self, pixel_img, stage1_images, scale=5, step=50):
-        """Run the stage-2 sampler on ``pixel_img`` and the stage-1 outputs.
-
-        The prompt is fixed to "3D assets", ``ddim_eta`` to 0.0, and the
-        sampler's own ``uc`` is used as unconditional conditioning.
-
-        Returns:
-            List of PIL images built from the sampler output.
-
-        Raises:
-            TypeError: if ``pixel_img`` is neither a path nor a PIL image.
-        """
-        pixel_img = self._to_rgb(pixel_img)
-        stage2_images = self.stage2_sampler.i2iStage2(
-            self.stage2_sampler.model,
-            self.stage2_sampler.size,
-            "3D assets",
-            self.stage2_sampler.uc,
-            self.stage2_sampler.sampler,
-            pixel_images=stage1_images,
-            ip=pixel_img,
-            step=step,
-            scale=scale,
-            batch_size=self.stage2_sampler.batch_size,
-            ddim_eta=0.0,
-            dtype=self.stage2_sampler.dtype,
-            device=self.stage2_sampler.device,
-            camera=self.stage2_sampler.camera,
-            num_frames=self.stage2_sampler.num_frames,
-            pixel_control=(self.stage2_sampler.mode == "pixel"),
-            transform=self.stage2_sampler.image_transform,
-            offset_noise=self.stage2_sampler.offset_noise,
-        )
-        stage2_images = [Image.fromarray(img) for img in stage2_images]
-        return stage2_images
-    def set_seed(self, seed):
-        """Set the same seed on both stage samplers."""
-        self.stage1_sampler.seed = seed
-        self.stage2_sampler.seed = seed
-    def __call__(self, pixel_img, prompt="3D assets", scale=5, step=50):
-        """Resize the input content, run both stages and return all images.
-
-        Returns:
-            Dict with keys ``"ref_img"`` (the resized input), ``"stage1_images"``
-            and ``"stage2_images"``.
-        """
-        pixel_img = do_resize_content(pixel_img, self.resize_rate)
-        stage1_images = self.stage1_sample(pixel_img, prompt, scale=scale, step=step)
-        stage2_images = self.stage2_sample(pixel_img, stage1_images, scale=scale, step=step)
-        return {
-            "ref_img": pixel_img,
-            "stage1_images": stage1_images,
-            "stage2_images": stage2_images,
-        }
-if __name__ == "__main__":
-    # Demo run: build the pipeline from the two stage configs, process the
-    # sample astronaut image and save each stage's views as one horizontal strip.
-    stage1_cfg = OmegaConf.load("configs/nf7_v3_SNR_rd_size_stroke.yaml").config
-    stage2_cfg = OmegaConf.load("configs/stage2-v2-snr.yaml").config
-    pipeline = TwoStagePipeline(
-        stage1_cfg.models,
-        stage2_cfg.models,
-        stage1_cfg.sampler,
-        stage2_cfg.sampler,
-    )
-    outputs = pipeline(Image.open("assets/astronaut.png"))
-    # Concatenate along axis 1 (width) so the views sit side by side.
-    pixel_strip = np.concatenate(outputs["stage1_images"], 1)
-    xyz_strip = np.concatenate(outputs["stage2_images"], 1)
-    Image.fromarray(pixel_strip).save("pixel_images.png")
-    Image.fromarray(xyz_strip).save("xyz_images.png")

+import torch
+from libs.base_utils import do_resize_content
+from imagedream.ldm.util import (
+    instantiate_from_config,
+    get_obj_from_str,
+)
+from omegaconf import OmegaConf
+from PIL import Image
+import PIL
+import rembg
+class TwoStagePipeline(object):
+    """Two-stage image generation pipeline.
+
+    Stage 1 is conditioned on a single pixel image and generates multi-view
+    pixel images; stage 2 is conditioned on the stage-1 multi-view images and
+    generates the final images.
+    """
+    def __init__(
+        self,
+        stage1_model_config,
+        stage2_model_config,
+        stage1_sampler_config,
+        stage2_sampler_config,
+        device="cuda",
+        dtype=torch.float16,
+        resize_rate=1,
+    ) -> None:
+        """
+        only for two stage generate process.
+        - the first stage is conditioned on a single pixel image and generates multi-view pixel images, based on the v2pp config
+        - the second stage is conditioned on the multi-view pixel images generated by the first stage and generates the final image, based on the stage2-test config
+
+        Args:
+            stage1_model_config / stage2_model_config: objects exposing ``config``
+                (loaded with OmegaConf; its ``model`` node is instantiated) and
+                ``resume`` (checkpoint loaded with ``torch.load``).
+            stage1_sampler_config / stage2_sampler_config: objects exposing
+                ``target`` (resolved with ``get_obj_from_str``) and ``params``
+                (extra keyword arguments for the sampler constructor).
+            device: device both models are moved to.
+            dtype: dtype both models are cast to.
+            resize_rate: scale factor passed to ``do_resize_content`` in ``__call__``.
+        """
+        self.resize_rate = resize_rate
+        # NOTE(review): depending on the torch version / ``weights_only`` setting,
+        # torch.load may unpickle arbitrary objects; only load trusted checkpoints.
+        self.stage1_model = instantiate_from_config(OmegaConf.load(stage1_model_config.config).model)
+        self.stage1_model.load_state_dict(torch.load(stage1_model_config.resume, map_location="cpu"), strict=False)
+        self.stage1_model = self.stage1_model.to(device).to(dtype)
+        self.stage2_model = instantiate_from_config(OmegaConf.load(stage2_model_config.config).model)
+        sd = torch.load(stage2_model_config.resume, map_location="cpu")
+        self.stage2_model.load_state_dict(sd, strict=False)
+        self.stage2_model = self.stage2_model.to(device).to(dtype)
+        self.stage1_model.device = device
+        self.stage2_model.device = device
+        self.device = device
+        self.dtype = dtype
+        self.stage1_sampler = get_obj_from_str(stage1_sampler_config.target)(
+            self.stage1_model, device=device, dtype=dtype, **stage1_sampler_config.params
+        )
+        self.stage2_sampler = get_obj_from_str(stage2_sampler_config.target)(
+            self.stage2_model, device=device, dtype=dtype, **stage2_sampler_config.params
+        )
+    @staticmethod
+    def _to_rgb(pixel_img):
+        """Open ``pixel_img`` if it is a path and return it as an RGB PIL image."""
+        if isinstance(pixel_img, str):
+            pixel_img = Image.open(pixel_img)
+        if not isinstance(pixel_img, Image.Image):
+            # A bare ``raise`` here would only produce
+            # "RuntimeError: No active exception to reraise".
+            raise TypeError(
+                "pixel_img must be a file path or a PIL image, got {}".format(
+                    type(pixel_img).__name__
+                )
+            )
+        if pixel_img.mode == "RGBA":
+            # Composite onto a fully transparent canvas before dropping alpha.
+            background = Image.new('RGBA', pixel_img.size, (0, 0, 0, 0))
+            return Image.alpha_composite(background, pixel_img).convert("RGB")
+        return pixel_img.convert("RGB")
+    def stage1_sample(
+        self,
+        pixel_img,
+        prompt="3D assets",
+        neg_texts="uniform low no texture ugly, boring, bad anatomy, blurry, pixelated,  obscure, unnatural colors, poor lighting, dull, and unclear.",
+        step=50,
+        scale=5,
+        ddim_eta=0.0,
+    ):
+        """Run the stage-1 sampler on ``pixel_img``.
+
+        Args:
+            pixel_img: file path or PIL image; converted to RGB first.
+            prompt: text prompt forwarded to the sampler.
+            neg_texts: negative prompt used to build the unconditional conditioning.
+            step, scale, ddim_eta: forwarded to the sampler's ``i2i`` call.
+
+        Returns:
+            List of PIL images built from the sampler output, with the entry at
+            the sampler's ``ref_position`` removed.
+
+        Raises:
+            TypeError: if ``pixel_img`` is neither a path nor a PIL image.
+        """
+        pixel_img = self._to_rgb(pixel_img)
+        uc = self.stage1_sampler.model.get_learned_conditioning([neg_texts]).to(self.device)
+        stage1_images = self.stage1_sampler.i2i(
+            self.stage1_sampler.model,
+            self.stage1_sampler.size,
+            prompt,
+            uc=uc,
+            sampler=self.stage1_sampler.sampler,
+            ip=pixel_img,
+            step=step,
+            scale=scale,
+            batch_size=self.stage1_sampler.batch_size,
+            ddim_eta=ddim_eta,
+            dtype=self.stage1_sampler.dtype,
+            device=self.stage1_sampler.device,
+            camera=self.stage1_sampler.camera,
+            num_frames=self.stage1_sampler.num_frames,
+            pixel_control=(self.stage1_sampler.mode == "pixel"),
+            transform=self.stage1_sampler.image_transform,
+            offset_noise=self.stage1_sampler.offset_noise,
+        )
+        stage1_images = [Image.fromarray(img) for img in stage1_images]
+        stage1_images.pop(self.stage1_sampler.ref_position)
+        return stage1_images
+    def stage2_sample(self, pixel_img, stage1_images, scale=5, step=50):
+        """Run the stage-2 sampler on ``pixel_img`` and the stage-1 outputs.
+
+        The prompt is fixed to "3D assets", ``ddim_eta`` to 0.0, and the
+        sampler's own ``uc`` is used as unconditional conditioning.
+
+        Returns:
+            List of PIL images built from the sampler output.
+
+        Raises:
+            TypeError: if ``pixel_img`` is neither a path nor a PIL image.
+        """
+        pixel_img = self._to_rgb(pixel_img)
+        stage2_images = self.stage2_sampler.i2iStage2(
+            self.stage2_sampler.model,
+            self.stage2_sampler.size,
+            "3D assets",
+            self.stage2_sampler.uc,
+            self.stage2_sampler.sampler,
+            pixel_images=stage1_images,
+            ip=pixel_img,
+            step=step,
+            scale=scale,
+            batch_size=self.stage2_sampler.batch_size,
+            ddim_eta=0.0,
+            dtype=self.stage2_sampler.dtype,
+            device=self.stage2_sampler.device,
+            camera=self.stage2_sampler.camera,
+            num_frames=self.stage2_sampler.num_frames,
+            pixel_control=(self.stage2_sampler.mode == "pixel"),
+            transform=self.stage2_sampler.image_transform,
+            offset_noise=self.stage2_sampler.offset_noise,
+        )
+        stage2_images = [Image.fromarray(img) for img in stage2_images]
+        return stage2_images
+    def set_seed(self, seed):
+        """Set the same seed on both stage samplers."""
+        self.stage1_sampler.seed = seed
+        self.stage2_sampler.seed = seed
+    def __call__(self, pixel_img, prompt="3D assets", scale=5, step=50):
+        """Resize the input content, run both stages and return all images.
+
+        Returns:
+            Dict with keys ``"ref_img"`` (the resized input), ``"stage1_images"``
+            and ``"stage2_images"``.
+        """
+        pixel_img = do_resize_content(pixel_img, self.resize_rate)
+        stage1_images = self.stage1_sample(pixel_img, prompt, scale=scale, step=step)
+        stage2_images = self.stage2_sample(pixel_img, stage1_images, scale=scale, step=step)
+        return {
+            "ref_img": pixel_img,
+            "stage1_images": stage1_images,
+            "stage2_images": stage2_images,
+        }
+# Module-level rembg session, created once at import time; preprocess_image
+# passes it to remove_background.
+rembg_session = rembg.new_session()
+def expand_to_square(image, bg_color=(0, 0, 0, 0)):
+    """Pad ``image`` to a 1:1 aspect ratio, centred on an RGBA canvas.
+
+    An already-square image is returned unchanged (same object). Otherwise a
+    new RGBA image whose side equals the longer edge is filled with
+    ``bg_color`` and the input is pasted in the middle.
+    """
+    w, h = image.size
+    if w != h:
+        side = max(w, h)
+        canvas = Image.new("RGBA", (side, side), bg_color)
+        canvas.paste(image, ((side - w) // 2, (side - h) // 2))
+        return canvas
+    return image
+def remove_background(
+    image: PIL.Image.Image,
+    rembg_session = None,
+    force: bool = False,
+    **rembg_kwargs,
+) -> PIL.Image.Image:
+    """Remove the background of ``image`` with rembg unless it already has alpha.
+
+    Args:
+        image: input PIL image.
+        rembg_session: session passed to ``rembg.remove`` as ``session``.
+        force: run rembg even when the image already carries a non-opaque
+            alpha channel.
+        **rembg_kwargs: extra keyword arguments forwarded to ``rembg.remove``.
+            The keyword ``force_remove`` is accepted as an alias for ``force``
+            and is not forwarded.
+
+    Returns:
+        The image returned by ``rembg.remove``, or the input composited onto a
+        transparent canvas when removal is skipped.
+    """
+    # Accept the legacy spelling ``force_remove`` as an alias for ``force`` so
+    # that callers using it get the forced behaviour instead of having the flag
+    # forwarded to rembg as an unknown option.
+    force = bool(rembg_kwargs.pop("force_remove", False)) or force
+    do_remove = True
+    # getextrema()[3][0] is the minimum of the alpha band; below 255 means some
+    # pixel is not fully opaque.
+    if image.mode == "RGBA" and image.getextrema()[3][0] < 255:
+        # explain why the background is not removed here
+        print("alhpa channl not enpty, skip remove background, using alpha channel as mask")
+        background = Image.new("RGBA", image.size, (0, 0, 0, 0))
+        image = Image.alpha_composite(background, image)
+        do_remove = False
+    if do_remove or force:
+        image = rembg.remove(image, session=rembg_session, **rembg_kwargs)
+    return image
+def do_resize_content(original_image: Image.Image, scale_rate) -> Image.Image:
+    """Rescale the image content while keeping the original canvas size.
+
+    Args:
+        original_image: PIL image to rescale.
+        scale_rate: factor applied to both dimensions; ``1`` returns the input
+            unchanged (same object).
+
+    Returns:
+        The input itself when ``scale_rate == 1``; otherwise a new RGBA image
+        of the original size with the rescaled content pasted in the centre
+        on a fully transparent background.
+    """
+    if scale_rate == 1:
+        return original_image
+    # Calculate the new size after rescaling
+    new_size = tuple(int(dim * scale_rate) for dim in original_image.size)
+    # Both dimensions use the same factor, so the aspect ratio is kept
+    resized_image = original_image.resize(new_size)
+    # Create a new image with the original size and a transparent background
+    padded_image = Image.new("RGBA", original_image.size, (0, 0, 0, 0))
+    paste_position = (
+        (original_image.width - resized_image.width) // 2,
+        (original_image.height - resized_image.height) // 2,
+    )
+    padded_image.paste(resized_image, paste_position)
+    return padded_image
+def add_background(image, bg_color=(255, 255, 255)):
+    """Flatten an RGBA image onto a solid ``bg_color`` canvas, using its alpha channel as the mask."""
+    canvas = Image.new("RGBA", image.size, bg_color)
+    composited = Image.alpha_composite(canvas, image)
+    return composited
+def preprocess_image(image, background_choice, foreground_ratio, backgroud_color):
+    """
+    input image is a pil image in RGBA, return RGB image
+
+    Args:
+        image: RGBA PIL image.
+        background_choice: "Alpha as mask" keeps the existing alpha channel;
+            any other value runs ``remove_background`` with removal forced.
+        foreground_ratio: scale factor passed to ``do_resize_content``.
+        backgroud_color: colour passed to ``add_background``.
+    """
+    print(background_choice)
+    if background_choice == "Alpha as mask":
+        background = Image.new("RGBA", image.size, (0, 0, 0, 0))
+        image = Image.alpha_composite(background, image)
+    else:
+        # remove_background's parameter is named ``force``; the previous
+        # ``force_remove=True`` landed in **rembg_kwargs and never forced removal.
+        image = remove_background(image, rembg_session, force=True)
+    image = do_resize_content(image, foreground_ratio)
+    image = expand_to_square(image)
+    image = add_background(image, backgroud_color)
+    return image.convert("RGB")