import cv2
import torch
from PIL import Image
import numpy as np
import yaml
import argparse
from controlnet_aux import OpenposeDetector
from diffusers import (
    StableDiffusionControlNetPipeline, 
    ControlNetModel, 
    UniPCMultistepScheduler
)

from utils.download import load_image
from utils.plot import image_grid
import os
from tqdm import tqdm
import re
import uuid

def load_config(config_path):
    try:
        with open(config_path, 'r') as file:
            return yaml.safe_load(file)
    except Exception as e:
        raise ValueError(f"Error loading config file: {e}")

def initialize_controlnet(config):
    # local_dir falls back to model_id when no local checkpoint is configured
    local_dir = config.get('local_dir', config['model_id'])
    return ControlNetModel.from_pretrained(
        local_dir,
        torch_dtype=torch.float16
    )

def initialize_pipeline(controlnet, config):
    # local_dir falls back to model_id when no local checkpoint is configured
    local_dir = config.get('local_dir', config['model_id'])
    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        local_dir,
        controlnet=controlnet,
        torch_dtype=torch.float16
    )
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    return pipe
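
# Note: UniPCMultistepScheduler is a fast multistep solver, so the pipeline
# can produce usable samples in roughly 20 steps; this is consistent with the
# --num_steps default of 20 in the CLI below.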

def setup_device(pipe):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    if device == "cuda":
        # enable_model_cpu_offload manages device placement itself via
        # accelerate hooks; calling pipe.to("cuda") afterwards would undo them
        pipe.enable_model_cpu_offload()
    else:
        pipe.to(device)
    return device

def generate_images(pipe, prompts, pose_images, generators, negative_prompts, num_steps, guidance_scale, controlnet_conditioning_scale, width, height):
    return pipe(
        prompts,
        pose_images,
        negative_prompt=negative_prompts,
        generator=generators,
        num_inference_steps=num_steps,
        guidance_scale=guidance_scale,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        width=width,
        height=height
    ).images
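
# generate_images treats its inputs as parallel batches: prompts, pose_images,
# negative_prompts, and generators must all be lists of the same length. A
# minimal usage sketch (pose_img stands for any PIL pose map; the values are
# illustrative, not defaults of this script):
#
#   images = generate_images(
#       pipe,
#       prompts=["a man is doing yoga"],
#       pose_images=[pose_img],
#       generators=[torch.Generator(device="cpu").manual_seed(0)],
#       negative_prompts=["lowres, bad anatomy"],
#       num_steps=20,
#       guidance_scale=7.5,
#       controlnet_conditioning_scale=1.0,
#       width=512,
#       height=512,
#   )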

def infer(args):
    # Load configuration
    configs = load_config(args.config_path)
    
    # Initialize models
    controlnet_detector = OpenposeDetector.from_pretrained(
        configs[2]['model_id']  # lllyasviel/ControlNet
    )
    controlnet = initialize_controlnet(configs[0])
    pipe = initialize_pipeline(controlnet, configs[1])
    
    # Setup device
    device = setup_device(pipe)
    
    # Load and process image
    try:
        if args.input_image:
            demo_image = Image.open(args.input_image).convert("RGB")
        elif args.image_url:
            demo_image = load_image(args.image_url)
        else:
            raise ValueError("Either --input_image or --image_url must be provided")
    except Exception as e:
        raise ValueError(f"Error loading image: {e}")
    
    # Run OpenPose on the input image to get the pose conditioning map
    poses = [controlnet_detector(demo_image)]
    
    # Generate images: one seeded CPU generator per pose map keeps results reproducible
    generators = [torch.Generator(device="cpu").manual_seed(args.seed + i) for i in range(len(poses))]
    
    output_images = generate_images(
        pipe,
        [args.prompt] * len(generators),
        poses,
        generators,
        [args.negative_prompt] * len(generators),
        args.num_steps,
        args.guidance_scale,
        args.controlnet_conditioning_scale,
        args.width,
        args.height
    )
    
    # Save images if save_output is True
    if args.save_output:
        os.makedirs(args.output_dir, exist_ok=True)
        for i, img in enumerate(tqdm(output_images, desc="Saving images")):
            if args.use_prompt_as_output_name:
                # Sanitize prompt for filename (replace spaces and special characters)
                sanitized_prompt = re.sub(r'[^\w\s-]', '', args.prompt).replace(' ', '_').lower()
                filename = f"{sanitized_prompt}_{i}.png"
            else:
                # Use UUID for filename
                filename = f"{uuid.uuid4()}_{i}.png"
            img.save(os.path.join(args.output_dir, filename))

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="ControlNet image generation with pose detection")
    # Create mutually exclusive group for input_image and image_url
    image_group = parser.add_mutually_exclusive_group(required=True)
    image_group.add_argument("--input_image", type=str, default=None,
                             help="Path to local input image (default: tests/test_data/yoga1.jpg)")
    image_group.add_argument("--image_url", type=str, default=None,
                             help="URL of input image (e.g., https://huggingface.co/datasets/YiYiXu/controlnet-testing/resolve/main/yoga1.jpeg)")
    
    parser.add_argument("--config_path", type=str, default="configs/model_ckpts.yaml", 
                        help="Path to configuration YAML file")
    parser.add_argument("--prompt", type=str, default="a man is doing yoga",
                        help="Text prompt for image generation")
    parser.add_argument("--negative_prompt", type=str, 
                        default="monochrome, lowres, bad anatomy, worst quality, low quality",
                        help="Negative prompt for image generation")
    parser.add_argument("--num_steps", type=int, default=20,
                        help="Number of inference steps")
    parser.add_argument("--seed", type=int, default=2,
                        help="Random seed for generation")
    parser.add_argument("--width", type=int, default=512,
                        help="Width of the generated image")
    parser.add_argument("--height", type=int, default=512,
                        help="Height of the generated image")
    parser.add_argument("--guidance_scale", type=float, default=7.5,
                        help="Guidance scale for prompt adherence")
    parser.add_argument("--controlnet_conditioning_scale", type=float, default=1.0,
                        help="ControlNet conditioning scale")
    parser.add_argument("--output_dir", type=str, default="tests/test_data",
                        help="Directory to save generated images")
    parser.add_argument("--use_prompt_as_output_name", action="store_true",
                        help="Use prompt as part of output image filename")
    parser.add_argument("--save_output", action="store_true artr",
                        help="Save generated images to output directory")
    
    args = parser.parse_args()
    infer(args)

# Using image_url
# python script.py \
#     --config_path configs/model_ckpts.yaml \
#     --image_url https://huggingface.co/datasets/YiYiXu/controlnet-testing/resolve/main/yoga1.jpeg \
#     --prompt "a man is doing yoga in a serene park" \
#     --negative_prompt "monochrome, lowres, bad anatomy" \
#     --num_steps 30 \
#     --seed 42 \
#     --width 512 \
#     --height 512 \
#     --guidance_scale 7.5 \
#     --controlnet_conditioning_scale 0.8 \
#     --output_dir "tests/test_data" \
#     --save_output

# Using input_image
# python script.py \
#     --config_path configs/model_ckpts.yaml \
#     --input_image "tests/test_data/yoga1.jpg" \
#     --prompt "a man is doing yoga in a serene park" \
#     --negative_prompt "monochrome, lowres, bad anatomy" \
#     --num_steps 30 \
#     --seed 42 \
#     --width 512 \
#     --height 512 \
#     --guidance_scale 7.5 \
#     --controlnet_conditioning_scale 0.8 \
#     --output_dir "tests/test_data" \
#     --save_output