Initial commit for SD ControlNet Canny application
Files changed:
- README.md +26 -25
- app.py +83 -32
- requirements.txt +2 -1
README.md (CHANGED)

@@ -1,6 +1,6 @@
 ---
-title:
-emoji:
+title: SD ControlNet Canny
+emoji: 🎨
 colorFrom: purple
 colorTo: red
 sdk: gradio
@@ -8,48 +8,49 @@ sdk_version: 5.25.2
 app_file: app.py
 pinned: false
 license: mit
-short_description:
+short_description: Stable Diffusion with ControlNet Canny Edge Detection
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
-#
+# ControlNet Canny - Edge Guided Image Generation
 
-
+This application uses the [ControlNet Canny](https://huggingface.co/lllyasviel/sd-controlnet-canny) model to control the image generation process through edge detection. ControlNet lets you use the edge structure of an image to guide Stable Diffusion into generating images that follow that structure.
 
-##
+## Setup
 
-1.
+1. Install the required dependencies:
 
 ```bash
 pip install -r requirements.txt
 ```
 
-2.
+2. Run the application:
 
 ```bash
 python app.py
 ```
 
-##
+## Usage
 
-1.
-2.
-3.
-4.
-   -
-   -
-   -
+1. Upload an image or use one of the example images
+2. Enter a prompt describing the image you want to generate (e.g., "A fantasy landscape with mountains and a lake")
+3. Click "Run" to generate the edge-guided image
+4. Adjust the parameters under "Advanced Settings" for finer control:
+   - Canny low/high threshold: controls the sensitivity of the edge detection
+   - Guidance scale: controls how closely the generated image matches the text prompt
+   - Inference steps: higher values give better quality but take longer
 
-##
+## Example Prompts
 
-- "
-- "
-- "
-- "
-- "
-- "Make it look like night time"
+- "A fantasy landscape with mountains and a lake"
+- "A cyberpunk-style city street scene"
+- "A cartoon character in winter clothing"
+- "A futuristic architectural design"
+- "A dreamlike forest scene"
 
-##
+## Technical Details
 
-
+This application uses Hugging Face's [lllyasviel/sd-controlnet-canny](https://huggingface.co/lllyasviel/sd-controlnet-canny) model together with the Diffusers library. The model extracts the edges of the input image with the Canny edge detection algorithm, then uses those edges to guide Stable Diffusion in generating a new image that follows the same structure.
+
+ControlNet preserves the structure and composition of the input image while changing its style and content according to the text prompt.
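For reference, the flow the new README describes condenses to a short standalone Diffusers sketch. The model IDs, scheduler, example image URL, and 100/200 thresholds below are the ones this commit pins in app.py; the output filename is illustrative:

```python
# Minimal sketch of edge-guided generation, condensed from this commit's app.py.
import cv2
import numpy as np
import torch
from diffusers import (
    ControlNetModel,
    StableDiffusionControlNetPipeline,
    UniPCMultistepScheduler,
)
from diffusers.utils import load_image
from PIL import Image

device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-canny", torch_dtype=dtype
)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=dtype
).to(device)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# Build the control image: Canny edges, stacked back to three channels.
source = load_image(
    "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"
)
edges = cv2.Canny(np.array(source.convert("L")), 100, 200)
control = Image.fromarray(cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB))

# The edge map, not the source photo, is what conditions generation.
result = pipe(
    "A fantasy landscape with mountains and a lake",
    image=control,
    num_inference_steps=30,
    guidance_scale=7.5,
).images[0]
result.save("controlnet_canny_out.png")
```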
app.py (CHANGED)
@@ -1,31 +1,62 @@
 import gradio as gr
 import numpy as np
 import random
+import cv2
 
 import spaces
 import torch
-from diffusers import
+from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
 from diffusers.utils import load_image
+from PIL import Image
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
-
+sd_model_id = "runwayml/stable-diffusion-v1-5"
+controlnet_model_id = "lllyasviel/sd-controlnet-canny"
 
 if torch.cuda.is_available():
     torch_dtype = torch.float16
 else:
     torch_dtype = torch.float32
 
-
-
+# Load ControlNet model
+controlnet = ControlNetModel.from_pretrained(
+    controlnet_model_id,
+    torch_dtype=torch_dtype
+)
+
+# Load Stable Diffusion with ControlNet
+pipe = StableDiffusionControlNetPipeline.from_pretrained(
+    sd_model_id,
+    controlnet=controlnet,
     torch_dtype=torch_dtype,
     safety_checker=None
 )
 pipe = pipe.to(device)
-pipe.scheduler =
+pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
 
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 
+def apply_canny(image, low_threshold, high_threshold):
+    """Apply Canny edge detection to the image"""
+    # Convert PIL image to numpy array
+    image_np = np.array(image)
+
+    # Convert to grayscale if the image is colored
+    if len(image_np.shape) == 3 and image_np.shape[2] == 3:
+        image_gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
+    else:
+        image_gray = image_np
+
+    # Apply Canny edge detection
+    edges = cv2.Canny(image_gray, low_threshold, high_threshold)
+
+    # Convert back to RGB for the model
+    edges_rgb = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)
+
+    # Convert back to PIL image
+    return Image.fromarray(edges_rgb)
+
 @spaces.GPU
 def infer(
     prompt,
@@ -33,7 +64,8 @@ def infer(
     negative_prompt,
     seed,
     randomize_seed,
-
+    canny_low_threshold,
+    canny_high_threshold,
     guidance_scale,
     num_inference_steps,
     progress=gr.Progress(track_tqdm=True),
@@ -55,24 +87,26 @@ def infer(
         width = MAX_IMAGE_SIZE
     if height > MAX_IMAGE_SIZE:
         height = MAX_IMAGE_SIZE
+
+    # Apply Canny edge detection
+    canny_image = apply_canny(input_image, canny_low_threshold, canny_high_threshold)
 
     image = pipe(
         prompt=prompt,
-        image=
+        image=canny_image,
         negative_prompt=negative_prompt,
         guidance_scale=guidance_scale,
-        image_guidance_scale=image_guidance_scale,
        num_inference_steps=num_inference_steps,
         generator=generator,
     ).images[0]
 
-    return image, seed
+    return image, seed, canny_image
 
 
 examples = [
-    ["
-    ["
-    ["
+    ["A fantasy landscape with mountains and a lake", "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"],
+    ["A cyberpunk city street scene", "https://raw.githubusercontent.com/timothybrooks/instruct-pix2pix/main/imgs/example.jpg"],
+    ["A cartoon character in winter clothing", "https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/controlnet/person_image.png"],
 ]
 
 css = """
@@ -84,7 +118,7 @@ css = """
 
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(" #
+        gr.Markdown(" # ControlNet Canny - Edge Guided Image Generation")
 
         with gr.Row():
             with gr.Column(scale=1):
@@ -94,11 +128,19 @@ with gr.Blocks(css=css) as demo:
                     height=400
                 )
             with gr.Column(scale=1):
-
+                canny_image = gr.Image(
+                    label="Canny Edge Detection",
+                    height=400
+                )
+            with gr.Column(scale=1):
+                result = gr.Image(
+                    label="Result",
+                    height=400
+                )
 
         prompt = gr.Text(
-            label="
-            placeholder="Enter your
+            label="Prompt",
+            placeholder="Enter your prompt (e.g., 'a fantasy landscape with mountains')",
         )
 
         run_button = gr.Button("Run", variant="primary")
@@ -111,22 +153,30 @@ with gr.Blocks(css=css) as demo:
             )
 
            with gr.Row():
-
-                    label="
-                    minimum=
-                    maximum=
-                    step=
-                    value=
+                canny_low_threshold = gr.Slider(
+                    label="Canny Low Threshold",
+                    minimum=1,
+                    maximum=255,
+                    step=1,
+                    value=100,
                 )
 
-
-                    label="
-                    minimum=1
-                    maximum=
-                    step=
-                    value=
+                canny_high_threshold = gr.Slider(
+                    label="Canny High Threshold",
+                    minimum=1,
+                    maximum=255,
+                    step=1,
+                    value=200,
+                )
+
+                guidance_scale = gr.Slider(
+                    label="Guidance scale",
+                    minimum=1.0,
+                    maximum=20.0,
+                    step=0.1,
+                    value=7.5,
+                )
+
            seed = gr.Slider(
                label="Seed",
                minimum=0,
@@ -142,13 +192,13 @@ with gr.Blocks(css=css) as demo:
                 minimum=1,
                 maximum=100,
                 step=1,
-                value=
+                value=30,
             )
 
         gr.Examples(
             examples=examples,
             inputs=[prompt, input_image],
-            outputs=[result, seed],
+            outputs=[result, seed, canny_image],
             fn=infer,
             cache_examples=True,
         )
@@ -162,11 +212,12 @@ with gr.Blocks(css=css) as demo:
             negative_prompt,
             seed,
             randomize_seed,
-
+            canny_low_threshold,
+            canny_high_threshold,
             guidance_scale,
             num_inference_steps,
         ],
-        outputs=[result, seed],
+        outputs=[result, seed, canny_image],
     )
 
 if __name__ == "__main__":
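For intuition about the two threshold sliders this commit adds, the Canny step can be run standalone. A hedged sketch mirroring `apply_canny` above (grayscale conversion followed by `cv2.Canny`); `input.png` is a hypothetical local test image:

```python
# Compare how the two Canny thresholds change the edge map that conditions
# generation. "input.png" is a placeholder path for any local test image.
import cv2
import numpy as np
from PIL import Image

gray = np.array(Image.open("input.png").convert("L"))
for low, high in [(50, 100), (100, 200), (200, 255)]:
    edges = cv2.Canny(gray, low, high)
    fraction = (edges > 0).mean()
    print(f"low={low:3d} high={high:3d} -> {fraction:.2%} of pixels are edges")
```

Lower thresholds keep more edges, which ties the generated image more tightly to the source structure; higher thresholds keep only strong contours and leave the prompt more freedom.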
requirements.txt (CHANGED)
@@ -5,4 +5,5 @@ accelerate>=0.21.0
 gradio>=3.50.0
 numpy>=1.24.0
 Pillow>=10.0.0
-safetensors>=0.3.2
+safetensors>=0.3.2
+opencv-python>=4.8.0