cavargas10 committed on Mar 8

Commit

f72d529

verified ·

1 Parent(s): a850d89

Upload 22 files

Browse files

Files changed (22) hide show

.gitattributes +53 -35
000028_scribble_concat.webp +3 -0
000028_scribble_concat_canny.webp +3 -0
000028_scribble_concat_hed.webp +3 -0
000028_scribble_concat_lineart.webp +3 -0
000028_scribble_concat_pidi.webp +3 -0
000155_scribble_concat.webp +3 -0
000186_scribble_concat.webp +3 -0
000210_scribble_concat.webp +3 -0
000227_scribble_concat.webp +3 -0
000242_scribble_concat.webp +3 -0
000250_scribble_concat.webp +3 -0
000256_scribble_concat.webp +3 -0
000271_scribble_concat.webp +3 -0
000283_scribble_concat.webp +3 -0
000285_scribble_concat.webp +3 -0
000290_scribble_concat.webp +3 -0
README.md +204 -0
config.json +56 -0
diffusion_pytorch_model.safetensors +3 -0
masonry.webp +3 -0
masonry2.webp +3 -0

.gitattributes CHANGED Viewed

@@ -1,35 +1,53 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+000283_scribble_concat.webp filter=lfs diff=lfs merge=lfs -text
+masonry.webp filter=lfs diff=lfs merge=lfs -text
+masonry2.webp filter=lfs diff=lfs merge=lfs -text
+000028_scribble_concat_canny.webp filter=lfs diff=lfs merge=lfs -text
+000028_scribble_concat_hed.webp filter=lfs diff=lfs merge=lfs -text
+000028_scribble_concat_lineart.webp filter=lfs diff=lfs merge=lfs -text
+000028_scribble_concat_pidi.webp filter=lfs diff=lfs merge=lfs -text
+000028_scribble_concat.webp filter=lfs diff=lfs merge=lfs -text
+000155_scribble_concat.webp filter=lfs diff=lfs merge=lfs -text
+000186_scribble_concat.webp filter=lfs diff=lfs merge=lfs -text
+000210_scribble_concat.webp filter=lfs diff=lfs merge=lfs -text
+000227_scribble_concat.webp filter=lfs diff=lfs merge=lfs -text
+000242_scribble_concat.webp filter=lfs diff=lfs merge=lfs -text
+000250_scribble_concat.webp filter=lfs diff=lfs merge=lfs -text
+000256_scribble_concat.webp filter=lfs diff=lfs merge=lfs -text
+000271_scribble_concat.webp filter=lfs diff=lfs merge=lfs -text
+000285_scribble_concat.webp filter=lfs diff=lfs merge=lfs -text
+000290_scribble_concat.webp filter=lfs diff=lfs merge=lfs -text

000028_scribble_concat.webp ADDED Viewed

Git LFS Details

SHA256: b6b40c000d3ac6e7b3dd2ac9d548674e71fc8ebfe735177b14900513479d85b6
Pointer size: 131 Bytes
Size of remote file: 219 kB

000028_scribble_concat_canny.webp ADDED Viewed

Git LFS Details

SHA256: 2955072aa48e31cead9c399a2cd50b69bf8a61f189ad2a53d9496683f34ed6ea
Pointer size: 131 Bytes
Size of remote file: 313 kB

000028_scribble_concat_hed.webp ADDED Viewed

Git LFS Details

SHA256: f1993b4d902595eea87e92f462ed7f1b70ec12c617496e4d88e2cda6de5b5c83
Pointer size: 131 Bytes
Size of remote file: 266 kB

000028_scribble_concat_lineart.webp ADDED Viewed

Git LFS Details

SHA256: 34ed02d35e6fdce87ee1fd25fd4ab12199df843636c634557a61a11f10ced614
Pointer size: 131 Bytes
Size of remote file: 428 kB

000028_scribble_concat_pidi.webp ADDED Viewed

Git LFS Details

SHA256: ab2e9eb48f05167e50136ffec990ba60df2ed5a67eff8358a2db5e34f6709b34
Pointer size: 131 Bytes
Size of remote file: 260 kB

000155_scribble_concat.webp ADDED Viewed

Git LFS Details

SHA256: e670c293055d5069ee62cb4cbd290d04cfcdbfc9e5102f7a4652751dca79ce56
Pointer size: 131 Bytes
Size of remote file: 409 kB

000186_scribble_concat.webp ADDED Viewed

Git LFS Details

SHA256: 88e44fcb6a046fc1a9a8de3a82aa756200ce219d0dbbed1a3ac556d088e80ee6
Pointer size: 131 Bytes
Size of remote file: 700 kB

000210_scribble_concat.webp ADDED Viewed

Git LFS Details

SHA256: 138a032b8cea5c9112db57ac2276c1994235d5a7a298c70d7e200e314ac1c959
Pointer size: 131 Bytes
Size of remote file: 141 kB

000227_scribble_concat.webp ADDED Viewed

Git LFS Details

SHA256: a41cd8cd1b5450ea1894bb87f8139384da43c61e3df3bd0455e401a3b0cc229c
Pointer size: 131 Bytes
Size of remote file: 414 kB

000242_scribble_concat.webp ADDED Viewed

Git LFS Details

SHA256: e566f426554a8fb12ac9e4ef823d1b7099f9c2a1aa1cf7bc0b6d124c50748ae7
Pointer size: 131 Bytes
Size of remote file: 158 kB

000250_scribble_concat.webp ADDED Viewed

Git LFS Details

SHA256: cdad7a0ef9d3966b6b7f40e8de5f433abfdb493176fc65a303801646cd0aa060
Pointer size: 131 Bytes
Size of remote file: 164 kB

000256_scribble_concat.webp ADDED Viewed

Git LFS Details

SHA256: 3c1d2e965175150db30b650b4a4005b19b1c9cbb55dd32776992457c192983a5
Pointer size: 131 Bytes
Size of remote file: 445 kB

000271_scribble_concat.webp ADDED Viewed

Git LFS Details

SHA256: cea5c6d5c3caf4cafb9d6b5b4da8faa3d081648b345eacbbac91e9783d2c09f5
Pointer size: 131 Bytes
Size of remote file: 419 kB

000283_scribble_concat.webp ADDED Viewed

Git LFS Details

SHA256: 17e44db401dc62516d71b2a033271c5187ed0b0179bbc560158a99cff3d999be
Pointer size: 132 Bytes
Size of remote file: 1.03 MB

000285_scribble_concat.webp ADDED Viewed

Git LFS Details

SHA256: d3e26746a78f22cc5b0ce15df63a6a6e31f39127365be63815b1be5f51c81da1
Pointer size: 131 Bytes
Size of remote file: 479 kB

000290_scribble_concat.webp ADDED Viewed

Git LFS Details

SHA256: 43229ff959cab43b7bb9e5ab12177d26e0bf8df6007a7133842a640eb444275c
Pointer size: 131 Bytes
Size of remote file: 278 kB

README.md CHANGED Viewed

@@ -1,3 +1,207 @@
 ---
 license: apache-2.0
 ---

 ---
 license: apache-2.0
+pipeline_tag: text-to-image
 ---
+# **This is an anyline model that can generate images comparable with midjourney and supports any line type and any line width!**
+The following five rows use different types of control lines; from top to bottom: Scribble, Canny, HED, PIDI, Lineart
+![image](./000028_scribble_concat.webp)
+![image](./000028_scribble_concat_canny.webp)
+![image](./000028_scribble_concat_hed.webp)
+![image](./000028_scribble_concat_pidi.webp)
+![image](./000028_scribble_concat_lineart.webp)
+# **General Scribble model that can generate images comparable with midjourney!**
+![image](./masonry.webp)
+# Controlnet-Scribble-Sdxl-1.0
+<!-- Provide a quick summary of what the model is/does. -->
+Hello, I am very happy to announce the controlnet-scribble-sdxl-1.0 model, **a very powerful controlnet that can generate high resolution images visually comparable with midjourney**.
+The model was trained on a large amount of high-quality data (over 10,000,000 images) that was carefully filtered and captioned (using a powerful vllm model). In addition, useful tricks were applied
+during training, including data augmentation, multiple losses and multi-resolution training. Note that this model can achieve higher aesthetic performance than our Controlnet-Canny-Sdxl-1.0 model.
+The model supports any type of line and any line width; the sketch can be very simple, and so can the prompt. This model is more general and good at generating visually appealing images.
+The control ability is also strong: for example, if you are unsatisfied with some local regions of the generated image, drawing a more precise sketch and giving a detailed prompt will help a lot.
+**Note that the model also supports lineart or canny lines; try it and you will get a surprise!!!**
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** xinsir
+- **Model type:** ControlNet_SDXL
+- **License:** apache-2.0
+- **Finetuned from model [optional]:** stabilityai/stable-diffusion-xl-base-1.0
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Paper [optional]:** https://arxiv.org/abs/2302.05543
+### Examples[**Note the following examples are all generated using stabilityai/stable-diffusion-xl-base-1.0 and xinsir/controlnet-scribble-sdxl-1.0**]
+prompt: purple feathered eagle with specks of light like stars in feathers. It glows with arcane power
+![image1](./000155_scribble_concat.webp)
+prompt: manga girl in the city, drip marketing
+![image2](./000186_scribble_concat.webp)
+prompt: 17 year old girl with long dark hair in the style of realism with fantasy elements, detailed botanical illustrations, barbs and thorns, ethereal, magical, black, purple and maroon, intricate, photorealistic
+![image3](./000210_scribble_concat.webp)
+prompt: a logo for a paintball field named district 7 on a white background featuring paintballs the is bright and colourful eye catching and impactuful
+![image4](./000227_scribble_concat.webp)
+prompt: a photograph of a handsome crying blonde man with his face painted in the pride flag
+![image5](./000242_scribble_concat.webp)
+prompt: simple flat sketch fox play ball
+![image6](./000250_scribble_concat.webp)
+prompt: concept art, a surreal magical Tome of the Sun God, the book binding appears to be made of solar fire and emits a holy, radiant glow, Age of Wonders, Unreal Engine v5
+![image7](./000256_scribble_concat.webp)
+prompt: black Caribbean man walking balance front his fate chaos anarchy liberty independence force energy independence cinematic surreal beautiful rendition intricate sharp detail 8k
+![image8](./000271_scribble_concat.webp)
+prompt: die hard nakatomi plaza, explosion at the top, vector, night scene
+![image9](./000285_scribble_concat.webp)
+prompt: solitary glowing yellow tree in a desert. ultra wide shot. night time. hdr photography
+![image10](./000290_scribble_concat.webp)
+## How to Get Started with the Model
+Use the code below to get started with the model.
+```python
+from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
+from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
+from controlnet_aux import PidiNetDetector, HEDdetector
+from diffusers.utils import load_image
+from huggingface_hub import HfApi
+from pathlib import Path
+from PIL import Image
+import torch
+import numpy as np
+import cv2
+import os
+import random
+def nms(x, t, s):
+    x = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)
+    f1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8)
+    f2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8)
+    f3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8)
+    f4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8)
+    y = np.zeros_like(x)
+    for f in [f1, f2, f3, f4]:
+        np.putmask(y, cv2.dilate(x, kernel=f) == x, x)
+    z = np.zeros_like(y, dtype=np.uint8)
+    z[y > t] = 255
+    return z
+controlnet_conditioning_scale = 1.0
+prompt = "your prompt, the longer the better, you can describe it as detail as possible"
+negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
+eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")
+controlnet = ControlNetModel.from_pretrained(
+    "xinsir/controlnet-scribble-sdxl-1.0",
+    torch_dtype=torch.float16
+)
+# when test with other base model, you need to change the vae also.
+vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
+pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-1.0",
+    controlnet=controlnet,
+    vae=vae,
+    safety_checker=None,
+    torch_dtype=torch.float16,
+    scheduler=eulera_scheduler,
+)
+# you can use either hed to generate a fake scribble given an image or a sketch image totally draw by yourself
+if random.random() > 0.5:
+  # Method 1
+  # if you use hed, you should provide an image, the image can be real or anime, you extract its hed lines and use it as the scribbles
+  # The detail about hed detect you can refer to https://github.com/lllyasviel/ControlNet/blob/main/gradio_fake_scribble2image.py
+  # Below is a example using diffusers HED detector
+  # image_path = Image.open("your image path, the image can be real or anime, HED detector will extract its edge boundery")
+  image_path = cv2.imread("your image path, the image can be real or anime, HED detector will extract its edge boundery")
+  processor = HEDdetector.from_pretrained('lllyasviel/Annotators')
+  controlnet_img = processor(image_path, scribble=False)
+  controlnet_img.save("a hed detect path for an image")
+  # following is some processing to simulate human sketch draw, different threshold can generate different width of lines
+  controlnet_img = np.array(controlnet_img)
+  controlnet_img = nms(controlnet_img, 127, 3)
+  controlnet_img = cv2.GaussianBlur(controlnet_img, (0, 0), 3)
+  # higher threshold, thinner line
+  random_val = int(round(random.uniform(0.01, 0.10), 2) * 255)
+  controlnet_img[controlnet_img > random_val] = 255
+  controlnet_img[controlnet_img < 255] = 0
+  controlnet_img = Image.fromarray(controlnet_img)
+else:
+  # Method 2
+  # if you use a sketch image total draw by yourself
+  control_path = "the sketch image you draw with some tools, like drawing board, the path you save it"
+  controlnet_img = Image.open(control_path) # Note that the image must be black-white(0 or 255), like the examples we list
+# must resize to 1024*1024 or same resolution bucket to get the best performance
+width, height  = controlnet_img.size
+ratio = np.sqrt(1024. * 1024. / (width * height))
+new_width, new_height = int(width * ratio), int(height * ratio)
+controlnet_img = controlnet_img.resize((new_width, new_height))
+images = pipe(
+    prompt,
+    negative_prompt=negative_prompt,
+    image=controlnet_img,
+    controlnet_conditioning_scale=controlnet_conditioning_scale,
+    width=new_width,
+    height=new_height,
+    num_inference_steps=30,
+    ).images
+images[0].save(f"your image save path, png format is usually better than jpg or webp in terms of image quality but got much bigger")
+```
+## Evaluation Data
+The test data was randomly sampled from midjourney upscaled images with prompts, as the purpose of the project is to let people draw images like midjourney. midjourney’s users include a large number of professional designers,
+and the upscaled images tend to have higher beauty scores and prompt consistency, so they are suitable as a test set for judging the ability of a controlnet. We selected 300 prompt-image pairs randomly and generated 4 images per prompt,
+1200 images in total. We calculate the Laion Aesthetic Score to measure beauty and the PerceptualSimilarity to measure control ability; we find that the quality of the images is consistent with the metric values.
+We compare our method with other SOTA huggingface models and list the results below. Our model has the highest aesthetic score and can generate visually appealing images if you prompt it properly.
+Note: The condition images are generated using the HED detector with a random threshold to produce different kinds of lines.
+## Quantitative Result
+| metric | xinsir/controlnet-scribble-sdxl-1.0 |
+|-------|-------|
+| laion_aesthetic |  **6.03**   |
+| perceptual similarity | 0.5701 |
+laion_aesthetic(the higher the better)
+perceptual similarity(the lower the better)
+Note: The values were calculated with images saved in webp format; when saved in png the aesthetic values increase by 0.1-0.3, but the relative ranking remains unchanged.
+### Conclusion
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+In our evaluation, the model can generate visually appealing images from a simple sketch and a simple prompt. This model supports any type of line and any line width: using thick lines gives coarse control,
+which obeys the prompt you write more, and using thin lines gives strong control, which obeys the condition image more. The model can help you accomplish a drawing from coarse to fine. The model achieves a higher
+aesthetic score than xinsir/controlnet-canny-sdxl-1.0, but the control ability decreases a bit because of the thick lines.

config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "_class_name": "ControlNetModel",
+  "_diffusers_version": "0.20.0.dev0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "class_embed_type": null,
+  "conditioning_channels": 3,
+  "conditioning_embedding_out_channels": [
+    16,
+    32,
+    96,
+    256
+  ],
+  "controlnet_conditioning_channel_order": "rgb",
+  "cross_attention_dim": 2048,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "global_pool_conditions": false,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_scale_factor": 1,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_time_scale_shift": "default",
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}

diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b3e4ac47bc814019d50dc842f579301440deb6d8f09ee1b91a30f527ace1b852
+size 2502139104

masonry.webp ADDED Viewed

Git LFS Details

SHA256: 386e8b5f429c6e3066924d4c91ba7c702d78e2108497dd53d25480168abdbd43
Pointer size: 132 Bytes
Size of remote file: 3.4 MB

masonry2.webp ADDED Viewed

Git LFS Details

SHA256: e54b5a7b245ebf03c0f2be056decd958c47d4e1351bd209675302a4d15021013
Pointer size: 132 Bytes
Size of remote file: 1.91 MB