Spaces:

radames
/

Enhance-This-DemoFusion-SDXL

Runtime error

App Files Community

radames committed on Dec 11, 2023

Commit

83718c8

1 Parent(s): 30970e0

use compel for prompt encoding

Browse files

Files changed (2) hide show

app.py +109 -12
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import gradio as gr
 from gradio_imageslider import ImageSlider
 import torch
 from diffusers import DiffusionPipeline, AutoencoderKL
 from PIL import Image
 from torchvision import transforms
 import tempfile
@@ -30,7 +31,12 @@ pipe = DiffusionPipeline.from_pretrained(
     use_safetensors=True,
     vae=vae,
 )
 pipe = pipe.to(device)
@@ -70,6 +76,11 @@ def predict(
     prompt,
     negative_prompt,
     seed,
     scale=2,
     progress=gr.Progress(track_tqdm=True),
 ):
@@ -77,11 +88,14 @@ def predict(
         raise gr.Error("Please upload an image.")
     padded_image = pad_image(input_image).resize((1024, 1024)).convert("RGB")
     image_lr = load_and_process_image(padded_image).to(device)
     generator = torch.manual_seed(seed)
     last_time = time.time()
     images = pipe(
-        prompt,
-        negative_prompt=negative_prompt,
         image_lr=image_lr,
         width=1024 * scale,
         height=1024 * scale,
@@ -89,11 +103,11 @@ def predict(
         stride=64,
         generator=generator,
         num_inference_steps=40,
-        guidance_scale=8.5,
-        cosine_scale_1=3,
-        cosine_scale_2=1,
-        cosine_scale_3=1,
-        sigma=0.8,
         multi_decoder=1024 * scale > 2048,
         show_image=False,
         lowvram=LOW_MEMORY,
@@ -145,13 +159,48 @@ GPU Time Comparison: T4: ~276s - A10G: ~113.6s A100: ~43.5s RTX 4090: ~48.1s
                 label="Negative Prompt",
                 value="blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
             )
             scale = gr.Slider(
                 minimum=1,
                 maximum=5,
                 value=2,
                 step=1,
                 label="x Scale",
-                interactive=False,
             )
             seed = gr.Slider(
                 minimum=0,
@@ -165,8 +214,19 @@ GPU Time Comparison: T4: ~276s - A10G: ~113.6s A100: ~43.5s RTX 4090: ~48.1s
         with gr.Column(scale=2):
             image_slider = ImageSlider(position=0.5)
             files = gr.Files()
-    # inputs = [image_input, prompt, negative_prompt, seed, scale]
-    inputs = [image_input, prompt, negative_prompt, seed]
     outputs = [image_slider, files]
     btn.click(predict, inputs=inputs, outputs=outputs, concurrency_limit=1)
     gr.Examples(
@@ -177,6 +237,12 @@ GPU Time Comparison: T4: ~276s - A10G: ~113.6s A100: ~43.5s RTX 4090: ~48.1s
                 "photography of lara croft 8k high definition award winning",
                 "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                 5436236241,
                 2,
             ],
             [
@@ -184,6 +250,12 @@ GPU Time Comparison: T4: ~276s - A10G: ~113.6s A100: ~43.5s RTX 4090: ~48.1s
                 "photo of tesla cybertruck futuristic car 8k high definition on a sand dune in mars, future",
                 "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                 383472451451,
                 2,
             ],
             [
@@ -191,6 +263,7 @@ GPU Time Comparison: T4: ~276s - A10G: ~113.6s A100: ~43.5s RTX 4090: ~48.1s
                 "a photorealistic painting of Jesus Christ, 4k high definition",
                 "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                 13317204146129588000,
                 2,
             ],
             [
@@ -198,6 +271,12 @@ GPU Time Comparison: T4: ~276s - A10G: ~113.6s A100: ~43.5s RTX 4090: ~48.1s
                 "A crowded stadium with enthusiastic fans watching a daytime sporting event, the stands filled with colorful attire and the sun casting a warm glow",
                 "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                 5623124123512,
                 2,
             ],
             [
@@ -205,12 +284,30 @@ GPU Time Comparison: T4: ~276s - A10G: ~113.6s A100: ~43.5s RTX 4090: ~48.1s
                 "a large red flower on a black background 4k high definition",
                 "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                 23123412341234,
                 2,
             ],
         ],
         inputs=inputs,
         outputs=outputs,
-        cache_examples=True,
     )

 from gradio_imageslider import ImageSlider
 import torch
 from diffusers import DiffusionPipeline, AutoencoderKL
+from compel import Compel, ReturnedEmbeddingsType
 from PIL import Image
 from torchvision import transforms
 import tempfile
     use_safetensors=True,
     vae=vae,
 )
+compel = Compel(
+    tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
+    text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
+    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+    requires_pooled=[False, True],
+)
 pipe = pipe.to(device)
     prompt,
     negative_prompt,
     seed,
+    guidance_scale=8.5,
+    cosine_scale_1=3,
+    cosine_scale_2=1,
+    cosine_scale_3=1,
+    sigma=0.8,
     scale=2,
     progress=gr.Progress(track_tqdm=True),
 ):
         raise gr.Error("Please upload an image.")
     padded_image = pad_image(input_image).resize((1024, 1024)).convert("RGB")
     image_lr = load_and_process_image(padded_image).to(device)
+    conditioning, pooled = compel([prompt, negative_prompt])
     generator = torch.manual_seed(seed)
     last_time = time.time()
     images = pipe(
+        prompt_embeds=conditioning[0:1],
+        pooled_prompt_embeds=pooled[0:1],
+        negative_prompt_embeds=conditioning[1:2],
+        negative_pooled_prompt_embeds=pooled[1:2],
         image_lr=image_lr,
         width=1024 * scale,
         height=1024 * scale,
         stride=64,
         generator=generator,
         num_inference_steps=40,
+        guidance_scale=guidance_scale,
+        cosine_scale_1=cosine_scale_1,
+        cosine_scale_2=cosine_scale_2,
+        cosine_scale_3=cosine_scale_3,
+        sigma=sigma,
         multi_decoder=1024 * scale > 2048,
         show_image=False,
         lowvram=LOW_MEMORY,
                 label="Negative Prompt",
                 value="blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
             )
+            guidance_scale = gr.Slider(
+                minimum=0,
+                maximum=50,
+                value=8.5,
+                step=0.001,
+                label="Guidance Scale",
+            )
             scale = gr.Slider(
                 minimum=1,
                 maximum=5,
                 value=2,
                 step=1,
                 label="x Scale",
+                interactive=True,
+            )
+            cosine_scale_1 = gr.Slider(
+                minimum=0,
+                maximum=5,
+                value=3,
+                step=0.01,
+                label="Cosine Scale 1",
+            )
+            cosine_scale_2 = gr.Slider(
+                minimum=0,
+                maximum=5,
+                value=1,
+                step=0.01,
+                label="Cosine Scale 2",
+            )
+            cosine_scale_3 = gr.Slider(
+                minimum=0,
+                maximum=5,
+                value=1,
+                step=0.01,
+                label="Cosine Scale 3",
+            )
+            sigma = gr.Slider(
+                minimum=0,
+                maximum=1,
+                value=0.8,
+                step=0.01,
+                label="Sigma",
             )
             seed = gr.Slider(
                 minimum=0,
         with gr.Column(scale=2):
             image_slider = ImageSlider(position=0.5)
             files = gr.Files()
+    inputs = [
+        image_input,
+        prompt,
+        negative_prompt,
+        seed,
+        guidance_scale,
+        cosine_scale_1,
+        cosine_scale_2,
+        cosine_scale_3,
+        sigma,
+        scale,
+    ]
+    # inputs = [image_input, prompt, negative_prompt, seed]
     outputs = [image_slider, files]
     btn.click(predict, inputs=inputs, outputs=outputs, concurrency_limit=1)
     gr.Examples(
                 "photography of lara croft 8k high definition award winning",
                 "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                 5436236241,
+                8.5,
+                3,
+                1,
+                1,
+                1,
+                0.8,
                 2,
             ],
             [
                 "photo of tesla cybertruck futuristic car 8k high definition on a sand dune in mars, future",
                 "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                 383472451451,
+                8.5,
+                3,
+                1,
+                1,
+                1,
+                0.8,
                 2,
             ],
             [
                 "a photorealistic painting of Jesus Christ, 4k high definition",
                 "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                 13317204146129588000,
+                8.5,
                 2,
             ],
             [
                 "A crowded stadium with enthusiastic fans watching a daytime sporting event, the stands filled with colorful attire and the sun casting a warm glow",
                 "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                 5623124123512,
+                8.5,
+                3,
+                1,
+                1,
+                1,
+                0.8,
                 2,
             ],
             [
                 "a large red flower on a black background 4k high definition",
                 "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                 23123412341234,
+                8.5,
+                3,
+                1,
+                1,
+                1,
+                0.8,
                 2,
             ],
+            [
+                "./examples/huggingface.jpg",
+                "photo realistic huggingface human+++ emoji costume, round, yellow, skin+++ texture+++",
+                "blurry, ugly, duplicate, poorly drawn, deformed, mosaic, emoji cartoon,  drawing, pixelated",
+                5532144938416372000,
+                20.0,
+                4.64,
+                1,
+                1,
+                0.49,
+                3,
+            ],
         ],
         inputs=inputs,
         outputs=outputs,
+        cache_examples=False,
     )

requirements.txt CHANGED Viewed

@@ -10,4 +10,5 @@ accelerate
 invisible-watermark
 huggingface-hub
 hf-transfer
-gradio_imageslider==0.0.16

 invisible-watermark
 huggingface-hub
 hf-transfer
+gradio_imageslider==0.0.16
+compel