yingzhac committed
Commit eaa2696 · 1 Parent(s): f0c0f38

Update to use timbrooks/instruct-pix2pix model

Files changed (4)
  1. .gitignore +79 -0
  2. README.md +42 -1
  3. app.py +38 -38
  4. requirements.txt +8 -6
.gitignore ADDED
@@ -0,0 +1,79 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # Virtual environments
+ venv/
+ env/
+ ENV/
+ .env
+
+ # Model files and large binaries
+ *.bin
+ *.pt
+ *.pth
+ *.onnx
+ *.ckpt
+ *.safetensors
+
+ # Logs and outputs
+ logs/
+ runs/
+ outputs/
+
+ # OS specific
+ .DS_Store
+ Thumbs.db
+
+ # PyCharm
+ .idea/
+
+ # VS Code
+ .vscode/
README.md CHANGED
@@ -11,4 +11,45 @@ license: mit
  short_description: sdxl_refiner
  ---
 
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+ # InstructPix2Pix Application
+
+ This application allows you to edit images using natural language instructions, powered by the [InstructPix2Pix](https://github.com/timothybrooks/instruct-pix2pix) model.
+
+ ## Setup
+
+ 1. Install the required dependencies:
+
+ ```bash
+ pip install -r requirements.txt
+ ```
+
+ 2. Run the application:
+
+ ```bash
+ python app.py
+ ```
+
+ ## Usage
+
+ 1. Upload an image or use one of the examples
+ 2. Enter an instruction for how you want to edit the image (e.g., "Make it look like winter", "Turn the sky into a sunset")
+ 3. Click "Run" to generate the edited image
+ 4. Adjust settings in the "Advanced Settings" section for more control:
+    - Image guidance scale: controls how closely the output follows the input image structure
+    - Guidance scale: controls how closely the output follows your text instruction
+    - Number of inference steps: higher values give better quality but take longer
+
+ ## Examples of Instructions
+
+ - "Turn the sky into a sunset"
+ - "Make it look like winter"
+ - "Turn him into a cyborg"
+ - "Make it look like a painting"
+ - "Add rain to the scene"
+ - "Make it look like night time"
+
+ ## Technical Details
+
+ This app uses the [timbrooks/instruct-pix2pix](https://huggingface.co/timbrooks/instruct-pix2pix) model from Hugging Face with the Diffusers library. The model is designed to edit images based on natural language instructions.
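
For quick reference, the README's Technical Details boil down to the following standalone sketch. It mirrors the app.py changes in this commit (same model ID, scheduler swap, and default slider values); the example instruction and image URL are taken from the commit's examples list, and the output filename is illustrative.

```python
import torch
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
from diffusers.utils import load_image

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the InstructPix2Pix pipeline: fp16 on GPU, fp32 on CPU.
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    "timbrooks/instruct-pix2pix",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    safety_checker=None,
).to(device)
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)

# Edit an image from a natural-language instruction.
image = load_image("https://raw.githubusercontent.com/timothybrooks/instruct-pix2pix/main/imgs/example.jpg")
edited = pipe(
    "Turn him into a cyborg",
    image=image,
    num_inference_steps=20,     # default introduced by this commit
    image_guidance_scale=1.0,   # fidelity to the input image
    guidance_scale=7.5,         # fidelity to the instruction
).images[0]
edited.save("edited.png")       # illustrative output path
```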
app.py CHANGED
@@ -3,25 +3,25 @@ import numpy as np
  import random
 
  import spaces
- from diffusers import StableDiffusionXLImg2ImgPipeline
- from diffusers.utils import load_image
  import torch
+ from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
+ from diffusers.utils import load_image
 
  device = "cuda" if torch.cuda.is_available() else "cpu"
- model_repo_id = "stabilityai/stable-diffusion-xl-refiner-1.0"
+ model_repo_id = "timbrooks/instruct-pix2pix"
 
  if torch.cuda.is_available():
      torch_dtype = torch.float16
  else:
      torch_dtype = torch.float32
 
- pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
+ pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
      model_repo_id,
      torch_dtype=torch_dtype,
-     variant="fp16" if torch.cuda.is_available() else None,
-     use_safetensors=True
+     safety_checker=None
  )
  pipe = pipe.to(device)
+ pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
 
  MAX_SEED = np.iinfo(np.int32).max
  MAX_IMAGE_SIZE = 1024
@@ -33,7 +33,7 @@ def infer(
      negative_prompt,
      seed,
      randomize_seed,
-     strength,
+     image_guidance_scale,
      guidance_scale,
      num_inference_steps,
      progress=gr.Progress(track_tqdm=True),
@@ -61,8 +61,8 @@
          image=input_image,
          negative_prompt=negative_prompt,
          guidance_scale=guidance_scale,
+         image_guidance_scale=image_guidance_scale,
          num_inference_steps=num_inference_steps,
-         strength=strength,
          generator=generator,
      ).images[0]
 
@@ -70,9 +70,9 @@
 
 
  examples = [
-     ["Astronaut in a jungle, cold color palette, muted colors, detailed, 8k", "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"],
-     ["An astronaut riding a green horse", "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"],
-     ["A delicious ceviche cheesecake slice", "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"],
+     ["Turn the sky into a sunset", "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"],
+     ["Turn him into a cyborg", "https://raw.githubusercontent.com/timothybrooks/instruct-pix2pix/main/imgs/example.jpg"],
+     ["Make it look like winter", "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"],
  ]
 
  css = """
@@ -84,7 +84,7 @@
 
  with gr.Blocks(css=css) as demo:
      with gr.Column(elem_id="col-container"):
-         gr.Markdown(" # SDXL Refiner - Image-to-Image")
+         gr.Markdown(" # InstructPix2Pix - Image Editing")
 
          with gr.Row():
              with gr.Column(scale=1):
@@ -97,8 +97,8 @@ with gr.Blocks(css=css) as demo:
              result = gr.Image(label="Result", height=400)
 
          prompt = gr.Text(
-             label="Prompt",
-             placeholder="Enter your prompt",
+             label="Instruction",
+             placeholder="Enter your instruction (e.g., 'turn the sky into a sunset')",
          )
 
          run_button = gr.Button("Run", variant="primary")
@@ -110,13 +110,22 @@ with gr.Blocks(css=css) as demo:
              placeholder="Enter a negative prompt",
          )
 
-         strength = gr.Slider(
-             label="Strength",
-             minimum=0.0,
-             maximum=1.0,
-             step=0.05,
-             value=0.7,
-         )
+         with gr.Row():
+             image_guidance_scale = gr.Slider(
+                 label="Image guidance scale",
+                 minimum=0.0,
+                 maximum=5.0,
+                 step=0.1,
+                 value=1.0,
+             )
+
+             guidance_scale = gr.Slider(
+                 label="Guidance scale",
+                 minimum=1.0,
+                 maximum=20.0,
+                 step=0.1,
+                 value=7.5,
+             )
 
          seed = gr.Slider(
              label="Seed",
@@ -128,22 +137,13 @@ with gr.Blocks(css=css) as demo:
 
          randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
 
-         with gr.Row():
-             guidance_scale = gr.Slider(
-                 label="Guidance scale",
-                 minimum=1.0,
-                 maximum=20.0,
-                 step=0.1,
-                 value=7.5,
-             )
-
-             num_inference_steps = gr.Slider(
-                 label="Number of inference steps",
-                 minimum=1,
-                 maximum=100,
-                 step=1,
-                 value=30,
-             )
+         num_inference_steps = gr.Slider(
+             label="Number of inference steps",
+             minimum=1,
+             maximum=100,
+             step=1,
+             value=20,
+         )
 
      gr.Examples(
          examples=examples,
@@ -162,7 +162,7 @@ with gr.Blocks(css=css) as demo:
              negative_prompt,
              seed,
              randomize_seed,
-             strength,
+             image_guidance_scale,
              guidance_scale,
              num_inference_steps,
          ],
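
A note on the two sliders this diff introduces: InstructPix2Pix applies classifier-free guidance over both the text instruction and the input image, so `guidance_scale` and `image_guidance_scale` pull the output in different directions. A quick offline sweep makes the trade-off visible; this is a hypothetical snippet that reuses `pipe` and assumes a PIL `input_image`, as set up in app.py above, and the output filenames are illustrative.

```python
# Hypothetical sweep over the two guidance scales to compare edit strength.
# Assumes `pipe` and `input_image` already exist as in app.py above.
for igs in (1.0, 1.5, 2.0):        # higher keeps more of the input image
    for gs in (5.0, 7.5, 10.0):    # higher follows the instruction more strongly
        out = pipe(
            "Make it look like winter",
            image=input_image,
            num_inference_steps=20,
            image_guidance_scale=igs,
            guidance_scale=gs,
        ).images[0]
        out.save(f"winter_igs{igs}_gs{gs}.png")
```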
requirements.txt CHANGED
@@ -1,6 +1,8 @@
1
- accelerate
2
- diffusers
3
- invisible_watermark
4
- torch
5
- transformers
6
- xformers
 
 
 
1
+ torch>=2.0.0
2
+ diffusers>=0.21.0
3
+ transformers>=4.31.0
4
+ accelerate>=0.21.0
5
+ gradio>=3.50.0
6
+ numpy>=1.24.0
7
+ Pillow>=10.0.0
8
+ safetensors>=0.3.2
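
After installing the pinned requirements, a quick import check (a convenience sketch, not part of the commit) confirms the environment provides what app.py expects:

```python
# Verify the pinned packages import and report their versions.
import diffusers
import gradio
import torch
import transformers

print("torch", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("diffusers", diffusers.__version__)
print("transformers", transformers.__version__)
print("gradio", gradio.__version__)
```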