yingzhac committed
Commit eaa2696 · 1 Parent(s): f0c0f38

Update to use timbrooks/instruct-pix2pix model

Files changed (4)
  1. .gitignore +79 -0
  2. README.md +42 -1
  3. app.py +38 -38
  4. requirements.txt +8 -6
.gitignore ADDED
@@ -0,0 +1,79 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # Virtual environments
+ venv/
+ env/
+ ENV/
+ .env
+
+ # Model files and large binaries
+ *.bin
+ *.pt
+ *.pth
+ *.onnx
+ *.ckpt
+ *.safetensors
+
+ # Logs and outputs
+ logs/
+ runs/
+ outputs/
+
+ # OS specific
+ .DS_Store
+ Thumbs.db
+
+ # PyCharm
+ .idea/
+
+ # VS Code
+ .vscode/
README.md CHANGED
@@ -11,4 +11,45 @@ license: mit
  short_description: sdxl_refiner
  ---
 
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+ # InstructPix2Pix Application
+
+ This application allows you to edit images using natural language instructions, powered by the [InstructPix2Pix](https://github.com/timothybrooks/instruct-pix2pix) model.
+
+ ## Setup
+
+ 1. Install the required dependencies:
+
+ ```bash
+ pip install -r requirements.txt
+ ```
+
+ 2. Run the application:
+
+ ```bash
+ python app.py
+ ```
+
+ ## Usage
+
+ 1. Upload an image or use one of the examples
+ 2. Enter an instruction for how you want to edit the image (e.g., "Make it look like winter", "Turn the sky into a sunset")
+ 3. Click "Run" to generate the edited image
+ 4. Adjust settings in the "Advanced Settings" section for more control:
+    - Image guidance scale: controls how closely the output follows the input image structure
+    - Guidance scale: controls how closely the output follows your text instruction
+    - Number of inference steps: higher values give better quality but take longer
+
+ ## Examples of Instructions
+
+ - "Turn the sky into a sunset"
+ - "Make it look like winter"
+ - "Turn him into a cyborg"
+ - "Make it look like a painting"
+ - "Add rain to the scene"
+ - "Make it look like night time"
+
+ ## Technical Details
+
+ This app uses the [timbrooks/instruct-pix2pix](https://huggingface.co/timbrooks/instruct-pix2pix) model from Hugging Face with the Diffusers library. The model is designed to edit images based on natural language instructions.
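
For quick reference, the README's Technical Details boil down to the following standalone sketch. It mirrors the app.py changes in this commit (same model ID, scheduler swap, and default slider values); the example instruction and image URL are taken from the commit's examples list, and the output filename is illustrative.

```python
import torch
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
from diffusers.utils import load_image

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the InstructPix2Pix pipeline: fp16 on GPU, fp32 on CPU.
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    "timbrooks/instruct-pix2pix",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    safety_checker=None,
).to(device)
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)

# Edit an image from a natural-language instruction.
image = load_image("https://raw.githubusercontent.com/timothybrooks/instruct-pix2pix/main/imgs/example.jpg")
edited = pipe(
    "Turn him into a cyborg",
    image=image,
    num_inference_steps=20,     # default introduced by this commit
    image_guidance_scale=1.0,   # fidelity to the input image
    guidance_scale=7.5,         # fidelity to the instruction
).images[0]
edited.save("edited.png")       # illustrative output path
```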
app.py CHANGED
@@ -3,25 +3,25 @@ import numpy as np
  import random
 
  import spaces
- from diffusers import StableDiffusionXLImg2ImgPipeline
- from diffusers.utils import load_image
  import torch
+ from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
+ from diffusers.utils import load_image
 
  device = "cuda" if torch.cuda.is_available() else "cpu"
- model_repo_id = "stabilityai/stable-diffusion-xl-refiner-1.0"
+ model_repo_id = "timbrooks/instruct-pix2pix"
 
  if torch.cuda.is_available():
      torch_dtype = torch.float16
  else:
      torch_dtype = torch.float32
 
- pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
+ pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
      model_repo_id,
      torch_dtype=torch_dtype,
-     variant="fp16" if torch.cuda.is_available() else None,
-     use_safetensors=True
+     safety_checker=None
  )
  pipe = pipe.to(device)
+ pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
 
  MAX_SEED = np.iinfo(np.int32).max
  MAX_IMAGE_SIZE = 1024
@@ -33,7 +33,7 @@ def infer(
      negative_prompt,
      seed,
      randomize_seed,
-     strength,
+     image_guidance_scale,
      guidance_scale,
      num_inference_steps,
      progress=gr.Progress(track_tqdm=True),
@@ -61,8 +61,8 @@
          image=input_image,
          negative_prompt=negative_prompt,
          guidance_scale=guidance_scale,
+         image_guidance_scale=image_guidance_scale,
          num_inference_steps=num_inference_steps,
-         strength=strength,
          generator=generator,
      ).images[0]
 
@@ -70,9 +70,9 @@
 
 
  examples = [
-     ["Astronaut in a jungle, cold color palette, muted colors, detailed, 8k", "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"],
-     ["An astronaut riding a green horse", "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"],
-     ["A delicious ceviche cheesecake slice", "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"],
+     ["Turn the sky into a sunset", "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"],
+     ["Turn him into a cyborg", "https://raw.githubusercontent.com/timothybrooks/instruct-pix2pix/main/imgs/example.jpg"],
+     ["Make it look like winter", "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"],
  ]
 
  css = """
@@ -84,7 +84,7 @@
 
  with gr.Blocks(css=css) as demo:
      with gr.Column(elem_id="col-container"):
-         gr.Markdown(" # SDXL Refiner - Image-to-Image")
+         gr.Markdown(" # InstructPix2Pix - Image Editing")
 
          with gr.Row():
              with gr.Column(scale=1):
@@ -97,8 +97,8 @@ with gr.Blocks(css=css) as demo:
              result = gr.Image(label="Result", height=400)
 
          prompt = gr.Text(
-             label="Prompt",
-             placeholder="Enter your prompt",
+             label="Instruction",
+             placeholder="Enter your instruction (e.g., 'turn the sky into a sunset')",
          )
 
          run_button = gr.Button("Run", variant="primary")
@@ -110,13 +110,22 @@ with gr.Blocks(css=css) as demo:
              placeholder="Enter a negative prompt",
          )
 
-         strength = gr.Slider(
-             label="Strength",
-             minimum=0.0,
-             maximum=1.0,
-             step=0.05,
-             value=0.7,
-         )
+         with gr.Row():
+             image_guidance_scale = gr.Slider(
+                 label="Image guidance scale",
+                 minimum=0.0,
+                 maximum=5.0,
+                 step=0.1,
+                 value=1.0,
+             )
+
+             guidance_scale = gr.Slider(
+                 label="Guidance scale",
+                 minimum=1.0,
+                 maximum=20.0,
+                 step=0.1,
+                 value=7.5,
+             )
 
          seed = gr.Slider(
              label="Seed",
@@ -128,22 +137,13 @@ with gr.Blocks(css=css) as demo:
 
          randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
 
-         with gr.Row():
-             guidance_scale = gr.Slider(
-                 label="Guidance scale",
-                 minimum=1.0,
-                 maximum=20.0,
-                 step=0.1,
-                 value=7.5,
-             )
-
-             num_inference_steps = gr.Slider(
-                 label="Number of inference steps",
-                 minimum=1,
-                 maximum=100,
-                 step=1,
-                 value=30,
-             )
+         num_inference_steps = gr.Slider(
+             label="Number of inference steps",
+             minimum=1,
+             maximum=100,
+             step=1,
+             value=20,
+         )
 
      gr.Examples(
          examples=examples,
@@ -162,7 +162,7 @@ with gr.Blocks(css=css) as demo:
              negative_prompt,
              seed,
              randomize_seed,
-             strength,
+             image_guidance_scale,
              guidance_scale,
              num_inference_steps,
          ],
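
A note on the two sliders this diff introduces: InstructPix2Pix applies classifier-free guidance over both the text instruction and the input image, so `guidance_scale` and `image_guidance_scale` pull the output in different directions. A quick offline sweep makes the trade-off visible; this is a hypothetical snippet that reuses `pipe` and assumes a PIL `input_image`, as set up in app.py above, and the output filenames are illustrative.

```python
# Hypothetical sweep over the two guidance scales to compare edit strength.
# Assumes `pipe` and `input_image` already exist as in app.py above.
for igs in (1.0, 1.5, 2.0):        # higher keeps more of the input image
    for gs in (5.0, 7.5, 10.0):    # higher follows the instruction more strongly
        out = pipe(
            "Make it look like winter",
            image=input_image,
            num_inference_steps=20,
            image_guidance_scale=igs,
            guidance_scale=gs,
        ).images[0]
        out.save(f"winter_igs{igs}_gs{gs}.png")
```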
requirements.txt CHANGED
@@ -1,6 +1,8 @@
1
- accelerate
2
- diffusers
3
- invisible_watermark
4
- torch
5
- transformers
6
- xformers
 
 
 
1
+ torch>=2.0.0
2
+ diffusers>=0.21.0
3
+ transformers>=4.31.0
4
+ accelerate>=0.21.0
5
+ gradio>=3.50.0
6
+ numpy>=1.24.0
7
+ Pillow>=10.0.0
8
+ safetensors>=0.3.2
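
After installing the pinned requirements, a quick import check (a convenience sketch, not part of the commit) confirms the environment provides what app.py expects:

```python
# Verify the pinned packages import and report their versions.
import diffusers
import gradio
import torch
import transformers

print("torch", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("diffusers", diffusers.__version__)
print("transformers", transformers.__version__)
print("gradio", gradio.__version__)
```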