cb1cyf committed on
Commit
b59ff27
·
1 Parent(s): 40130b0

initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +3 -0
  2. .gitignore +235 -0
  3. README.md +1 -1
  4. app.py +485 -0
  5. assets/examples/OmniGen2/0/config.json +12 -0
  6. assets/examples/OmniGen2/0/ref.png +3 -0
  7. assets/examples/OmniGen2/0/result.png +3 -0
  8. assets/examples/OmniGen2/0/result_omnigen2.png +3 -0
  9. assets/examples/OmniGen2/1/config.json +12 -0
  10. assets/examples/OmniGen2/1/ref.webp +3 -0
  11. assets/examples/OmniGen2/1/result.png +3 -0
  12. assets/examples/OmniGen2/1/result_omnigen2.png +3 -0
  13. assets/examples/OmniGen2/10/config.json +12 -0
  14. assets/examples/OmniGen2/10/ref.JPG +0 -0
  15. assets/examples/OmniGen2/10/result.png +3 -0
  16. assets/examples/OmniGen2/10/result_omnigen2.png +3 -0
  17. assets/examples/OmniGen2/2/config.json +12 -0
  18. assets/examples/OmniGen2/2/ref.png +3 -0
  19. assets/examples/OmniGen2/2/result.png +3 -0
  20. assets/examples/OmniGen2/2/result_omnigen2.png +3 -0
  21. assets/examples/OmniGen2/3/config.json +12 -0
  22. assets/examples/OmniGen2/3/ref.png +3 -0
  23. assets/examples/OmniGen2/3/result.png +3 -0
  24. assets/examples/OmniGen2/3/result_omnigen2.png +3 -0
  25. assets/examples/OmniGen2/4/config.json +12 -0
  26. assets/examples/OmniGen2/4/ref_1.jpg +3 -0
  27. assets/examples/OmniGen2/4/ref_2.png +3 -0
  28. assets/examples/OmniGen2/4/result.png +3 -0
  29. assets/examples/OmniGen2/4/result_omnigen2.png +3 -0
  30. assets/examples/OmniGen2/5/config.json +12 -0
  31. assets/examples/OmniGen2/5/ref_1.jpg +3 -0
  32. assets/examples/OmniGen2/5/ref_2.png +3 -0
  33. assets/examples/OmniGen2/5/result.png +3 -0
  34. assets/examples/OmniGen2/5/result_omnigen2.png +3 -0
  35. assets/examples/OmniGen2/6/config.json +12 -0
  36. assets/examples/OmniGen2/6/ref_1.webp +3 -0
  37. assets/examples/OmniGen2/6/ref_2.webp +3 -0
  38. assets/examples/OmniGen2/6/result.png +3 -0
  39. assets/examples/OmniGen2/6/result_omnigen2.png +3 -0
  40. assets/examples/OmniGen2/7/config.json +12 -0
  41. assets/examples/OmniGen2/7/ref_1.webp +3 -0
  42. assets/examples/OmniGen2/7/ref_2.webp +3 -0
  43. assets/examples/OmniGen2/7/result.png +3 -0
  44. assets/examples/OmniGen2/7/result_omnigen2.png +3 -0
  45. assets/examples/OmniGen2/8/config.json +12 -0
  46. assets/examples/OmniGen2/8/ref_1.jpg +3 -0
  47. assets/examples/OmniGen2/8/ref_2.jpg +3 -0
  48. assets/examples/OmniGen2/8/ref_3.jpg +3 -0
  49. assets/examples/OmniGen2/8/result.png +3 -0
  50. assets/examples/OmniGen2/8/result_omnigen2.png +3 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.png filter=lfs diff=lfs merge=lfs -text
37
+ *.jpg filter=lfs diff=lfs merge=lfs -text
38
+ *.webp filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Created by https://www.toptal.com/developers/gitignore/api/macos,python
2
+ # Edit at https://www.toptal.com/developers/gitignore?templates=macos,python
3
+
4
+ ### macOS ###
5
+ # General
6
+ .DS_Store
7
+ .AppleDouble
8
+ .LSOverride
9
+
10
+ # Icon must end with two \r
11
+ Icon
12
+
13
+
14
+ # Thumbnails
15
+ ._*
16
+
17
+ # Files that might appear in the root of a volume
18
+ .DocumentRevisions-V100
19
+ .fseventsd
20
+ .Spotlight-V100
21
+ .TemporaryItems
22
+ .Trashes
23
+ .VolumeIcon.icns
24
+ .com.apple.timemachine.donotpresent
25
+
26
+ # Directories potentially created on remote AFP share
27
+ .AppleDB
28
+ .AppleDesktop
29
+ Network Trash Folder
30
+ Temporary Items
31
+ .apdisk
32
+
33
+ ### macOS Patch ###
34
+ # iCloud generated files
35
+ *.icloud
36
+
37
+ ### Python ###
38
+ # Byte-compiled / optimized / DLL files
39
+ __pycache__/
40
+ *.py[cod]
41
+ *$py.class
42
+
43
+ # C extensions
44
+ *.so
45
+
46
+ # Distribution / packaging
47
+ .Python
48
+ build/
49
+ develop-eggs/
50
+ dist/
51
+ downloads/
52
+ eggs/
53
+ .eggs/
54
+ lib/
55
+ lib64/
56
+ parts/
57
+ sdist/
58
+ var/
59
+ wheels/
60
+ share/python-wheels/
61
+ *.egg-info/
62
+ .installed.cfg
63
+ *.egg
64
+ MANIFEST
65
+
66
+ # PyInstaller
67
+ # Usually these files are written by a python script from a template
68
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
69
+ *.manifest
70
+ *.spec
71
+
72
+ # Installer logs
73
+ pip-log.txt
74
+ pip-delete-this-directory.txt
75
+
76
+ # Unit test / coverage reports
77
+ htmlcov/
78
+ .tox/
79
+ .nox/
80
+ .coverage
81
+ .coverage.*
82
+ .cache
83
+ nosetests.xml
84
+ coverage.xml
85
+ *.cover
86
+ *.py,cover
87
+ .hypothesis/
88
+ .pytest_cache/
89
+ cover/
90
+
91
+ # Translations
92
+ *.mo
93
+ *.pot
94
+
95
+ # Django stuff:
96
+ *.log
97
+ local_settings.py
98
+ db.sqlite3
99
+ db.sqlite3-journal
100
+
101
+ # Flask stuff:
102
+ instance/
103
+ .webassets-cache
104
+
105
+ # Scrapy stuff:
106
+ .scrapy
107
+
108
+ # Sphinx documentation
109
+ docs/_build/
110
+
111
+ # PyBuilder
112
+ .pybuilder/
113
+ target/
114
+
115
+ # Jupyter Notebook
116
+ .ipynb_checkpoints
117
+
118
+ # IPython
119
+ profile_default/
120
+ ipython_config.py
121
+
122
+ # pyenv
123
+ # For a library or package, you might want to ignore these files since the code is
124
+ # intended to run in multiple environments; otherwise, check them in:
125
+ # .python-version
126
+
127
+ # pipenv
128
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
129
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
130
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
131
+ # install all needed dependencies.
132
+ #Pipfile.lock
133
+
134
+ # poetry
135
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
136
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
137
+ # commonly ignored for libraries.
138
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
139
+ #poetry.lock
140
+
141
+ # pdm
142
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
143
+ #pdm.lock
144
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
145
+ # in version control.
146
+ # https://pdm.fming.dev/#use-with-ide
147
+ .pdm.toml
148
+
149
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
150
+ __pypackages__/
151
+
152
+ # Celery stuff
153
+ celerybeat-schedule
154
+ celerybeat.pid
155
+
156
+ # SageMath parsed files
157
+ *.sage.py
158
+
159
+ # Environments
160
+ .env
161
+ .venv
162
+ env/
163
+ venv/
164
+ ENV/
165
+ env.bak/
166
+ venv.bak/
167
+
168
+ # Spyder project settings
169
+ .spyderproject
170
+ .spyproject
171
+
172
+ # Rope project settings
173
+ .ropeproject
174
+
175
+ # mkdocs documentation
176
+ /site
177
+
178
+ # mypy
179
+ .mypy_cache/
180
+ .dmypy.json
181
+ dmypy.json
182
+
183
+ # Pyre type checker
184
+ .pyre/
185
+
186
+ # pytype static type analyzer
187
+ .pytype/
188
+
189
+ # Cython debug symbols
190
+ cython_debug/
191
+
192
+ # PyCharm
193
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
194
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
195
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
196
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
197
+ #.idea/
198
+
199
+ ### Python Patch ###
200
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
201
+ poetry.toml
202
+
203
+ # ruff
204
+ .ruff_cache/
205
+
206
+ # LSP config files
207
+ pyrightconfig.json
208
+
209
+ # End of https://www.toptal.com/developers/gitignore/api/macos,python
210
+
211
+ local_scripts/
212
+
213
+ omnigen2/utils/vpn_utils.py
214
+
215
+ test_tokenizer.py
216
+ save_pipeline.py
217
+ app.sh
218
+ logs/
219
+ results/
220
+ test_jsonl*
221
+ pbs_files/
222
+ convert_ckpt_to_pipeline.py
223
+ inference_test_efficiency.py
224
+ upload_pipeline*
225
+ example_images_resized/
226
+ example_t2i_test_efficiency*.sh
227
+ example_edit_test_efficiency*.sh
228
+ example_in_context_generation_test_efficiency*.sh
229
+ intro*
230
+ resize_example_images.py
231
+ save_pipeline.py
232
+ outputs_gradio/*
233
+ test.py
234
+ data_configs/test
235
+ scripts/test
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: UMO OmniGen2
3
- emoji: 📊
4
  colorFrom: green
5
  colorTo: purple
6
  sdk: gradio
 
1
  ---
2
  title: UMO OmniGen2
3
+ emoji: ⚡️
4
  colorFrom: green
5
  colorTo: purple
6
  sdk: gradio
app.py ADDED
@@ -0,0 +1,485 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates. All rights reserved.
2
+ # Copyright (c) VectorSpaceLab and its affiliates. All rights reserved.
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import os
17
+ import dotenv
18
+ dotenv.load_dotenv(override=True)
19
+
20
+ import gradio as gr
21
+ import spaces
22
+
23
+ import argparse
24
+ import json
25
+ import random
26
+ from datetime import datetime
27
+ from glob import glob
28
+ from typing import Literal
29
+
30
+ import torch
31
+ from torchvision.transforms.functional import to_pil_image, to_tensor
32
+
33
+ from accelerate import Accelerator
34
+ from peft import LoraConfig
35
+ from safetensors.torch import load_file
36
+
37
+ from omnigen2.pipelines.omnigen2.pipeline_omnigen2 import OmniGen2Pipeline
38
+ from omnigen2.models.transformers.transformer_omnigen2 import OmniGen2Transformer2DModel
39
+ from omnigen2.schedulers.scheduling_flow_match_euler_discrete import FlowMatchEulerDiscreteScheduler
40
+ from omnigen2.schedulers.scheduling_dpmsolver_multistep import DPMSolverMultistepScheduler
41
+ from omnigen2.utils.img_util import create_collage
42
+
43
+ NEGATIVE_PROMPT = "(((deformed))), blurry, over saturation, bad anatomy, disfigured, poorly drawn face, mutation, mutated, (extra_limb), (ugly), (poorly drawn hands), fused fingers, messy drawing, broken legs censor, censored, censor_bar"
44
+ ROOT_DIR = "projects/OmniGen2"
45
+ SAVE_DIR = "output/gradio"
46
+
47
+ pipeline = None
48
+ accelerator = None
49
+ save_images = False
50
+
51
+ def load_pipeline(accelerator, weight_dtype, args):
52
+ pipeline = OmniGen2Pipeline.from_pretrained(
53
+ args.model_path,
54
+ torch_dtype=weight_dtype,
55
+ trust_remote_code=True,
56
+ )
57
+ pipeline.transformer = OmniGen2Transformer2DModel.from_pretrained(
58
+ args.model_path,
59
+ subfolder="transformer",
60
+ torch_dtype=weight_dtype,
61
+ )
62
+ if args.lora_path is not None:
63
+ target_modules = ["to_k", "to_q", "to_v", "to_out.0"]
64
+ lora_config = LoraConfig(
65
+ r=512,
66
+ lora_alpha=512,
67
+ lora_dropout=0,
68
+ init_lora_weights="gaussian",
69
+ target_modules=target_modules,
70
+ )
71
+ pipeline.transformer.add_adapter(lora_config)
72
+ lora_state_dict = load_file(args.lora_path, device=accelerator.device.__str__())
73
+ pipeline.transformer.load_state_dict(lora_state_dict, strict=False)
74
+ pipeline.transformer.fuse_lora(lora_scale=1, safe_fusing=False, adapter_names=["default"])
75
+ pipeline.transformer.unload_lora()
76
+ if args.enable_sequential_cpu_offload:
77
+ pipeline.enable_sequential_cpu_offload()
78
+ elif args.enable_model_cpu_offload:
79
+ pipeline.enable_model_cpu_offload()
80
+ else:
81
+ pipeline = pipeline.to(accelerator.device)
82
+ return pipeline
83
+
84
+ @spaces.GPU(duration=120)
85
+ def run(
86
+ instruction,
87
+ width_input,
88
+ height_input,
89
+ image_input_1,
90
+ image_input_2,
91
+ image_input_3,
92
+ scheduler: Literal["euler", "dpmsolver++"] = "euler",
93
+ num_inference_steps: int = 50,
94
+ negative_prompt: str = NEGATIVE_PROMPT,
95
+ guidance_scale_input: float = 5.0,
96
+ img_guidance_scale_input: float = 2.0,
97
+ cfg_range_start: float = 0.0,
98
+ cfg_range_end: float = 1.0,
99
+ num_images_per_prompt: int = 1,
100
+ max_input_image_side_length: int = 2048,
101
+ max_pixels: int = 1024 * 1024,
102
+ seed_input: int = -1,
103
+ align_res: bool = True,
104
+ progress=gr.Progress(),
105
+ ):
106
+ input_images = [image_input_1, image_input_2, image_input_3]
107
+ input_images = [img for img in input_images if img is not None]
108
+
109
+ if len(input_images) == 0:
110
+ input_images = None
111
+
112
+ if seed_input == -1:
113
+ seed_input = random.randint(0, 2**16 - 1)
114
+
115
+ generator = torch.Generator(device=accelerator.device).manual_seed(seed_input)
116
+
117
+ def progress_callback(cur_step, timesteps):
118
+ frac = (cur_step + 1) / float(timesteps)
119
+ progress(frac)
120
+
121
+ if scheduler == 'euler':
122
+ pipeline.scheduler = FlowMatchEulerDiscreteScheduler()
123
+ elif scheduler == 'dpmsolver++':
124
+ pipeline.scheduler = DPMSolverMultistepScheduler(
125
+ algorithm_type="dpmsolver++",
126
+ solver_type="midpoint",
127
+ solver_order=2,
128
+ prediction_type="flow_prediction",
129
+ )
130
+
131
+ results = pipeline(
132
+ prompt=instruction,
133
+ input_images=input_images,
134
+ width=width_input,
135
+ height=height_input,
136
+ align_res=align_res,
137
+ max_input_image_side_length=max_input_image_side_length,
138
+ max_pixels=max_pixels,
139
+ num_inference_steps=num_inference_steps,
140
+ max_sequence_length=1024,
141
+ text_guidance_scale=guidance_scale_input,
142
+ image_guidance_scale=img_guidance_scale_input,
143
+ cfg_range=(cfg_range_start, cfg_range_end),
144
+ negative_prompt=negative_prompt,
145
+ num_images_per_prompt=num_images_per_prompt,
146
+ generator=generator,
147
+ output_type="pil",
148
+ step_func=progress_callback,
149
+ )
150
+
151
+ progress(1.0)
152
+
153
+ vis_images = [to_tensor(image) * 2 - 1 for image in results.images]
154
+ output_image = create_collage(vis_images)
155
+
156
+ if save_images:
157
+ # Create outputs directory if it doesn't exist
158
+ output_dir = SAVE_DIR
159
+ os.makedirs(output_dir, exist_ok=True)
160
+
161
+ # Generate unique filename with timestamp
162
+ timestamp = datetime.now().strftime("%Y_%m_%d-%H_%M_%S")
163
+
164
+ # Build the output path from the timestamp, the seed, and the first 20 characters of the prompt
165
+ output_path = os.path.join(output_dir, f"{timestamp}_seed{seed_input}_{instruction[:20]}.png")
166
+ # Save the image
167
+ output_image.save(output_path)
168
+
169
+ # Save All Generated Images
170
+ if len(results.images) > 1:
171
+ for i, image in enumerate(results.images):
172
+ image_name, ext = os.path.splitext(output_path)
173
+ image.save(f"{image_name}_{i}{ext}")
174
+ return output_image
175
+
176
+
177
+ def get_examples(base_dir="assets/examples/OmniGen2"):
178
+ example_keys = ["instruction", "width_input", "height_input", "image_input_1", "image_input_2", "image_input_3", "seed_input", "align_res", "output_image", "output_image_OmniGen2"]
179
+ examples = []
180
+ example_configs = glob(os.path.join(base_dir, "*", "config.json"))
181
+ for config_path in example_configs:
182
+ with open(config_path, "r", encoding="utf-8") as f:
183
+ config = json.load(f)
184
+ _example = [config.get(k, None) for k in example_keys]
185
+ examples.append(_example)
186
+ return examples
187
+
188
+
189
+ with open("assets/logo.svg", "r", encoding="utf-8") as svg_file:
190
+ logo_content = svg_file.read()
191
+ title = f"""
192
+ <div style="display: flex; align-items: center; justify-content: center;">
193
+ <span style="transform: scale(0.7);margin-right: -5px;">{logo_content}</span>
194
+ <span style="font-size: 1.8em;margin-left: -10px;font-weight: bold; font-family: Gill Sans;">UMO (based on OmniGen2) by UXO Team</span>
195
+ </div>
196
+ """.strip()
197
+
198
+ badges_text = r"""
199
+ <div style="text-align: center; display: flex; justify-content: center; gap: 5px;">
200
+ <a href="https://github.com/bytedance/UMO"><img alt="Build" src="https://img.shields.io/github/stars/bytedance/UMO"></a>
201
+ <a href="https://bytedance.github.io/UMO/"><img alt="Build" src="https://img.shields.io/badge/Project%20Page-UMO-yellow"></a>
202
+ <a href="https://arxiv.org/abs/25xx.xxxxx"><img alt="Build" src="https://img.shields.io/badge/arXiv%20paper-UMO-b31b1b.svg"></a>
203
+ <a href="https://huggingface.co/bytedance-research/UMO"><img src="https://img.shields.io/static/v1?label=%F0%9F%A4%97%20Hugging%20Face&message=Model&color=orange"></a>
204
+ <a href="https://huggingface.co/spaces/bytedance-research/UMO-FLUX"><img src="https://img.shields.io/static/v1?label=%F0%9F%A4%97%20Hugging%20Face&message=demo&color=orange"></a>
205
+ </div>
206
+ """.strip()
207
+
208
+ tips = """
209
+ 📌 ***UMO*** is a **U**nified **M**ulti-identity **O**ptimization framework to *boost the multi-ID fidelity and mitigate confusion* for image customization models, and the latest addition to the UXO family (<a href='https://github.com/bytedance/UMO' target='_blank'> UMO</a>, <a href='https://github.com/bytedance/USO' target='_blank'> USO</a> and <a href='https://github.com/bytedance/UNO' target='_blank'> UNO</a>).
210
+
211
+ 🎨 UMO in the demo is trained based on <a href='https://github.com/VectorSpaceLab/OmniGen2' target='_blank'> OmniGen2</a>.
212
+
213
+ 💡 We provide step-by-step instructions in our <a href='https://github.com/bytedance/UMO' target='_blank'> Github Repo</a>. Additionally, try the examples and comparison provided below the demo to quickly get familiar with UMO and spark your creativity!
214
+
215
+ <details>
216
+ <summary style="cursor: pointer; color: #d34c0e; font-weight: 500;"> ⚡️ Tips from the base model, OmniGen2</summary>
217
+
218
+ - Image Quality: Use high-resolution images (**at least 512x512 recommended**).
219
+ - Be Specific: Instead of "Add bird to desk", try "Add the bird from image 1 to the desk in image 2".
220
+ - Use English: English prompts currently yield better results.
221
+ - Increase image_guidance_scale for better consistency with the reference image:
222
+ - Image Editing: 1.3 - 2.0
223
+ - In-context Generation: 2.0 - 3.0
224
+ - For in-context edit (edit based on multiple images), we recommend using the following prompt format: "Edit the first image: add/replace (the [object] with) the [object] from the second image. [description for your target image]."
225
+ - For example: "Edit the first image: add the man from the second image. The man is talking with a woman in the kitchen"
226
+ """.strip()
227
+
228
+ article = """
229
+ ```bibtex
230
+ @article{cheng2025umo,
231
+ title={UMO: Scaling Multi-Identity Consistency for Image Customization via Matching Reward},
232
+ author={Cheng, Yufeng and Wu, Wenxu and Wu, Shaojin and Huang, Mengqi and Ding, Fei and He, Qian},
233
+ journal={arXiv preprint arXiv:25xx.xxxxx},
234
+ year={2025}
235
+ }
236
+ ```
237
+ """.strip()
238
+
239
+ star = f"""
240
+ If UMO is helpful, please help to ⭐ our <a href='https://github.com/bytedance/UMO' target='_blank'> Github Repo</a> or cite our paper. Thanks a lot!
241
+ {article}
242
+ """
243
+
244
+
245
+ def main(args):
246
+
247
+ # Gradio
248
+ with gr.Blocks() as demo:
249
+ gr.Markdown(title)
250
+ gr.Markdown(badges_text)
251
+ gr.Markdown(tips)
252
+
253
+ with gr.Row():
254
+ with gr.Column():
255
+ # text prompt
256
+ instruction = gr.Textbox(
257
+ label='Enter your prompt',
258
+ info='Use "first/second image" or “第一张图/第二张图” as reference.',
259
+ placeholder="Type your prompt here...",
260
+ )
261
+
262
+ with gr.Row(equal_height=True):
263
+ # input images
264
+ image_input_1 = gr.Image(label="First Image", type="pil")
265
+ image_input_2 = gr.Image(label="Second Image", type="pil")
266
+ image_input_3 = gr.Image(label="Third Image", type="pil")
267
+
268
+ generate_button = gr.Button("Generate Image")
269
+
270
+ negative_prompt = gr.Textbox(
271
+ label="Enter your negative prompt",
272
+ placeholder="Type your negative prompt here...",
273
+ value=NEGATIVE_PROMPT,
274
+ )
275
+
276
+ # slider
277
+ with gr.Row(equal_height=True):
278
+ height_input = gr.Slider(
279
+ label="Height", minimum=256, maximum=2048, value=1024, step=128
280
+ )
281
+ width_input = gr.Slider(
282
+ label="Width", minimum=256, maximum=2048, value=1024, step=128
283
+ )
284
+
285
+ with gr.Accordion("Advanced Options", open=False):
286
+ with gr.Row(equal_height=True):
287
+ align_res = gr.Checkbox(
288
+ label="Align Resolution",
289
+ info="Align output's resolution with the first reference image. Only valid when there is only one reference image.",
290
+ value=True
291
+ )
292
+ with gr.Row(equal_height=True):
293
+ text_guidance_scale_input = gr.Slider(
294
+ label="Text Guidance Scale",
295
+ minimum=1.0,
296
+ maximum=8.0,
297
+ value=5.0,
298
+ step=0.1,
299
+ )
300
+
301
+ image_guidance_scale_input = gr.Slider(
302
+ label="Image Guidance Scale",
303
+ minimum=1.0,
304
+ maximum=3.0,
305
+ value=2.0,
306
+ step=0.1,
307
+ )
308
+ with gr.Row(equal_height=True):
309
+ cfg_range_start = gr.Slider(
310
+ label="CFG Range Start",
311
+ minimum=0.0,
312
+ maximum=1.0,
313
+ value=0.0,
314
+ step=0.1,
315
+ )
316
+
317
+ cfg_range_end = gr.Slider(
318
+ label="CFG Range End",
319
+ minimum=0.0,
320
+ maximum=1.0,
321
+ value=1.0,
322
+ step=0.1,
323
+ )
324
+
325
+ def adjust_end_slider(start_val, end_val):
326
+ return max(start_val, end_val)
327
+
328
+ def adjust_start_slider(end_val, start_val):
329
+ return min(end_val, start_val)
330
+
331
+ cfg_range_start.input(
332
+ fn=adjust_end_slider,
333
+ inputs=[cfg_range_start, cfg_range_end],
334
+ outputs=[cfg_range_end]
335
+ )
336
+
337
+ cfg_range_end.input(
338
+ fn=adjust_start_slider,
339
+ inputs=[cfg_range_end, cfg_range_start],
340
+ outputs=[cfg_range_start]
341
+ )
342
+
343
+ with gr.Row(equal_height=True):
344
+ scheduler_input = gr.Dropdown(
345
+ label="Scheduler",
346
+ choices=["euler", "dpmsolver++"],
347
+ value="euler",
348
+ info="The scheduler to use for the model.",
349
+ )
350
+
351
+ num_inference_steps = gr.Slider(
352
+ label="Inference Steps", minimum=20, maximum=100, value=50, step=1
353
+ )
354
+ with gr.Row(equal_height=True):
355
+ num_images_per_prompt = gr.Slider(
356
+ label="Number of images per prompt",
357
+ minimum=1,
358
+ maximum=4,
359
+ value=1,
360
+ step=1,
361
+ )
362
+
363
+ seed_input = gr.Slider(
364
+ label="Seed", minimum=-1, maximum=2147483647, value=-1, step=1
365
+ )
366
+ with gr.Row(equal_height=True):
367
+ max_input_image_side_length = gr.Slider(
368
+ label="max_input_image_side_length",
369
+ minimum=256,
370
+ maximum=2048,
371
+ value=2048,
372
+ step=256,
373
+ )
374
+ max_pixels = gr.Slider(
375
+ label="max_pixels",
376
+ minimum=256 * 256,
377
+ maximum=1536 * 1536,
378
+ value=1024 * 1024,
379
+ step=256 * 256,
380
+ )
381
+
382
+ with gr.Column():
383
+ with gr.Column():
384
+ # output image
385
+ output_image = gr.Image(label="Output Image")
386
+ global save_images
387
+ save_images = gr.Checkbox(label="Save generated images", value=True)
388
+ with gr.Accordion("Examples Comparison with OmniGen2", open=False):
389
+ output_image_omnigen2 = gr.Image(label="Generated Image (OmniGen2)")
390
+
391
+ gr.Markdown(star)
392
+
393
+ global accelerator
394
+ global pipeline
395
+
396
+ bf16 = True
397
+ accelerator = Accelerator(mixed_precision="bf16" if bf16 else "no")
398
+ weight_dtype = torch.bfloat16 if bf16 else torch.float32
399
+
400
+ pipeline = load_pipeline(accelerator, weight_dtype, args)
401
+
402
+ # click
403
+ generate_button.click(
404
+ run,
405
+ inputs=[
406
+ instruction,
407
+ width_input,
408
+ height_input,
409
+ image_input_1,
410
+ image_input_2,
411
+ image_input_3,
412
+ scheduler_input,
413
+ num_inference_steps,
414
+ negative_prompt,
415
+ text_guidance_scale_input,
416
+ image_guidance_scale_input,
417
+ cfg_range_start,
418
+ cfg_range_end,
419
+ num_images_per_prompt,
420
+ max_input_image_side_length,
421
+ max_pixels,
422
+ seed_input,
423
+ align_res,
424
+ ],
425
+ outputs=output_image,
426
+ )
427
+
428
+ gr.Examples(
429
+ examples=get_examples("assets/examples/OmniGen2"),
430
+ inputs=[
431
+ instruction,
432
+ width_input,
433
+ height_input,
434
+ image_input_1,
435
+ image_input_2,
436
+ image_input_3,
437
+ seed_input,
438
+ align_res,
439
+ output_image,
440
+ output_image_omnigen2,
441
+ ],
442
+ label="We provide examples for academic research. The vast majority of images used in this demo are either generated or from open-source datasets. If you have any concerns, please contact us, and we will promptly remove any inappropriate content.",
443
+ examples_per_page=15
444
+ )
445
+
446
+ # launch
447
+ demo.launch(share=args.share, server_port=args.port, allowed_paths=[ROOT_DIR], server_name=args.server_name)
448
+
449
+ def parse_args():
450
+ parser = argparse.ArgumentParser()
451
+ parser.add_argument("--share", action="store_true", help="Share the Gradio app")
452
+ parser.add_argument(
453
+ "--port", type=int, default=7860, help="Port to use for the Gradio app"
454
+ )
455
+ parser.add_argument(
456
+ "--server_name", type=str, default=None
457
+ )
458
+ parser.add_argument(
459
+ "--model_path",
460
+ type=str,
461
+ default="OmniGen2/OmniGen2",
462
+ help="Path or HuggingFace name of the model to load."
463
+ )
464
+ parser.add_argument(
465
+ "--enable_model_cpu_offload",
466
+ action="store_true",
467
+ help="Enable model CPU offload."
468
+ )
469
+ parser.add_argument(
470
+ "--enable_sequential_cpu_offload",
471
+ action="store_true",
472
+ help="Enable sequential CPU offload."
473
+ )
474
+ parser.add_argument(
475
+ "--lora_path",
476
+ type=str,
477
+ default=None,
478
+ help="Path to the LoRA checkpoint to load."
479
+ )
480
+ args = parser.parse_args()
481
+ return args
482
+
483
+ if __name__ == "__main__":
484
+ args = parse_args()
485
+ main(args)
assets/examples/OmniGen2/0/config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_image": "assets/examples/OmniGen2/0/result.png",
3
+ "instruction": "The woman from the first image is walking on a red carpet.",
4
+ "width_input": 1024,
5
+ "height_input": 1024,
6
+ "image_input_1": "assets/examples/OmniGen2/0/ref.png",
7
+ "image_input_2": null,
8
+ "image_input_3": null,
9
+ "seed_input": 62905,
10
+ "align_res": false,
11
+ "output_image_OmniGen2": "assets/examples/OmniGen2/0/result_omnigen2.png"
12
+ }
assets/examples/OmniGen2/0/ref.png ADDED

Git LFS Details

  • SHA256: 957e5f191a970340963a6ee2d3e7b242ca0e064521a0646b7f0ab49465691459
  • Pointer size: 131 Bytes
  • Size of remote file: 926 kB
assets/examples/OmniGen2/0/result.png ADDED

Git LFS Details

  • SHA256: 533f84a2d9f4e76011850c8f10fcfa41bc3de5b37c0efab227afb1bdc6e828f1
  • Pointer size: 132 Bytes
  • Size of remote file: 1.21 MB
assets/examples/OmniGen2/0/result_omnigen2.png ADDED

Git LFS Details

  • SHA256: 22286675638bc6444e15a5d1806cc189e37b99a00849ae65f71feafa10519585
  • Pointer size: 132 Bytes
  • Size of remote file: 1.31 MB
assets/examples/OmniGen2/1/config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_image": "assets/examples/OmniGen2/1/result.png",
3
+ "instruction": "Portrait of the woman from the first image.",
4
+ "width_input": 720,
5
+ "height_input": 1280,
6
+ "image_input_1": "assets/examples/OmniGen2/1/ref.webp",
7
+ "image_input_2": null,
8
+ "image_input_3": null,
9
+ "seed_input": 58519,
10
+ "align_res": false,
11
+ "output_image_OmniGen2": "assets/examples/OmniGen2/1/result_omnigen2.png"
12
+ }
assets/examples/OmniGen2/1/ref.webp ADDED

Git LFS Details

  • SHA256: 2817ce53da228754f930b89ba56a68ffc512752273d9cea2f255b2bdefa55003
  • Pointer size: 130 Bytes
  • Size of remote file: 48.2 kB
assets/examples/OmniGen2/1/result.png ADDED

Git LFS Details

  • SHA256: b896d3e5070ec754d4c53869454bfe4b1ca1694b73040b62dee56d04b51fbcbc
  • Pointer size: 132 Bytes
  • Size of remote file: 1.13 MB
assets/examples/OmniGen2/1/result_omnigen2.png ADDED

Git LFS Details

  • SHA256: 2db916b5548f9d1db0a1f87c8418b81da98d480b31f373a2f59174c181737332
  • Pointer size: 132 Bytes
  • Size of remote file: 1.21 MB
assets/examples/OmniGen2/10/config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_image": "assets/examples/OmniGen2/10/result.png",
3
+ "instruction": "Generate a photo of an anime-style figurine placed on a desk. The figurine model should be based on the character photo provided in the attachment, accurately replicating the full-body pose, facial expression, and clothing style of the character in the photo, ensuring the entire figurine is fully presented. The overall design should be exquisite and detailed, soft gradient colors and a delicate texture, leaning towards a Japanese anime style, rich in details, with a realistic quality and beautiful visual appeal.",
4
+ "width_input": 1024,
5
+ "height_input": 1024,
6
+ "image_input_1": "assets/examples/OmniGen2/10/ref.JPG",
7
+ "image_input_2": null,
8
+ "image_input_3": null,
9
+ "seed_input": 55301,
10
+ "align_res": true,
11
+ "output_image_OmniGen2": "assets/examples/OmniGen2/10/result_omnigen2.png"
12
+ }
assets/examples/OmniGen2/10/ref.JPG ADDED
assets/examples/OmniGen2/10/result.png ADDED

Git LFS Details

  • SHA256: fdb495a9d91f724014369b43693ad56fdfce6aaf7aa4cfbd8e444ebe7b458f13
  • Pointer size: 131 Bytes
  • Size of remote file: 981 kB
assets/examples/OmniGen2/10/result_omnigen2.png ADDED

Git LFS Details

  • SHA256: 3d57cc0d55e6ef6edd731902afb24f77c89ac5700712bf7d5c3e88301c56471e
  • Pointer size: 132 Bytes
  • Size of remote file: 1.07 MB
assets/examples/OmniGen2/2/config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_image": "assets/examples/OmniGen2/2/result.png",
3
+ "instruction": "Change the man from the first image to be wearing black suit.",
4
+ "width_input": 1024,
5
+ "height_input": 1024,
6
+ "image_input_1": "assets/examples/OmniGen2/2/ref.png",
7
+ "image_input_2": null,
8
+ "image_input_3": null,
9
+ "seed_input": 7028,
10
+ "align_res": true,
11
+ "output_image_OmniGen2": "assets/examples/OmniGen2/2/result_omnigen2.png"
12
+ }
assets/examples/OmniGen2/2/ref.png ADDED

Git LFS Details

  • SHA256: 5971db9ea621c45aa8746a2c29d5d13b927768769abef9a57fcd969bbc612ade
  • Pointer size: 132 Bytes
  • Size of remote file: 1.07 MB
assets/examples/OmniGen2/2/result.png ADDED

Git LFS Details

  • SHA256: 266c15f4f1b203f49c09450096414cb3077f6a83266980c86e6f0825db4b5b3a
  • Pointer size: 131 Bytes
  • Size of remote file: 471 kB
assets/examples/OmniGen2/2/result_omnigen2.png ADDED

Git LFS Details

  • SHA256: 0bb0bd981e5b322f0bde6dd3d5fb6afb0a9404dcf48ce261146d7aad1df39c57
  • Pointer size: 131 Bytes
  • Size of remote file: 441 kB
assets/examples/OmniGen2/3/config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_image": "assets/examples/OmniGen2/3/result.png",
3
+ "instruction": "Change the style of the first image to Retro comic style",
4
+ "width_input": 1024,
5
+ "height_input": 1024,
6
+ "image_input_1": "assets/examples/OmniGen2/3/ref.png",
7
+ "image_input_2": null,
8
+ "image_input_3": null,
9
+ "seed_input": 22067,
10
+ "align_res": true,
11
+ "output_image_OmniGen2": "assets/examples/OmniGen2/3/result_omnigen2.png"
12
+ }
assets/examples/OmniGen2/3/ref.png ADDED

Git LFS Details

  • SHA256: 467781b0e8504e472bd2f5032c38d00aa34284757cf7c5bb5946ee4b847d5377
  • Pointer size: 131 Bytes
  • Size of remote file: 871 kB
assets/examples/OmniGen2/3/result.png ADDED

Git LFS Details

  • SHA256: eecb08bedb64745340fea6f952d53aeea80099c825b1f88be4b7aace83ea5544
  • Pointer size: 131 Bytes
  • Size of remote file: 733 kB
assets/examples/OmniGen2/3/result_omnigen2.png ADDED

Git LFS Details

  • SHA256: e030d603bb781b544d4ab8c5d0627a79e2494777b78ee517f64824e8c0c4fe52
  • Pointer size: 131 Bytes
  • Size of remote file: 768 kB
assets/examples/OmniGen2/4/config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_image": "assets/examples/OmniGen2/4/result.png",
3
+ "instruction": "The man from the first image and the woman from the second image shake hands and look straight ahead, smiling, with a blackboard written \"UMO\" on it.",
4
+ "width_input": 1024,
5
+ "height_input": 1024,
6
+ "image_input_1": "assets/examples/OmniGen2/4/ref_1.jpg",
7
+ "image_input_2": "assets/examples/OmniGen2/4/ref_2.png",
8
+ "image_input_3": null,
9
+ "seed_input": 34412,
10
+ "align_res": false,
11
+ "output_image_OmniGen2": "assets/examples/OmniGen2/4/result_omnigen2.png"
12
+ }
assets/examples/OmniGen2/4/ref_1.jpg ADDED

Git LFS Details

  • SHA256: 18f353267e1ceabe73c354b47bf482c282132998c32da83b9e0134bf3531c622
  • Pointer size: 131 Bytes
  • Size of remote file: 197 kB
assets/examples/OmniGen2/4/ref_2.png ADDED

Git LFS Details

  • SHA256: 2ae24705eaa3314f7ca32cf69e32f066765d953f26f516da5fe817e456de079d
  • Pointer size: 132 Bytes
  • Size of remote file: 1.05 MB
assets/examples/OmniGen2/4/result.png ADDED

Git LFS Details

  • SHA256: c83ef36efdc74fdefa61d88f0c8aa7cef29202b499246e2b3a19490d96a2035d
  • Pointer size: 132 Bytes
  • Size of remote file: 1.27 MB
assets/examples/OmniGen2/4/result_omnigen2.png ADDED

Git LFS Details

  • SHA256: 470a45b575057ae21e7d5b9cc0d2a06b1d614f08008a90487d853eebd37d43e4
  • Pointer size: 132 Bytes
  • Size of remote file: 1.36 MB
assets/examples/OmniGen2/5/config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_image": "assets/examples/OmniGen2/5/result.png",
3
+ "instruction": "make a photo of all these 2 people",
4
+ "width_input": 1024,
5
+ "height_input": 1024,
6
+ "image_input_1": "assets/examples/OmniGen2/5/ref_1.jpg",
7
+ "image_input_2": "assets/examples/OmniGen2/5/ref_2.png",
8
+ "image_input_3": null,
9
+ "seed_input": 0,
10
+ "align_res": false,
11
+ "output_image_OmniGen2": "assets/examples/OmniGen2/5/result_omnigen2.png"
12
+ }
assets/examples/OmniGen2/5/ref_1.jpg ADDED

Git LFS Details

  • SHA256: c713d97605447503e5813bbf4c5ba6f16e5b299e04234d803812b1733855a696
  • Pointer size: 131 Bytes
  • Size of remote file: 121 kB
assets/examples/OmniGen2/5/ref_2.png ADDED

Git LFS Details

  • SHA256: a4f1bc53eea2e133b9798de6376734d7bcd8e5800ab28dd9930e63d0832b5042
  • Pointer size: 131 Bytes
  • Size of remote file: 202 kB
assets/examples/OmniGen2/5/result.png ADDED

Git LFS Details

  • SHA256: 55bee39af377445057cb6e15e3918d72f9d7f3b162244a388cfcd503e657654f
  • Pointer size: 132 Bytes
  • Size of remote file: 1.38 MB
assets/examples/OmniGen2/5/result_omnigen2.png ADDED

Git LFS Details

  • SHA256: dff1b62e9432a125d3bfb347d5b097b8520a6d56e6aafd50d3f98dab35af7260
  • Pointer size: 132 Bytes
  • Size of remote file: 1.46 MB
assets/examples/OmniGen2/6/config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_image": "assets/examples/OmniGen2/6/result.png",
3
+ "instruction": "The two people shown in the images are sitting in a theater, watching the screen. One person points at the other person.",
4
+ "width_input": 1024,
5
+ "height_input": 1024,
6
+ "image_input_1": "assets/examples/OmniGen2/6/ref_1.webp",
7
+ "image_input_2": "assets/examples/OmniGen2/6/ref_2.webp",
8
+ "image_input_3": null,
9
+ "seed_input": 37984,
10
+ "align_res": false,
11
+ "output_image_OmniGen2": "assets/examples/OmniGen2/6/result_omnigen2.png"
12
+ }
assets/examples/OmniGen2/6/ref_1.webp ADDED

Git LFS Details

  • SHA256: 7a523a6becadf7f04dd27ce1d596214894560d336a770f050365b96c37bf69cf
  • Pointer size: 130 Bytes
  • Size of remote file: 84.8 kB
assets/examples/OmniGen2/6/ref_2.webp ADDED

Git LFS Details

  • SHA256: 739d7de128f079139fee10514b30a181631066950a38a398784190b8e1c3f9eb
  • Pointer size: 131 Bytes
  • Size of remote file: 281 kB
assets/examples/OmniGen2/6/result.png ADDED

Git LFS Details

  • SHA256: b685e311ad54e60fd7e996847b4d16f392785c197b4ddd8dc766b76621e7874a
  • Pointer size: 132 Bytes
  • Size of remote file: 1.16 MB
assets/examples/OmniGen2/6/result_omnigen2.png ADDED

Git LFS Details

  • SHA256: 3916be3333e04c078a09a1df8ea093dae346acdcc97617b4d030fe73002d91b5
  • Pointer size: 132 Bytes
  • Size of remote file: 1.18 MB
assets/examples/OmniGen2/7/config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_image": "assets/examples/OmniGen2/7/result.png",
3
+ "instruction": "The two people shown in the images are drinking wine, their eyes locked in an intense gaze at the camera, seated at an elegantly set table in a dimly lit room. Soft, golden light spills from a nearby chandelier, casting a warm glow over the polished wood surface, while the ambient sound of quiet conversation fills the air.",
4
+ "width_input": 1024,
5
+ "height_input": 1024,
6
+ "image_input_1": "assets/examples/OmniGen2/7/ref_1.webp",
7
+ "image_input_2": "assets/examples/OmniGen2/7/ref_2.webp",
8
+ "image_input_3": null,
9
+ "seed_input": 61593,
10
+ "align_res": false,
11
+ "output_image_OmniGen2": "assets/examples/OmniGen2/7/result_omnigen2.png"
12
+ }
assets/examples/OmniGen2/7/ref_1.webp ADDED

Git LFS Details

  • SHA256: 4978ae9c17cd3fe3aae3bc0ad75cd5e8dd1d0fab89e304e1864eb722a26d292d
  • Pointer size: 130 Bytes
  • Size of remote file: 30.2 kB
assets/examples/OmniGen2/7/ref_2.webp ADDED

Git LFS Details

  • SHA256: 58585deb8283e993b3bb096ce8b7dd57e3ca242c006930b9cedbc6dd4c07aed5
  • Pointer size: 130 Bytes
  • Size of remote file: 38.1 kB
assets/examples/OmniGen2/7/result.png ADDED

Git LFS Details

  • SHA256: d52b814db08b1a747e19231cffcdb72a8e5c069ebc46f1f72cc362411b653041
  • Pointer size: 131 Bytes
  • Size of remote file: 984 kB
assets/examples/OmniGen2/7/result_omnigen2.png ADDED

Git LFS Details

  • SHA256: 27f3a7c0d5575927290acfcd691c78777a38a49fdd6c9819c52f15796d35829b
  • Pointer size: 132 Bytes
  • Size of remote file: 1.02 MB
assets/examples/OmniGen2/8/config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_image": "assets/examples/OmniGen2/8/result.png",
3
+ "instruction": "The man from the first image is on the left. The old man with white hair and glasses in the second image is in the middle. The woman from the third image is on the right.",
4
+ "width_input": 1280,
5
+ "height_input": 720,
6
+ "image_input_1": "assets/examples/OmniGen2/8/ref_1.jpg",
7
+ "image_input_2": "assets/examples/OmniGen2/8/ref_2.jpg",
8
+ "image_input_3": "assets/examples/OmniGen2/8/ref_3.jpg",
9
+ "seed_input": 58101,
10
+ "align_res": false,
11
+ "output_image_OmniGen2": "assets/examples/OmniGen2/8/result_omnigen2.png"
12
+ }
assets/examples/OmniGen2/8/ref_1.jpg ADDED

Git LFS Details

  • SHA256: 78c8991b224c8d54dd6043a8cf9a09d5d338048280360fd134c9a764d17f4a08
  • Pointer size: 130 Bytes
  • Size of remote file: 88.9 kB
assets/examples/OmniGen2/8/ref_2.jpg ADDED

Git LFS Details

  • SHA256: 041002bb03f887a1ad87990cedb41a226fab9187db3da60c655bc1e352c969b4
  • Pointer size: 131 Bytes
  • Size of remote file: 198 kB
assets/examples/OmniGen2/8/ref_3.jpg ADDED

Git LFS Details

  • SHA256: e9f9d74fb8c46a6f73cf29709399846235165edaf797663ebb664b5deef903df
  • Pointer size: 130 Bytes
  • Size of remote file: 66.7 kB
assets/examples/OmniGen2/8/result.png ADDED

Git LFS Details

  • SHA256: 31a709e15cd1a35a0c7d7b887b1ecc9e8e77c3f50012b960789d7693f894ca82
  • Pointer size: 132 Bytes
  • Size of remote file: 1.23 MB
assets/examples/OmniGen2/8/result_omnigen2.png ADDED

Git LFS Details

  • SHA256: 067536fcb1a9a18feb295ac24f03ef1458df98f6af5cf760ed5467cc34329e21
  • Pointer size: 132 Bytes
  • Size of remote file: 1.31 MB