Update app.py
app.py CHANGED
@@ -182,7 +182,9 @@ def infer(ref_style_file, style_description, caption, progress):
             lam_style=1, lam_txt_alignment=1.0,
             use_ddim_sampler=True,
         )
-        for (sampled_c, _, _) in enumerate(
+        for i, (sampled_c, _, _) in enumerate(sampling_c, 1):
+            if i % 5 == 0:  # Update progress every 5 steps
+                progress(0.4 + 0.3 * (i / extras.sampling_configs['timesteps']), f"Stage C reverse process: step {i}/{extras.sampling_configs['timesteps']}")
             sampled_c = sampled_c
 
     progress(0.7, "Starting Stage B reverse process")
@@ -195,7 +197,9 @@ def infer(ref_style_file, style_description, caption, progress):
             models_b.generator, conditions_b, stage_b_latent_shape,
             unconditions_b, device=device, **extras_b.sampling_configs,
         )
-        for (sampled_b, _, _) in enumerate(
+        for i, (sampled_b, _, _) in enumerate(sampling_b, 1):
+            if i % 5 == 0:  # Update progress every 5 steps
+                progress(0.7 + 0.2 * (i / extras_b.sampling_configs['timesteps']), f"Stage B reverse process: step {i}/{extras_b.sampling_configs['timesteps']}")
             sampled_b = sampled_b
         sampled = models_b.stage_a.decode(sampled_b).float()
 
@@ -222,7 +226,7 @@ def infer(ref_style_file, style_description, caption, progress):
     # Clear CUDA cache
     torch.cuda.empty_cache()
 
-def infer_compo(style_description, ref_style_file, caption, ref_sub_file):
+def infer_compo(style_description, ref_style_file, caption, ref_sub_file, progress):
     global models_rbm, models_b, device, sam_model
     if low_vram:
         models_to(models_rbm, device=device)
@@ -246,13 +250,15 @@ def infer_compo(style_description, ref_style_file, caption, ref_sub_file):
     extras_b.sampling_configs['timesteps'] = 10
     extras_b.sampling_configs['t_start'] = 1.0
 
+    progress(0.1, "Loading style and subject reference images")
     ref_style = resize_image(PIL.Image.open(ref_style_file).convert("RGB")).unsqueeze(0).expand(batch_size, -1, -1, -1).to(device)
     ref_images = resize_image(PIL.Image.open(ref_sub_file).convert("RGB")).unsqueeze(0).expand(batch_size, -1, -1, -1).to(device)
 
     batch = {'captions': [caption] * batch_size}
     batch['style'] = ref_style
     batch['images'] = ref_images
-
+
+    progress(0.2, "Processing reference images")
     x0_forward = models_rbm.effnet(extras.effnet_preprocess(ref_images.to(device)))
     x0_style_forward = models_rbm.effnet(extras.effnet_preprocess(ref_style.to(device)))
 
@@ -264,7 +270,8 @@ def infer_compo(style_description, ref_style_file, caption, ref_sub_file):
     sam_mask, boxes, phrases, logits = sam_model.predict(x0_preview_pil, sam_prompt)
     # sam_mask, boxes, phrases, logits = sam_model.predict(transform(x0_preview[0]), sam_prompt)
     sam_mask = sam_mask.detach().unsqueeze(dim=0).to(device)
-
+
+    progress(0.3, "Generating conditions")
     conditions = core.get_conditions(batch, models_rbm, extras, is_eval=True, is_unconditional=False, eval_image_embeds=True, eval_subject_style=True, eval_csd=False)
     unconditions = core.get_conditions(batch, models_rbm, extras, is_eval=True, is_unconditional=True, eval_image_embeds=False, eval_subject_style=True)
     conditions_b = core_b.get_conditions(batch, models_b, extras_b, is_eval=True, is_unconditional=False)
@@ -274,7 +281,8 @@ def infer_compo(style_description, ref_style_file, caption, ref_sub_file):
         models_to(models_rbm, device="cpu", excepts=["generator", "previewer"])
         models_to(sam_model, device="cpu")
         models_to(sam_model.sam, device="cpu")
-
+
+    progress(0.4, "Starting Stage C reverse process")
     # Stage C reverse process.
     sampling_c = extras.gdf.sample(
         models_rbm.generator, conditions, stage_c_latent_shape,
@@ -291,9 +299,12 @@ def infer_compo(style_description, ref_style_file, caption, ref_sub_file):
         sam_prompt=sam_prompt
     )
 
-    for (sampled_c, _, _) in
+    for i, (sampled_c, _, _) in enumerate(sampling_c, 1):
+        if i % 5 == 0:  # Update progress every 5 steps
+            progress(0.4 + 0.3 * (i / extras.sampling_configs['timesteps']), f"Stage C reverse process: step {i}/{extras.sampling_configs['timesteps']}")
         sampled_c = sampled_c
 
+    progress(0.7, "Starting Stage B reverse process")
     # Stage B reverse process.
     with torch.no_grad(), torch.cuda.amp.autocast(dtype=torch.bfloat16):
         conditions_b['effnet'] = sampled_c
@@ -303,10 +314,13 @@ def infer_compo(style_description, ref_style_file, caption, ref_sub_file):
             models_b.generator, conditions_b, stage_b_latent_shape,
             unconditions_b, device=device, **extras_b.sampling_configs,
         )
-        for (sampled_b, _, _) in
+        for i, (sampled_b, _, _) in enumerate(sampling_b, 1):
+            if i % 5 == 0:  # Update progress every 5 steps
+                progress(0.7 + 0.2 * (i / extras_b.sampling_configs['timesteps']), f"Stage B reverse process: step {i}/{extras_b.sampling_configs['timesteps']}")
             sampled_b = sampled_b
         sampled = models_b.stage_a.decode(sampled_b).float()
 
+        progress(0.9, "Finalizing the output image")
         sampled = torch.cat([
             torch.nn.functional.interpolate(ref_images.cpu(), size=(height, width)),
             torch.nn.functional.interpolate(ref_style.cpu(), size=(height, width)),
@@ -332,9 +346,9 @@ def infer_compo(style_description, ref_style_file, caption, ref_sub_file):
 
 def run(style_reference_image, style_description, subject_prompt, subject_reference, use_subject_ref):
     result = None
-    progress = gr.Progress(
+    progress = gr.Progress()
     if use_subject_ref is True:
-        result = infer_compo(style_description, style_reference_image, subject_prompt, subject_reference)
+        result = infer_compo(style_description, style_reference_image, subject_prompt, subject_reference, progress)
    else:
         result = infer(style_reference_image, style_description, subject_prompt, progress)
     return result
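
The commit threads a single Gradio progress callback through both inference paths: coarse milestones at 0.1 (load references), 0.2 (preprocess), 0.3 (conditions), then 0.4-0.7 across the Stage C loop and 0.7-0.9 across the Stage B loop, with per-step updates throttled to every fifth iteration. Below is a minimal, self-contained sketch of that wiring, not code from this Space: it assumes Gradio's documented pattern of declaring gr.Progress() as a default argument of the event handler, and simulate_stage is a hypothetical stand-in for the Stage C / Stage B sampling loops.

# Illustrative sketch only; simulate_stage and this demo UI are placeholders.
import time

import gradio as gr


def simulate_stage(progress, start, span, steps, label):
    # Stand-in for one reverse-diffusion stage: move the bar from `start`
    # to `start + span`, reporting every 5th step like the commit does.
    for i in range(1, steps + 1):
        time.sleep(0.05)  # placeholder for a real sampling step
        if i % 5 == 0:
            progress(start + span * (i / steps), desc=f"{label}: step {i}/{steps}")


def run(prompt, progress=gr.Progress()):
    # Gradio injects a tracked Progress instance for the default argument;
    # calling it with (fraction, desc=...) updates the bar in the UI.
    progress(0.1, desc="Loading style and subject reference images")
    progress(0.2, desc="Processing reference images")
    progress(0.3, desc="Generating conditions")
    simulate_stage(progress, 0.4, 0.3, steps=20, label="Stage C reverse process")
    simulate_stage(progress, 0.7, 0.2, steps=10, label="Stage B reverse process")
    progress(0.9, desc="Finalizing the output image")
    return f"Done: {prompt}"


with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    output = gr.Textbox(label="Result")
    gr.Button("Run").click(run, inputs=prompt, outputs=output)

if __name__ == "__main__":
    demo.launch()

Worth noting: Gradio's docs describe injecting the tracker through a gr.Progress() default argument; constructing it inside the handler body, as run does in the commit, may not bind it to the running event, so the default-argument form above is likely the safer wiring.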