Spaces:

Shuang59
/

Composable-Diffusion

Runtime error

App Files Files Community

Shuang59 committed on Jul 23, 2022

Commit

5d29bbd

1 Parent(s): 2ebbd13

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -7

app.py CHANGED Viewed

@@ -20,6 +20,11 @@ from glide_text2im.model_creation import (
     model_and_diffusion_defaults_upsampler
 )
 # This notebook supports both CPU and GPU.
 # On CPU, generating one sample may take on the order of 20 minutes.
 # On a GPU, it should be under a minute.
@@ -193,14 +198,94 @@ def compose_language_descriptions(prompt):
   out_img = np.array(out_img.data.to('cpu'))
   return out_img
-# prompt = "a camel | a forest" #@param{type: 'string'}
-# out_img = compose_language_descriptions(prompt)
-examples = ['a camel | a forest', 'A cloudy blue sky  | A mountain in the horizon | Cherry Blossoms in front of the mountain']
 import gradio as gr
 gr.Interface(title='Compositional Visual Generation with Composable Diffusion Models',
-    description='Demo for Composable Diffusion (~20s per example). Project Page: https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/. This demo is based on the released GLIDE model (https://github.com/openai/glide-text2im) for composing natural language descriptions. For composing objects and object relations, see the project page for more information. When composing  multiple sentences, use `|` as the delimiter. For example "a camel | a forest" composes "a camel" and "a forest" together.',
-    fn=compose_language_descriptions, inputs='text', outputs='image', examples=examples).launch();

     model_and_diffusion_defaults_upsampler
 )
+from composable_diffusion.download import download_model
+from composable_diffusion.model_creation import create_model_and_diffusion as create_model_and_diffusion_for_clevr
+from composable_diffusion.model_creation import model_and_diffusion_defaults as model_and_diffusion_defaults_for_clevr
 # This notebook supports both CPU and GPU.
 # On CPU, generating one sample may take on the order of 20 minutes.
 # On a GPU, it should be under a minute.
   out_img = np.array(out_img.data.to('cpu'))
   return out_img
+# create model for CLEVR Objects
+# Builds clevr_model / clevr_diffusion at import time and loads the 'clevr_pos' weights.
+# Respacing value; forwarded to the factory as a string (see "timestep_respacing" below).
+timestep_respacing =  100
+# Presumably the library's default option dict; the CLEVR-specific flags below override it.
+clevr_options = model_and_diffusion_defaults_for_clevr()
+flags = {
+    "image_size": 128,
+    "num_channels": 192,
+    "num_res_blocks": 2,
+    "learn_sigma": True,
+    "use_scale_shift_norm": False,
+    "raw_unet": True,
+    "noise_schedule": "squaredcos_cap_v2",
+    "rescale_learned_sigmas": False,
+    "rescale_timesteps": False,
+    # NOTE(review): passed as the string '2', not an int - confirm the factory expects a string.
+    "num_classes": '2',
+    "dataset": "clevr_pos",
+    # fp16 is requested only when has_cuda is set (has_cuda is defined outside this section).
+    "use_fp16": has_cuda,
+    "timestep_respacing": str(timestep_respacing)
+}
+# Overlay the CLEVR flags on top of the defaults, key by key.
+for key, val in flags.items():
+  clevr_options[key] = val
+clevr_model, clevr_diffusion = create_model_and_diffusion_for_clevr(**clevr_options)
+clevr_model.eval()
+# Mirrors the "use_fp16" flag above: convert the weights only when has_cuda is set.
+if has_cuda:
+    clevr_model.convert_to_fp16()
+clevr_model.to(device)
+# download_model('clevr_pos') supplies what th.load reads; `device` is passed as th.load's
+# second argument (presumably map_location - TODO confirm `th` is torch).
+clevr_model.load_state_dict(th.load(download_model('clevr_pos'), device))
+def compose_clevr_objects(coordinates):
+    """Generate one image by composing CLEVR object positions.
+
+    Args:
+        coordinates: String of ``x, y`` pairs separated by ``|``,
+            e.g. ``'0.1, 0.5 | 0.3, 0.5'``.
+
+    Returns:
+        The first generated sample as a NumPy array, permuted to
+        channel-last and mapped through ``(x + 1) / 2``.
+
+    Raises:
+        ValueError: If a coordinate component is not a valid float.
+        IndexError: If a pair does not contain a comma.
+    """
+    coordinates = [[float(x.split(',')[0].strip()), float(x.split(',')[1].strip())]
+               for x in coordinates.split('|')]
+    coordinates += [[-1, -1]] # add unconditional score label
+    batch_size = 1
+    guidance_scale = 10
+    def model_fn(x_t, ts, **kwargs):
+        # Evaluate the same noisy image once per label (every condition plus the
+        # unconditional one), then merge the predictions with guidance.
+        half = x_t[:1]
+        combined = th.cat([half] * kwargs['y'].size(0), dim=0)
+        # Must be the CLEVR network created in the setup above; the un-prefixed
+        # `model` name is not part of the CLEVR setup.
+        model_out = clevr_model(combined, ts, **kwargs)
+        eps, rest = model_out[:, :3], model_out[:, 3:]
+        masks = kwargs.get('masks')
+        # masks is True for conditional rows and False for the unconditional row.
+        cond_eps = eps[masks].mean(dim=0, keepdim=True)
+        uncond_eps = eps[~masks].mean(dim=0, keepdim=True)
+        half_eps = uncond_eps + guidance_scale * (cond_eps - uncond_eps)
+        eps = th.cat([half_eps] * x_t.size(0), dim=0)
+        return th.cat([eps, rest], dim=1)
+    masks = [True] * (len(coordinates) - 1) + [False]
+    model_kwargs = dict(
+        y=th.tensor(coordinates, dtype=th.float, device=device),
+        masks=th.tensor(masks, dtype=th.bool, device=device)
+    )
+    def sample(coordinates):
+        # Sample with the CLEVR diffusion object and the CLEVR image size
+        # (clevr_options["image_size"]), matching the model used in model_fn.
+        samples = clevr_diffusion.p_sample_loop(
+            model_fn,
+            (len(coordinates), 3, clevr_options["image_size"], clevr_options["image_size"]),
+            device=device,
+            clip_denoised=True,
+            progress=True,
+            model_kwargs=model_kwargs,
+            cond_fn=None,
+        )[:batch_size]
+        return samples
+    samples = sample(coordinates)
+    # (C, H, W) -> (H, W, C), then shift by +1 and halve before handing to the UI.
+    out_img = samples[0].permute(1,2,0)
+    out_img = (out_img+1)/2
+    out_img = np.array(out_img.data.to('cpu'))
+    return out_img
+def compose(prompt, ver):
+    """Route `prompt` to the generator selected by `ver`.
+
+    'GLIDE' selects compose_language_descriptions; any other value
+    selects compose_clevr_objects.
+    """
+    generate = compose_language_descriptions if ver == 'GLIDE' else compose_clevr_objects
+    return generate(prompt)
+examples_1 = ['a camel | a forest', 'A cloudy blue sky  | A mountain in the horizon | Cherry Blossoms in front of the mountain']
+examples_2 = ['0.1, 0.5 | 0.3, 0.5 | 0.5, 0.5 | 0.7, 0.5 | 0.9, 0.5']
+# Each example row must hold one value per input component (prompt text, version),
+# so pair every prompt string with its version rather than placing the whole
+# prompt list in the text slot.
+examples = ([[prompt, 'GLIDE'] for prompt in examples_1]
+            + [[prompt, 'CLEVR Objects'] for prompt in examples_2])
 import gradio as gr
 gr.Interface(title='Compositional Visual Generation with Composable Diffusion Models',
+    description='<p>Demo for Composable Diffusion (~20s per example)</p><p>See more information from our <a href="https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/">Project Page</a>.</p><ul><li>One version is based on the released <a href="https://github.com/openai/glide-text2im">GLIDE</a> for composing natural language description.</li><li>Another is based on our pre-trained CLEVR Object Model for composing objects. <br>(<b>Note</b>: We recommend using <b><i>x</i></b> in range <b><i>[0.1, 0.9]</i></b> and <b><i>y</i></b> in range <b><i>[0.25, 0.7]</i></b>, since the training dataset labels are in given ranges.).</li></ul><p>When composing  multiple sentences, use `|` as the delimiter, see given examples below.</p>',
+    fn=compose, inputs=['text', gr.inputs.Radio(['GLIDE','CLEVR Objects'], type="value", default='GLIDE', label='version')], outputs='image', examples=examples).launch();