|
This is the code used to create this model:
|
```python
|
import torch
import diffusers

# Build a deliberately tiny conditional UNet: three cross-attention stages on
# each side, every stage only 4 channels wide, with a single layer per block.
model = diffusers.UNet2DConditionModel(
    block_out_channels=(4, 4, 4),
    down_block_types=(
        "CrossAttnDownBlock2D",
        "CrossAttnDownBlock2D",
        "CrossAttnDownBlock2D",
    ),
    up_block_types=(
        "CrossAttnUpBlock2D",
        "CrossAttnUpBlock2D",
        "CrossAttnUpBlock2D",
    ),
    norm_num_groups=2,
    cross_attention_dim=2,
    layers_per_block=1,
    attention_head_dim=2,
    addition_embed_type_num_heads=2,
)

# noisy latent, shape (7, 4, 33, 33) -- presumably (batch, channels, height, width)
x = torch.randn(7, 4, 33, 33)

# timestep: a single float value; torch.tensor is the recommended factory
# (torch.Tensor(...) is the legacy type constructor)
t = torch.tensor([1.0])

# conditioning embed, shape (7, 4, 2); the last dimension is 2, the same value
# passed as cross_attention_dim above
z = torch.randn(7, 4, 2)

# denoised latent (result of the model's forward call)
# NOTE(review): with diffusers' default return_dict=True this is likely an
# output object whose tensor lives in `y.sample` -- confirm before using `y`
# as a tensor.
y = model(x, t, z)
|
```