import os
import imageio
import numpy as np
from typing import Union
import torch
import torchvision
from tqdm import tqdm
from einops import rearrange


def save_videos_grid(videos: torch.Tensor, path: str, rescale=False, n_rows=4, fps=8):
    # Lay out a batch of videos frame by frame as an image grid and save the
    # frames as an animated file (e.g. GIF) via imageio.
    videos = rearrange(videos, "b c t h w -> t b c h w")
    outputs = []
    for x in videos:
        x = torchvision.utils.make_grid(x, nrow=n_rows)
        x = x.transpose(0, 1).transpose(1, 2).squeeze(-1)  # (c, h, w) -> (h, w, c)
        if rescale:
            x = (x + 1.0) / 2.0  # [-1, 1] -> [0, 1]
        x = (x * 255).numpy().astype(np.uint8)
        outputs.append(x)

    os.makedirs(os.path.dirname(path), exist_ok=True)
    imageio.mimsave(path, outputs, fps=fps)
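
# Illustrative usage sketch (the tensor shape and output path below are
# assumptions, not part of this module):
#
#     videos = torch.rand(2, 3, 8, 64, 64)  # (batch, channels, frames, H, W) in [0, 1], on CPU
#     save_videos_grid(videos, "samples/demo.gif", n_rows=2, fps=8)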


# DDIM Inversion
@torch.no_grad()
def init_prompt(prompt, pipeline):
    # Encode the empty string and the prompt, and stack them as
    # [unconditional; conditional] embeddings for classifier-free guidance.
    uncond_input = pipeline.tokenizer(
        [""], padding="max_length", max_length=pipeline.tokenizer.model_max_length,
        return_tensors="pt"
    )
    uncond_embeddings = pipeline.text_encoder(uncond_input.input_ids.to(pipeline.device))[0]
    text_input = pipeline.tokenizer(
        [prompt],
        padding="max_length",
        max_length=pipeline.tokenizer.model_max_length,
        truncation=True,
        return_tensors="pt",
    )
    text_embeddings = pipeline.text_encoder(text_input.input_ids.to(pipeline.device))[0]
    context = torch.cat([uncond_embeddings, text_embeddings])
    return context


def next_step(model_output: Union[torch.FloatTensor, np.ndarray], timestep: int,
              sample: Union[torch.FloatTensor, np.ndarray], ddim_scheduler):
    # One reversed DDIM update: step from `timestep` to the next (noisier)
    # timestep by inverting the deterministic DDIM sampling rule.
    timestep, next_timestep = min(
        timestep - ddim_scheduler.config.num_train_timesteps // ddim_scheduler.num_inference_steps, 999), timestep
    alpha_prod_t = ddim_scheduler.alphas_cumprod[timestep] if timestep >= 0 else ddim_scheduler.final_alpha_cumprod
    alpha_prod_t_next = ddim_scheduler.alphas_cumprod[next_timestep]
    beta_prod_t = 1 - alpha_prod_t
    next_original_sample = (sample - beta_prod_t ** 0.5 * model_output) / alpha_prod_t ** 0.5
    next_sample_direction = (1 - alpha_prod_t_next) ** 0.5 * model_output
    next_sample = alpha_prod_t_next ** 0.5 * next_original_sample + next_sample_direction
    return next_sample


def get_noise_pred_single(latents, t, context, unet):
    noise_pred = unet(latents, t, encoder_hidden_states=context)["sample"]
    return noise_pred


@torch.no_grad()
def ddim_loop(pipeline, ddim_scheduler, latent, num_inv_steps, prompt):
    # Run the inversion from the clean latent towards t=T, collecting the
    # latent at every step; only the conditional embeddings are used for the
    # noise prediction.
    context = init_prompt(prompt, pipeline)
    uncond_embeddings, cond_embeddings = context.chunk(2)
    all_latent = [latent]
    latent = latent.clone().detach()
    for i in tqdm(range(num_inv_steps)):
        t = ddim_scheduler.timesteps[len(ddim_scheduler.timesteps) - i - 1]
        noise_pred = get_noise_pred_single(latent, t, cond_embeddings, pipeline.unet)
        latent = next_step(noise_pred, t, latent, ddim_scheduler)
        all_latent.append(latent)
    return all_latent


@torch.no_grad()
def ddim_inversion(pipeline, ddim_scheduler, video_latent, num_inv_steps, prompt=""):
    ddim_latents = ddim_loop(pipeline, ddim_scheduler, video_latent, num_inv_steps, prompt)
    return ddim_latents
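
# Illustrative usage sketch (the diffusers setup below is an assumption; any
# pipeline-like object exposing tokenizer, text_encoder, unet, and device fits
# the interface used above):
#
#     from diffusers import StableDiffusionPipeline, DDIMScheduler
#     pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5").to("cuda")
#     scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
#     scheduler.set_timesteps(50)
#     latents = torch.randn(1, 4, 64, 64, device=pipe.device)  # shape must match your UNet
#     inv_latents = ddim_inversion(pipe, scheduler, latents, num_inv_steps=50, prompt="a photo")
#     start_latent = inv_latents[-1]  # most-noised latent; a starting point for resampling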


def rendering():
    # Placeholder; not implemented.
    pass


def force_zero_snr(betas):
    # Rescale a beta schedule so the terminal SNR is (numerically) zero, as in
    # "Common Diffusion Noise Schedules and Sample Steps are Flawed" (Lin et
    # al., 2023): shift and scale sqrt(alpha_bar) so it stays 1 at t=0 and is
    # ~0 at t=T (the 1e-6 offset avoids an exactly-zero alpha_bar), then
    # recover per-step alphas and betas.
    alphas = 1 - betas
    alphas_bar = torch.cumprod(alphas, dim=0)
    alphas_bar_sqrt = alphas_bar ** 0.5
    alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
    alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone() - 1e-6
    alphas_bar_sqrt -= alphas_bar_sqrt_T
    alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)
    alphas_bar = alphas_bar_sqrt ** 2
    alphas = alphas_bar[1:] / alphas_bar[:-1]
    alphas = torch.cat([alphas_bar[0:1], alphas], 0)
    betas = 1 - alphas
    return betas
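
# Quick sanity check (illustrative; the base schedule matches the
# "scaled_linear" defaults below): after rescaling, alpha_bar at the final
# step is effectively zero, i.e. near-zero terminal SNR.
#
#     base = torch.linspace(0.00085 ** 0.5, 0.012 ** 0.5, 1000, dtype=torch.float64) ** 2
#     zsnr = force_zero_snr(base)
#     torch.cumprod(1 - zsnr, dim=0)[-1]  # ~0, vs. clearly nonzero for `base`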


def make_beta_schedule(schedule="scaled_linear", n_timestep=1000, linear_start=0.00085,
                       linear_end=0.012, cosine_s=8e-3, shift_scale=None):
    if schedule == "scaled_linear":
        # Linear in sqrt(beta) space (the Stable Diffusion default).
        betas = (
            torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2
        )
    elif schedule == "linear":
        betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64)
    elif schedule == "cosine":
        timesteps = (
            torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s
        )
        alphas = timesteps / (1 + cosine_s) * np.pi / 2
        alphas = torch.cos(alphas).pow(2)
        alphas = alphas / alphas[0]
        betas = 1 - alphas[1:] / alphas[:-1]
        # torch.clamp rather than np.clip: np.clip converts the tensor to a
        # numpy array, which would break the .numpy() call below.
        betas = torch.clamp(betas, min=0, max=0.999)
    elif schedule == "sqrt_linear":
        betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64)
    elif schedule == "sqrt":
        betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) ** 0.5
    elif schedule == "linear_force_zero_snr":
        betas = (
            torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2
        )
        betas = force_zero_snr(betas)
    elif schedule == "linear_100":
        betas = (
            torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2
        )
        betas = betas[:100]
    else:
        raise ValueError(f"schedule '{schedule}' unknown.")

    if shift_scale is not None:
        print(f"shifting schedule with shift_scale={shift_scale}")
        betas = shift_schedule(betas, shift_scale)
    return betas.numpy()
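
# Illustrative usage sketch (argument values are the defaults defined above):
#
#     betas = make_beta_schedule("scaled_linear", n_timestep=1000,
#                                linear_start=0.00085, linear_end=0.012)
#     betas.shape  # (1000,), returned as a float64 numpy array
#     zsnr_betas = make_beta_schedule("linear_force_zero_snr")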


def shift_schedule(base_betas, shift_scale):
    # Shift a schedule in SNR space: divide the per-step SNR by shift_scale**2,
    # then rebuild the betas.
    # Derivation: snr * (1 - ab) = ab;  snr = (1 + snr) * ab;  ab = snr / (1 + snr)
    alphas = 1 - base_betas
    alphas_bar = torch.cumprod(alphas, dim=0)
    snr = alphas_bar / (1 - alphas_bar)
    shifted_snr = snr * ((1 / shift_scale) ** 2)
    shifted_alphas_bar = shifted_snr / (1 + shifted_snr)
    shifted_alphas = shifted_alphas_bar[1:] / shifted_alphas_bar[:-1]
    shifted_alphas = torch.cat([shifted_alphas_bar[0:1], shifted_alphas], 0)
    shifted_betas = 1 - shifted_alphas
    return shifted_betas
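
# Quick check of the shift (illustrative): every timestep's SNR is divided by
# shift_scale ** 2, so with shift_scale=2.0 the SNR curve drops by 4x:
#
#     base = torch.linspace(0.00085, 0.012, 1000, dtype=torch.float64)
#     shifted = shift_schedule(base, shift_scale=2.0)
#     snr = lambda b: (ab := torch.cumprod(1 - b, dim=0)) / (1 - ab)
#     torch.allclose(snr(shifted), snr(base) / 4.0)  # True (up to float error)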


def shift_dim(x, src_dim=-1, dest_dim=-1, make_contiguous=True):
    # Move axis `src_dim` of x to position `dest_dim`, keeping the relative
    # order of all other axes (like np.moveaxis).
    n_dims = len(x.shape)
    if src_dim < 0:
        src_dim = n_dims + src_dim
    if dest_dim < 0:
        dest_dim = n_dims + dest_dim
    assert 0 <= src_dim < n_dims and 0 <= dest_dim < n_dims

    dims = list(range(n_dims))
    del dims[src_dim]

    permutation = []
    ctr = 0
    for i in range(n_dims):
        if i == dest_dim:
            permutation.append(src_dim)
        else:
            permutation.append(dims[ctr])
            ctr += 1
    x = x.permute(permutation)
    if make_contiguous:
        x = x.contiguous()
    return x
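
# Illustrative usage sketch: move the channel axis of a (B, T, H, W, C) tensor
# to position 1, giving (B, C, T, H, W):
#
#     x = torch.empty(2, 8, 64, 64, 3)
#     shift_dim(x, src_dim=-1, dest_dim=1).shape  # torch.Size([2, 3, 8, 64, 64])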


# Reshapes the dims of x from i (inclusive) to j (exclusive) into `shape`,
# e.g. if x.shape = (b, thw, c), then view_range(x, 1, 2, (t, h, w))
# returns x with shape (b, t, h, w, c).
def view_range(x, i, j, shape):
    shape = tuple(shape)

    n_dims = len(x.shape)
    if i < 0:
        i = n_dims + i
    if j is None:
        j = n_dims
    elif j < 0:
        j = n_dims + j
    assert 0 <= i < j <= n_dims

    x_shape = x.shape
    target_shape = x_shape[:i] + shape + x_shape[j:]
    return x.view(target_shape)


def tensor_slice(x, begin, size):
    # Slice x starting at `begin` with extent `size` along each dim; a size of
    # -1 means "to the end of that dimension" (tf.slice-style semantics).
    assert all([b >= 0 for b in begin])
    size = [l - b if s == -1 else s
            for s, b, l in zip(size, begin, x.shape)]
    assert all([s >= 0 for s in size])
    slices = [slice(b, b + s) for b, s in zip(begin, size)]
    return x[tuple(slices)]  # index with a tuple; indexing with a list is deprecated
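
# Illustrative usage sketch: take a (2, 4, 4) window starting at (0, 1, 1);
# size -1 means "to the end of that dimension":
#
#     x = torch.arange(2 * 8 * 8).view(2, 8, 8)
#     tensor_slice(x, begin=[0, 1, 1], size=[-1, 4, 4]).shape  # torch.Size([2, 4, 4])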