Spaces:
Running
on
Zero
Running
on
Zero
Update demo.py
Browse files
demo.py
CHANGED
|
@@ -253,44 +253,111 @@ class TrajCrafter:
|
|
| 253 |
pose_t = poses
|
| 254 |
return pose_s, pose_t, K
|
| 255 |
|
| 256 |
-
def setup_diffusion(self,opts):
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
vae = AutoencoderKLCogVideoX.from_pretrained(
|
| 261 |
-
opts.model_name,
|
| 262 |
-
subfolder="vae"
|
| 263 |
-
|
|
|
|
|
|
|
|
|
|
| 264 |
text_encoder = T5EncoderModel.from_pretrained(
|
| 265 |
-
opts.model_name,
|
| 266 |
-
|
| 267 |
-
|
|
|
|
|
|
|
|
|
|
| 268 |
Choosen_Scheduler = {
|
| 269 |
"Euler": EulerDiscreteScheduler,
|
| 270 |
"Euler A": EulerAncestralDiscreteScheduler,
|
| 271 |
-
"DPM++": DPMSolverMultistepScheduler,
|
| 272 |
"PNDM": PNDMScheduler,
|
| 273 |
"DDIM_Cog": CogVideoXDDIMScheduler,
|
| 274 |
"DDIM_Origin": DDIMScheduler,
|
| 275 |
}[opts.sampler_name]
|
| 276 |
scheduler = Choosen_Scheduler.from_pretrained(
|
| 277 |
-
opts.model_name,
|
| 278 |
subfolder="scheduler"
|
| 279 |
)
|
| 280 |
-
|
|
|
|
| 281 |
self.pipeline = TrajCrafter_Pipeline.from_pretrained(
|
| 282 |
opts.model_name,
|
| 283 |
vae=vae,
|
| 284 |
text_encoder=text_encoder,
|
| 285 |
transformer=transformer,
|
| 286 |
scheduler=scheduler,
|
| 287 |
-
torch_dtype=opts.weight_dtype
|
| 288 |
)
|
| 289 |
-
|
|
|
|
|
|
|
|
|
|
| 290 |
if opts.low_gpu_memory_mode:
|
| 291 |
-
|
| 292 |
-
|
|
|
|
| 293 |
self.pipeline.enable_model_cpu_offload()
|
|
|
|
|
|
|
| 294 |
|
| 295 |
def run_gradio(self,input_video, stride, radius_scale, pose, steps, seed):
|
| 296 |
frames = read_video_frames(input_video, self.opts.video_length, stride,self.opts.max_res)
|
|
|
|
| 253 |
pose_t = poses
|
| 254 |
return pose_s, pose_t, K
|
| 255 |
|
| 256 |
+
# def setup_diffusion(self,opts):
|
| 257 |
+
# # transformer = CrossTransformer3DModel.from_pretrained_cus(opts.transformer_path).to(opts.weight_dtype)
|
| 258 |
+
# transformer = CrossTransformer3DModel.from_pretrained(opts.transformer_path).to(opts.weight_dtype)
|
| 259 |
+
# # transformer = transformer.to(opts.weight_dtype)
|
| 260 |
+
# vae = AutoencoderKLCogVideoX.from_pretrained(
|
| 261 |
+
# opts.model_name,
|
| 262 |
+
# subfolder="vae"
|
| 263 |
+
# ).to(opts.weight_dtype)
|
| 264 |
+
# text_encoder = T5EncoderModel.from_pretrained(
|
| 265 |
+
# opts.model_name, subfolder="text_encoder", torch_dtype=opts.weight_dtype
|
| 266 |
+
# )
|
| 267 |
+
# # Get Scheduler
|
| 268 |
+
# Choosen_Scheduler = {
|
| 269 |
+
# "Euler": EulerDiscreteScheduler,
|
| 270 |
+
# "Euler A": EulerAncestralDiscreteScheduler,
|
| 271 |
+
# "DPM++": DPMSolverMultistepScheduler,
|
| 272 |
+
# "PNDM": PNDMScheduler,
|
| 273 |
+
# "DDIM_Cog": CogVideoXDDIMScheduler,
|
| 274 |
+
# "DDIM_Origin": DDIMScheduler,
|
| 275 |
+
# }[opts.sampler_name]
|
| 276 |
+
# scheduler = Choosen_Scheduler.from_pretrained(
|
| 277 |
+
# opts.model_name,
|
| 278 |
+
# subfolder="scheduler"
|
| 279 |
+
# )
|
| 280 |
+
|
| 281 |
+
# self.pipeline = TrajCrafter_Pipeline.from_pretrained(
|
| 282 |
+
# opts.model_name,
|
| 283 |
+
# vae=vae,
|
| 284 |
+
# text_encoder=text_encoder,
|
| 285 |
+
# transformer=transformer,
|
| 286 |
+
# scheduler=scheduler,
|
| 287 |
+
# torch_dtype=opts.weight_dtype
|
| 288 |
+
# )
|
| 289 |
+
|
| 290 |
+
# if opts.low_gpu_memory_mode:
|
| 291 |
+
# self.pipeline.enable_sequential_cpu_offload()
|
| 292 |
+
# else:
|
| 293 |
+
# self.pipeline.enable_model_cpu_offload()
|
| 294 |
+
|
| 295 |
+
def setup_diffusion(self, opts):
    """Load the diffusion sub-models and assemble ``self.pipeline``.

    Reads ``transformer_path``, ``model_name``, ``weight_dtype``,
    ``sampler_name`` and ``low_gpu_memory_mode`` from ``opts``.

    Sub-models are loaded and cast to ``opts.weight_dtype`` without being
    moved to the accelerator individually; the assembled pipeline performs
    the single device placement at the end (either via model CPU offload or
    a plain ``.to(device)``). This avoids having every sub-model resident
    on the GPU at once right before offload is enabled.

    Raises:
        KeyError: if ``opts.sampler_name`` is not a key of the scheduler
            table below.
    """
    import torch

    # 1) Pick the device: CUDA when available, otherwise CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Best-effort scaled-dot-product-attention backend toggles: flash is
    # disabled, mem-efficient and math stay enabled. Failures are ignored
    # on purpose so that model setup never aborts because of these flags.
    # NOTE(review): the original comment said this is meant to avoid
    # "efficient attention" errors when running on CPU, yet the
    # mem-efficient kernel is enabled here -- confirm the intended flags.
    try:
        torch.backends.cuda.enable_flash_sdp(False)
        torch.backends.cuda.enable_mem_efficient_sdp(True)
        torch.backends.cuda.enable_math_sdp(True)
    except Exception:
        pass

    # 2) Load the sub-models and cast them to the working dtype. Device
    #    placement is deliberately deferred to step 5.
    transformer = CrossTransformer3DModel.from_pretrained(opts.transformer_path)
    transformer = transformer.to(dtype=opts.weight_dtype)

    vae = AutoencoderKLCogVideoX.from_pretrained(
        opts.model_name,
        subfolder="vae",
    ).to(dtype=opts.weight_dtype)

    text_encoder = T5EncoderModel.from_pretrained(
        opts.model_name,
        subfolder="text_encoder",
        torch_dtype=opts.weight_dtype,
    )

    # 3) Resolve the scheduler class from the sampler name (unchanged).
    Choosen_Scheduler = {
        "Euler": EulerDiscreteScheduler,
        "Euler A": EulerAncestralDiscreteScheduler,
        "DPM++": DPMSolverMultistepScheduler,
        "PNDM": PNDMScheduler,
        "DDIM_Cog": CogVideoXDDIMScheduler,
        "DDIM_Origin": DDIMScheduler,
    }[opts.sampler_name]
    scheduler = Choosen_Scheduler.from_pretrained(
        opts.model_name,
        subfolder="scheduler",
    )

    # 4) Assemble the pipeline from the pre-loaded components.
    self.pipeline = TrajCrafter_Pipeline.from_pretrained(
        opts.model_name,
        vae=vae,
        text_encoder=text_encoder,
        transformer=transformer,
        scheduler=scheduler,
        torch_dtype=opts.weight_dtype,
    )

    # 5) Device placement / offload strategy.
    #    - Low-memory mode on a CUDA device: use model CPU offload, which
    #      trades some speed for lower GPU memory use.
    #      (enable_sequential_cpu_offload() is the alternative that was
    #      previously used here; only one of the two should be enabled.)
    #    - Otherwise (enough GPU memory, or no CUDA device at all): move
    #      the whole pipeline to the selected device. Offload is skipped
    #      on CPU because there is no accelerator to offload from.
    if opts.low_gpu_memory_mode and device.type == "cuda":
        self.pipeline.enable_model_cpu_offload()
    else:
        self.pipeline.to(device)
|
| 361 |
|
| 362 |
def run_gradio(self,input_video, stride, radius_scale, pose, steps, seed):
|
| 363 |
frames = read_video_frames(input_video, self.opts.video_length, stride,self.opts.max_res)
|