Doubiiu committed on
Commit
26e9969
·
verified ·
1 Parent(s): 92177c9

Update demo.py

Browse files
Files changed (1) hide show
  1. demo.py +84 -17
demo.py CHANGED
@@ -253,44 +253,111 @@ class TrajCrafter:
253
  pose_t = poses
254
  return pose_s, pose_t, K
255
 
256
- def setup_diffusion(self,opts):
257
- # transformer = CrossTransformer3DModel.from_pretrained_cus(opts.transformer_path).to(opts.weight_dtype)
258
- transformer = CrossTransformer3DModel.from_pretrained(opts.transformer_path).to(opts.weight_dtype)
259
- # transformer = transformer.to(opts.weight_dtype)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  vae = AutoencoderKLCogVideoX.from_pretrained(
261
- opts.model_name,
262
- subfolder="vae"
263
- ).to(opts.weight_dtype)
 
 
 
264
  text_encoder = T5EncoderModel.from_pretrained(
265
- opts.model_name, subfolder="text_encoder", torch_dtype=opts.weight_dtype
266
- )
267
- # Get Scheduler
 
 
 
268
  Choosen_Scheduler = {
269
  "Euler": EulerDiscreteScheduler,
270
  "Euler A": EulerAncestralDiscreteScheduler,
271
- "DPM++": DPMSolverMultistepScheduler,
272
  "PNDM": PNDMScheduler,
273
  "DDIM_Cog": CogVideoXDDIMScheduler,
274
  "DDIM_Origin": DDIMScheduler,
275
  }[opts.sampler_name]
276
  scheduler = Choosen_Scheduler.from_pretrained(
277
- opts.model_name,
278
  subfolder="scheduler"
279
  )
280
-
 
281
  self.pipeline = TrajCrafter_Pipeline.from_pretrained(
282
  opts.model_name,
283
  vae=vae,
284
  text_encoder=text_encoder,
285
  transformer=transformer,
286
  scheduler=scheduler,
287
- torch_dtype=opts.weight_dtype
288
  )
289
-
 
 
 
290
  if opts.low_gpu_memory_mode:
291
- self.pipeline.enable_sequential_cpu_offload()
292
- else:
 
293
  self.pipeline.enable_model_cpu_offload()
 
 
294
 
295
  def run_gradio(self,input_video, stride, radius_scale, pose, steps, seed):
296
  frames = read_video_frames(input_video, self.opts.video_length, stride,self.opts.max_res)
 
253
  pose_t = poses
254
  return pose_s, pose_t, K
255
 
256
+ # def setup_diffusion(self,opts):
257
+ # # transformer = CrossTransformer3DModel.from_pretrained_cus(opts.transformer_path).to(opts.weight_dtype)
258
+ # transformer = CrossTransformer3DModel.from_pretrained(opts.transformer_path).to(opts.weight_dtype)
259
+ # # transformer = transformer.to(opts.weight_dtype)
260
+ # vae = AutoencoderKLCogVideoX.from_pretrained(
261
+ # opts.model_name,
262
+ # subfolder="vae"
263
+ # ).to(opts.weight_dtype)
264
+ # text_encoder = T5EncoderModel.from_pretrained(
265
+ # opts.model_name, subfolder="text_encoder", torch_dtype=opts.weight_dtype
266
+ # )
267
+ # # Get Scheduler
268
+ # Choosen_Scheduler = {
269
+ # "Euler": EulerDiscreteScheduler,
270
+ # "Euler A": EulerAncestralDiscreteScheduler,
271
+ # "DPM++": DPMSolverMultistepScheduler,
272
+ # "PNDM": PNDMScheduler,
273
+ # "DDIM_Cog": CogVideoXDDIMScheduler,
274
+ # "DDIM_Origin": DDIMScheduler,
275
+ # }[opts.sampler_name]
276
+ # scheduler = Choosen_Scheduler.from_pretrained(
277
+ # opts.model_name,
278
+ # subfolder="scheduler"
279
+ # )
280
+
281
+ # self.pipeline = TrajCrafter_Pipeline.from_pretrained(
282
+ # opts.model_name,
283
+ # vae=vae,
284
+ # text_encoder=text_encoder,
285
+ # transformer=transformer,
286
+ # scheduler=scheduler,
287
+ # torch_dtype=opts.weight_dtype
288
+ # )
289
+
290
+ # if opts.low_gpu_memory_mode:
291
+ # self.pipeline.enable_sequential_cpu_offload()
292
+ # else:
293
+ # self.pipeline.enable_model_cpu_offload()
294
+
295
+ def setup_diffusion(self, opts):
296
+ import torch
297
+
298
+ # 1) 选择设备
299
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
300
+
301
+ # (可选)在 CPU 跑时避免 efficient attention 报错;在 CUDA 上也无害
302
+ try:
303
+ torch.backends.cuda.enable_flash_sdp(False)
304
+ torch.backends.cuda.enable_mem_efficient_sdp(True)
305
+ torch.backends.cuda.enable_math_sdp(True)
306
+ except Exception:
307
+ pass
308
+
309
+ # 2) 加载/放置子模块到 device + dtype
310
+ # 注意:原代码只 .to(dtype),未指定 device;这里补齐
311
+ transformer = CrossTransformer3DModel.from_pretrained(opts.transformer_path)
312
+ transformer = transformer.to(device=device, dtype=opts.weight_dtype)
313
+
314
  vae = AutoencoderKLCogVideoX.from_pretrained(
315
+ opts.model_name,
316
+ subfolder="vae",
317
+ # 仅指定 dtype;后面统一 .to(device)
318
+ # 某些 from_pretrained 不支持 device 形参
319
+ ).to(dtype=opts.weight_dtype).to(device)
320
+
321
  text_encoder = T5EncoderModel.from_pretrained(
322
+ opts.model_name,
323
+ subfolder="text_encoder",
324
+ torch_dtype=opts.weight_dtype,
325
+ ).to(device)
326
+
327
+ # 3) 调度器照旧
328
  Choosen_Scheduler = {
329
  "Euler": EulerDiscreteScheduler,
330
  "Euler A": EulerAncestralDiscreteScheduler,
331
+ "DPM++": DPMSolverMultistepScheduler,
332
  "PNDM": PNDMScheduler,
333
  "DDIM_Cog": CogVideoXDDIMScheduler,
334
  "DDIM_Origin": DDIMScheduler,
335
  }[opts.sampler_name]
336
  scheduler = Choosen_Scheduler.from_pretrained(
337
+ opts.model_name,
338
  subfolder="scheduler"
339
  )
340
+
341
+ # 4) 组装 pipeline,并确保在正确 device/dtype
342
  self.pipeline = TrajCrafter_Pipeline.from_pretrained(
343
  opts.model_name,
344
  vae=vae,
345
  text_encoder=text_encoder,
346
  transformer=transformer,
347
  scheduler=scheduler,
348
+ torch_dtype=opts.weight_dtype,
349
  )
350
+
351
+ # Offload 策略:
352
+ # - 如果你机器有足够显存,建议直接 to(device) 获得最稳的行为
353
+ # - 如果显存紧张,再启用 offload(需要 accelerate 支持)
354
  if opts.low_gpu_memory_mode:
355
+ # 这两种 offload 会在计算时把块迁移到 GPU,空闲时回收;加速略低但更省显存
356
+ # 二选一:根据你之前的使用习惯保留其一
357
+ # self.pipeline.enable_sequential_cpu_offload()
358
  self.pipeline.enable_model_cpu_offload()
359
+ else:
360
+ self.pipeline.to(device)
361
 
362
  def run_gradio(self,input_video, stride, radius_scale, pose, steps, seed):
363
  frames = read_video_frames(input_video, self.opts.video_length, stride,self.opts.max_res)