Spaces:

1inkusFace
/

SkyReels

Build error

App Files Files Community

1inkusFace committed on Mar 6

Commit

4d4355a

verified ·

1 Parent(s): 5abf5ea

Update app.py

Browse files

Files changed (1) hide show

app.py +207 -70

app.py CHANGED Viewed

@@ -27,100 +27,247 @@ torch.backends.cudnn.allow_tf32 = False
 torch.backends.cudnn.deterministic = False
 torch.backends.cudnn.benchmark = False
 torch.set_float32_matmul_precision("highest")
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 logger = logging.getLogger(__name__)
-_predictor = None
-task_type = TaskType.I2V
-def init_predictor():
-    global _predictor
-    global task_type  # Access global task_type
-    logger = logging.getLogger(__name__)  # Logger within function
-    if _predictor is None:
-        if task_type == TaskType.I2V:
-            model_id = "Skywork/SkyReels-V1-Hunyuan-I2V"
-        elif task_type == TaskType.T2V:
-            model_id = "your_t2v_model_id"  # REPLACE with your T2V model
         else:
-            raise ValueError(f"Invalid task_type: {task_type}")
-        _predictor = SkyReelsVideoSingleGpuInfer(
-            task_type=task_type,  # Pass the task_type
-            model_id=model_id,
-            quant_model=True,
-            is_offload=True,
-            offload_config=OffloadConfig(
-                high_cpu_memory=True,
-                parameters_level=True,
-                compiler_transformer=False,
-            ),
-        )
-        _predictor.initialize()
-        logger.info("Predictor initialized")
-    else:
-        logger.warning("Predictor already initialized (should be rare).")
 @spaces.GPU(duration=90)
 def generate_video(prompt, seed, image=None):
     global _predictor
-    global task_type
     if seed == -1:
         random.seed()
         seed = int(random.randrange(4294967294))
-    kwargs = {
-        "prompt": prompt,
-        "height": 512,
-        "width": 512,
-        "num_frames": 97,
-        "num_inference_steps": 30,
-        "seed": seed,
-        "guidance_scale": 6.0,
-        "embedded_guidance_scale": 1.0,
-        "negative_prompt": "Aerial view, aerial view, overexposed, low quality, deformation, a poor composition, bad hands, bad teeth, bad eyes, bad limbs, distortion",
-        "cfg_for": False,
-    }
-    if task_type == TaskType.I2V:
-        assert image is not None, "Please input an image for I2V task."
-        kwargs["image"] = Image.open(image)
-    elif task_type == TaskType.T2V:
-      pass #No image necessary
     else:
-      raise ValueError(f"Invalid Tasktype: {task_type}")
     if _predictor is None:
-        init_predictor()
     output = _predictor.infer(**kwargs)
     output = (output.cpu().numpy() * 255).astype(np.uint8)
     output = output.transpose(0, 2, 3, 4, 1)
-    save_dir = f"./result/{task_type.name}"
     os.makedirs(save_dir, exist_ok=True)
     video_out_file = f"{save_dir}/{prompt[:100].replace('/','')}_{seed}.mp4"
     print(f"generate video, local path: {video_out_file}")
-    export_to_video(output, video_out_file, fps=24)
     return video_out_file, kwargs
 def create_gradio_interface():
     with gr.Blocks() as demo:
         with gr.Row():
-          with gr.Column():
-            image = gr.Image(label="Upload Image", type="filepath")
-            prompt = gr.Textbox(label="Input Prompt")
-            seed = gr.Number(label="Random Seed", value=-1)
-          with gr.Column():
-            submit_button = gr.Button("Generate Video")
-            output_video = gr.Video(label="Generated Video")
-            output_params = gr.Textbox(label="Output Parameters")
         submit_button.click(
             fn=generate_video,
             inputs=[prompt, seed, image],
@@ -130,15 +277,5 @@ def create_gradio_interface():
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--task_type", type=str, default="i2v", choices=["t2v", "i2v"],
-                        help="Task type, 't2v' for text-to-video, 'i2v' for image-to-video.")
-    args = parser.parse_args()
-    if args.task_type == "t2v":
-        task_type = TaskType.T2V
-    elif args.task_type == "i2v":
-        task_type = TaskType.I2V
     demo = create_gradio_interface()
     demo.queue().launch()

 torch.backends.cudnn.deterministic = False
 torch.backends.cudnn.benchmark = False
 torch.set_float32_matmul_precision("highest")
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 logger = logging.getLogger(__name__)
+# --- Dummy Classes (Keep for standalone execution) ---
+class OffloadConfig:
+    """Stand-in offload configuration: it only stores the four values it is given."""
+    def __init__(self, high_cpu_memory=False, parameters_level=False, compiler_transformer=False, compiler_cache=""):
+        # NOTE(review): high_cpu_memory and parameters_level are stored but not read anywhere in the code visible here.
+        self.high_cpu_memory = high_cpu_memory
+        self.parameters_level = parameters_level
+        # Read by SkyReelsVideoSingleGpuInfer.initialize() to decide whether the transformer is torch.compile'd.
+        self.compiler_transformer = compiler_transformer
+        # Exported as TORCHINDUCTOR_CACHE_DIR when compiler_transformer is set.
+        self.compiler_cache = compiler_cache
+import enum  # local to this stub so the block works without touching the file's import header
+class TaskType(enum.IntEnum):  # Keep here for infer
+    """Kind of generation task.
+
+    An ``IntEnum`` keeps the members equal to the plain ints 0 and 1 used
+    before, while also giving each member the ``.name`` attribute that
+    ``generate_video`` relies on when building the output directory.
+    """
+    T2V = 0
+    I2V = 1
+class LlamaModel:
+    """Dummy text-encoder stub: it accepts the loading arguments and does no work."""
+    @staticmethod
+    def from_pretrained(*args, **kwargs):
+        # All arguments are accepted and discarded; no weights are loaded.
+        return LlamaModel()
+    def to(self, device):
+        # No-op device move; returns self so calls can be chained.
+        return self
+class HunyuanVideoTransformer3DModel:
+    """Dummy transformer stub: it accepts the loading arguments and does no work."""
+    @staticmethod
+    def from_pretrained(*args, **kwargs):
+        # All arguments are accepted and discarded; no weights are loaded.
+        return HunyuanVideoTransformer3DModel()
+    def to(self, device):
+        # No-op device move; returns self so calls can be chained.
+        return self
+class SkyreelsVideoPipeline:
+    """Dummy pipeline stub that returns random frames instead of running a model."""
+    @staticmethod
+    def from_pretrained(*args, **kwargs):
+        # All arguments (including transformer= and text_encoder=) are discarded.
+        return SkyreelsVideoPipeline()
+    def to(self, device):
+        # No-op device move; returns self so calls can be chained.
+        return self
+    def __call__(self, *args, **kwargs):
+        # Every generation argument is ignored; the result is always a random
+        # 1 x 3 x 16 x 512 x 512 tensor wrapped in an object exposing `.frames`.
+        frames = torch.randn(1, 3, 16, 512, 512)  # Correct dummy output
+        return type('obj', (object,), {'frames' : [frames]})()
+    def __init__(self):
+      super().__init__()
+      # NOTE(review): OrderedDict must already be imported at file level; that import is not visible in this hunk.
+      self._modules = OrderedDict()
+      self.vae = self.VAE()
+      self._modules["vae"] = self.vae
+      # NOTE(review): no `transformer` attribute is set, so code reading `pipe.transformer` would fail on this stub.
+    def named_children(self):
+      # Yields (name, child) pairs from the registry filled in __init__.
+      return self._modules.items()
+    class VAE:
+        """Dummy VAE whose tiling switch does nothing."""
+        def enable_tiling(self):
+            pass
+def quantize_(*args, **kwargs):
+    """Dummy quantization hook: accepts any arguments, changes nothing, returns None."""
+    return
+def float8_weight_only():
+    """Dummy quantization-config factory: returns None, which the dummy quantize_ ignores."""
+    return
+# --- End Dummy Classes ---
+class SkyReelsVideoSingleGpuInfer:
+    """Lazy single-GPU wrapper around ``SkyreelsVideoPipeline``.
+
+    Constructing the object only records configuration. ``initialize()``
+    loads the pipeline, and ``infer()`` calls ``initialize()`` on first use.
+    """
+    def _load_model(self, model_id: str, base_model_id: str = "hunyuanvideo-community/HunyuanVideo", quant_model: bool = True):
+        """Build the pipeline on the CPU and return it.
+
+        Args:
+            model_id: Repository id the transformer is loaded from.
+            base_model_id: Repository id used for the text encoder and the pipeline.
+            quant_model: When true, ``quantize_`` is applied to the text encoder
+                and the transformer with ``float8_weight_only()``.
+        """
+        logger.info(f"load model model_id:{model_id} quan_model:{quant_model}")
+        text_encoder = LlamaModel.from_pretrained(
+            base_model_id, subfolder="text_encoder", torch_dtype=torch.bfloat16
+        ).to("cpu")
+        transformer = HunyuanVideoTransformer3DModel.from_pretrained(
+            model_id, torch_dtype=torch.bfloat16, device="cpu"
+        ).to("cpu")
+        if quant_model:
+            quantize_(text_encoder, float8_weight_only())
+            text_encoder.to("cpu")
+            torch.cuda.empty_cache()
+            quantize_(transformer, float8_weight_only())
+            transformer.to("cpu")
+            torch.cuda.empty_cache()
+        pipe = SkyreelsVideoPipeline.from_pretrained(
+            base_model_id, transformer=transformer, text_encoder=text_encoder, torch_dtype=torch.bfloat16
+        ).to("cpu")
+        pipe.vae.enable_tiling()
+        torch.cuda.empty_cache()
+        return pipe
+    def __init__(
+        self,
+        task_type: TaskType,
+        model_id: str,
+        quant_model: bool = True,
+        is_offload: bool = True,
+        offload_config: "OffloadConfig | None" = None,
+        enable_cfg_parallel: bool = True,
+    ):
+        """Record the configuration; nothing is loaded until ``initialize()``.
+
+        Args:
+            task_type: ``TaskType.T2V`` or ``TaskType.I2V``.
+            model_id: Repository id passed to ``_load_model``.
+            quant_model: Forwarded to ``_load_model``.
+            is_offload: When false, the pipeline is moved to the GPU in ``initialize()``.
+            offload_config: Compile/offload settings. ``None`` means a fresh
+                default ``OffloadConfig()`` per instance.
+            enable_cfg_parallel: Stored only; not read in this class.
+        """
+        self.task_type = task_type
+        self.model_id = model_id
+        self.quant_model = quant_model
+        self.is_offload = is_offload
+        # Build the default here rather than in the signature, so instances never share one config object.
+        self.offload_config = OffloadConfig() if offload_config is None else offload_config
+        self.enable_cfg_parallel = enable_cfg_parallel
+        self.pipe = None
+        self.is_initialized = False
+        self.gpu_device = None
+    def initialize(self):
+        """Load the pipeline once; later calls return immediately.
+
+        Raises:
+            RuntimeError: If CUDA is not available.
+        """
+        if self.is_initialized:
+            return
+        if not torch.cuda.is_available():
+            raise RuntimeError("CUDA is not available. Cannot initialize model.")
+        self.gpu_device = "cuda:0"
+        self.pipe = self._load_model(model_id=self.model_id, quant_model=self.quant_model)
+        if self.is_offload:
+            # NOTE(review): no offloading is wired up here, so the pipeline stays on the CPU where _load_model left it.
+            pass
         else:
+            self.pipe.to(self.gpu_device)
+        if self.offload_config.compiler_transformer:
+            torch._dynamo.config.suppress_errors = True
+            os.environ["TORCHINDUCTOR_FX_GRAPH_CACHE"] = "1"
+            os.environ["TORCHINDUCTOR_CACHE_DIR"] = f"{self.offload_config.compiler_cache}"
+            self.pipe.transformer = torch.compile(
+                self.pipe.transformer, mode="max-autotune-no-cudagraphs", dynamic=True
+            )
+            # warm_up() only needs the loaded pipeline, so it can run before is_initialized is flipped.
+            self.warm_up()
+        self.is_initialized = True
+    def warm_up(self):
+        """Run one single-step generation so compilation happens before the first real request.
+
+        Raises:
+            RuntimeError: If the pipeline has not been loaded yet.
+        """
+        # Check the pipeline itself, not is_initialized: initialize() calls this before setting that flag.
+        if self.pipe is None:
+            raise RuntimeError("Model must be initialized before warm-up.")
+        init_kwargs = {
+            "prompt": "A woman is dancing in a room",
+            "height": 544,
+            "width": 960,
+            "guidance_scale": 6,
+            "num_inference_steps": 1,
+            "negative_prompt": "bad quality",
+            "num_frames": 16,
+            "generator": torch.Generator(self.gpu_device).manual_seed(42),
+            "embedded_guidance_scale": 1.0,
+        }
+        if self.task_type == TaskType.I2V:
+            # PIL sizes are (width, height); match the 960x544 request above.
+            init_kwargs["image"] = Image.new("RGB", (960, 544), color="black")
+        self.pipe(**init_kwargs)
+        logger.info("Warm-up complete.")
+    def infer(self, **kwargs):
+        """Handles inference requests.
+
+        A ``seed`` keyword, if present, is replaced by a seeded ``generator``.
+        All remaining keywords are passed to the pipeline unchanged.
+
+        Returns:
+            The first entry of the pipeline result's ``frames``.
+
+        Raises:
+            ValueError: If the task is I2V and no ``image`` was given, or the
+                task type is neither T2V nor I2V.
+        """
+        if not self.is_initialized:
+            self.initialize()
+        if "seed" in kwargs:
+            kwargs["generator"] = torch.Generator(self.gpu_device).manual_seed(kwargs.pop("seed"))
+        # Explicit raises instead of assert, so the check survives `python -O`.
+        if self.task_type == TaskType.I2V:
+            if "image" not in kwargs:
+                raise ValueError("An 'image' argument is required for the I2V task.")
+        elif self.task_type != TaskType.T2V:
+            raise ValueError(f"Invalid task_type: {self.task_type}")
+        result = self.pipe(**kwargs).frames[0]
+        return result
+_predictor = None  # Global _predictor
 @spaces.GPU(duration=90)
 def generate_video(prompt, seed, image=None):
+    """Generate a video for ``prompt`` and return ``(video_path, kwargs)``.
+
+    The task is chosen from the inputs: no ``image`` means text-to-video,
+    otherwise image-to-video using the file at path ``image``.
+
+    Args:
+        prompt: Text prompt; also used (truncated) in the output file name.
+        seed: Seed for the generator. ``-1`` or ``None`` picks a random seed.
+        image: Optional path to the conditioning image.
+
+    Returns:
+        Tuple of the saved ``.mp4`` path and the kwargs dict built for inference.
+    """
     global _predictor
+    # An empty seed box can arrive as None; treat it like -1 (pick a random seed).
+    if seed is None or seed == -1:
         random.seed()
         seed = int(random.randrange(4294967294))
+    else:
+        # manual_seed() needs an int, and a number input may hand back a float.
+        seed = int(seed)
+    if image is None:
+        task_type = TaskType.T2V
+        task_name = "T2V"
+        model_id = "Skywork/SkyReels-V1-Hunyuan-T2V" # Need to change this when you use the real model.
+        kwargs = {  # Text-to-Video kwargs
+            "prompt": prompt,
+            "height": 512,
+            "width": 512,
+            "num_frames": 16,  # Use a reasonable default
+            "num_inference_steps": 30,
+            "seed": seed,
+            "guidance_scale": 7.5,  # Adjust as needed
+            "negative_prompt": "bad quality, worst quality",  # Your negative prompt
+        }
     else:
+        task_type = TaskType.I2V
+        task_name = "I2V"
+        model_id = "Skywork/SkyReels-V1-Hunyuan-I2V"
+        kwargs = {  # Image-to-Video kwargs
+            "prompt": prompt,
+            "image": Image.open(image),
+            "height": 512,
+            "width": 512,
+            "num_frames": 97,
+            "num_inference_steps": 30,
+            "seed": seed,
+            "guidance_scale": 6.0,
+            "embedded_guidance_scale": 1.0,
+            "negative_prompt": "Aerial view, aerial view, overexposed, low quality, deformation, a poor composition, bad hands, bad teeth, bad eyes, bad limbs, distortion",
+            "cfg_for": False,
+        }
+    # Rebuild the predictor when the requested model differs from the cached one.
+    # Otherwise a T2V request after an I2V one would reuse the wrong model and task.
+    if _predictor is None or getattr(_predictor, "model_id", None) != model_id:
+        _predictor = SkyReelsVideoSingleGpuInfer(
+            task_type=task_type,
+            model_id=model_id,
+            quant_model=True,
+            is_offload=True,
+            offload_config=OffloadConfig(
+                high_cpu_memory=True,
+                parameters_level=True,
+                compiler_transformer=False,  # Change to True for warm-up
+            ),
+        )
+        _predictor.initialize()
+        logger.info("Predictor initialized")
     output = _predictor.infer(**kwargs)
+    # Convert and save video
     output = (output.cpu().numpy() * 255).astype(np.uint8)
     output = output.transpose(0, 2, 3, 4, 1)
+    # Use the literal task name: the TaskType members may be plain ints with no .name attribute.
+    save_dir = f"./result/{task_name}"
     os.makedirs(save_dir, exist_ok=True)
     video_out_file = f"{save_dir}/{prompt[:100].replace('/','')}_{seed}.mp4"
     print(f"generate video, local path: {video_out_file}")
+    export_to_video(output, video_out_file, fps=24)  # Use a reasonable FPS
     return video_out_file, kwargs
 def create_gradio_interface():
     with gr.Blocks() as demo:
         with gr.Row():
+            with gr.Column():
+                image = gr.Image(label="Upload Image", type="filepath")
+                prompt = gr.Textbox(label="Input Prompt")
+                seed = gr.Number(label="Random Seed", value=-1)
+            with gr.Column():
+                submit_button = gr.Button("Generate Video")
+                output_video = gr.Video(label="Generated Video")
+                output_params = gr.Textbox(label="Output Parameters")
         submit_button.click(
             fn=generate_video,
             inputs=[prompt, seed, image],
 if __name__ == "__main__":
     demo = create_gradio_interface()
     demo.queue().launch()