import spaces  # must be imported before any CUDA initialization on ZeroGPU
import os
import subprocess

# Uninstall flash-attn so the pipeline falls back to PyTorch attention; a
# prebuilt flash-attn wheel is often incompatible with the torch/CUDA build
# on the Space. This must run before the skyreels_v2_infer imports below.
subprocess.run(
    ["python", "-m", "pip", "uninstall", "-y", "flash-attn"],
    check=True,
    capture_output=True,
    text=True,
)

import gc
import random
import time

import gradio as gr
import imageio
import torch
from PIL import Image

# Import necessary components from the cloned repository
from skyreels_v2_infer.modules import download_model
from skyreels_v2_infer.pipelines import Image2VideoPipeline, resizecrop

# --- Global Configuration & Model Loading ---

# MODEL_ID = "Skywork/SkyReels-V2-I2V-14B-720P"
MODEL_ID = "Skywork/SkyReels-V2-I2V-1.3B-540P"
HEIGHT = 540
WIDTH = 540  # square output: resizecrop() crops the input to HEIGHT x WIDTH
OUTPUT_DIR = "video_out"

# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("Downloading and loading model... This may take a while.")

# Download model files to the cache
cached_model_path = download_model(MODEL_ID)

# Load the pipeline. This is done once when the Space starts.
# Offload is enabled by default so the app fits on GPUs like an A10G-Large (24 GB VRAM).
pipe = Image2VideoPipeline(
    model_path=cached_model_path,
    dit_path=cached_model_path,
    use_usp=False,
    offload=True,  # enable CPU offload to save VRAM
)
print("Model loaded successfully.")


# --- Inference Function ---
@spaces.GPU(duration=120)  # generation can run for minutes; request a longer ZeroGPU slot
def generate_video(input_image, prompt, guidance_scale, inference_steps, num_frames, fps, seed):
    """Generate a video from an initial image and a text prompt."""
    if input_image is None:
        raise gr.Error("You must upload an initial image.")
    if not prompt:
        raise gr.Error("Prompt cannot be empty.")

    # gr.Number delivers a float, but manual_seed() requires an int.
    # Use the provided seed, or generate a random one.
    seed = int(seed)
    if seed == -1:
        seed = random.randint(0, 2**32 - 1)
    generator = torch.Generator(device="cuda").manual_seed(seed)

    # Prepare the input image (resize and crop to the target resolution)
    image = Image.fromarray(input_image).convert("RGB")
    processed_image = resizecrop(image, HEIGHT, WIDTH)

    # Default negative prompt
    negative_prompt = (
        "Bright tones, overexposed, static, blurred details, subtitles, worst quality, "
        "low quality, JPEG compression residue, ugly, deformed."
    )

    # Set up generation parameters
    kwargs = {
        "image": processed_image,
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "num_frames": num_frames,
        "num_inference_steps": inference_steps,
        "guidance_scale": guidance_scale,
        "shift": 8.0,  # default value from the original SkyReels script
        "generator": generator,
        "height": HEIGHT,
        "width": WIDTH,
    }

    print(f"Generating video with seed: {seed}")
    start_time = time.time()

    # Run inference (torch.amp.autocast is the non-deprecated spelling of torch.cuda.amp.autocast)
    with torch.amp.autocast("cuda", dtype=pipe.transformer.dtype), torch.no_grad():
        video_frames = pipe(**kwargs)[0]

    end_time = time.time()
    print(f"Inference took {end_time - start_time:.2f} seconds.")

    # Save the output video; sanitize the prompt for use in the filename
    safe_prompt = "".join(c for c in prompt if c.isalnum() or c in " _-").strip()[:50]
    output_filename = f"{safe_prompt}_{seed}.mp4"
    output_path = os.path.join(OUTPUT_DIR, output_filename)
    imageio.mimwrite(output_path, video_frames, fps=fps, quality=8, output_params=["-loglevel", "error"])
    print(f"Video saved to {output_path}")

    # Clean up memory
    gc.collect()
    torch.cuda.empty_cache()

    return output_path


# --- Gradio UI ---
with gr.Blocks(css="footer {display: none !important}") as demo:
    gr.Markdown(
        f"""
        # SkyReels-V2 Image-to-Video Generator
        ### Model: {MODEL_ID}
        This Space demonstrates the SkyReels V2 model for generating video from a
        single starting image and a text prompt.
        **Note:** Generation can take several minutes, even on powerful GPUs.
        """
    )
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="numpy", label="Initial Image")
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="e.g., A cinematic shot of a car driving on a rainy street at night.",
            )
            with gr.Accordion("Advanced Settings", open=False):
                guidance_scale = gr.Slider(minimum=1.0, maximum=15.0, value=6.0, step=0.5, label="Guidance Scale")
                inference_steps = gr.Slider(minimum=10, maximum=100, value=30, step=1, label="Inference Steps")
                num_frames = gr.Slider(minimum=25, maximum=145, value=97, step=8, label="Number of Frames")
                fps = gr.Slider(minimum=8, maximum=30, value=24, step=1, label="Frames Per Second (FPS)")
                seed = gr.Number(value=-1, label="Seed (-1 for random)")
        with gr.Column():
            output_video = gr.Video(label="Generated Video")
            run_button = gr.Button("Generate Video", variant="primary")

    # Add example images to your Space in a folder named 'examples' for this to
    # work, or simply remove the gr.Examples block.
    gr.Examples(
        examples=[
            ["./examples/car.png", "A cinematic shot of a car driving on a rainy street at night, neon lights reflecting on the wet pavement.", 7.0, 30, 97, 24, 12345],
            ["./examples/castle.png", "An epic fantasy castle in the mountains, dragons flying in the sky, cinematic lighting.", 6.0, 40, 97, 12, 54321],
        ],
        inputs=[input_image, prompt, guidance_scale, inference_steps, num_frames, fps, seed],
        outputs=output_video,
        fn=generate_video,
        cache_examples=False,  # set to True to pre-compute examples on a GPU at startup
    )

    run_button.click(
        fn=generate_video,
        inputs=[input_image, prompt, guidance_scale, inference_steps, num_frames, fps, seed],
        outputs=output_video,
    )

if __name__ == "__main__":
    demo.launch()
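
# ---------------------------------------------------------------------------
# Local smoke test (a sketch, not part of the Space): outside a ZeroGPU Space
# the @spaces.GPU decorator should be a no-op, so on a machine with a CUDA GPU
# generate_video() can be called directly. The image path, prompt, and
# parameter values below are illustrative assumptions.
#
#   import numpy as np
#   from PIL import Image
#
#   frame = np.array(Image.open("./examples/car.png").convert("RGB"))
#   path = generate_video(frame, "A cinematic shot of a car at night.",
#                         guidance_scale=6.0, inference_steps=30,
#                         num_frames=97, fps=24, seed=12345)
#   print(path)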