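"""Gradio demo: SkyReels-V2 image-to-video generation, packaged as a Hugging Face Space (ZeroGPU)."""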
import spaces
import os
import torch
import subprocess
# Remove any preinstalled flash-attn so the pipeline falls back to a stock
# attention implementation; check=False keeps startup alive if it was never installed.
package_to_uninstall = "flash-attn"
command = ["python", "-m", "pip", "uninstall", "-y", package_to_uninstall]
result = subprocess.run(command, check=False, capture_output=True, text=True)
import gradio as gr
import imageio
import time
import random
import gc
from PIL import Image
# Import necessary components from the cloned repository
from skyreels_v2_infer.modules import download_model
from skyreels_v2_infer.pipelines import Image2VideoPipeline, resizecrop
# --- Global Configuration & Model Loading ---
# MODEL_ID = "Skywork/SkyReels-V2-I2V-14B-720P"  # larger 720P variant (needs far more VRAM)
MODEL_ID = "Skywork/SkyReels-V2-I2V-1.3B-540P"
# The 540P checkpoints were trained at 544x960; height and width must be multiples of 16.
HEIGHT = 544
WIDTH = 960
OUTPUT_DIR = "video_out"
# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)
print("Downloading and loading model... This may take a while.")
# Download model files to the cache
cached_model_path = download_model(MODEL_ID)
# Load the pipeline. This is done once when the Space starts.
# We enable offload by default to be compatible with GPUs like A10G-Large (24GB VRAM)
pipe = Image2VideoPipeline(
model_path=cached_model_path,
dit_path=cached_model_path,
use_usp=False,
offload=True # Enable CPU offload to save VRAM
)
print("Model loaded successfully.")
# --- Inference Function ---
@spaces.GPU(duration=120)  # ZeroGPU attaches a GPU only for this call; extend the default 60 s window
def generate_video(input_image, prompt, guidance_scale, inference_steps, num_frames, fps, seed):
"""
Main function to generate video from an image and a prompt.
"""
if input_image is None:
raise gr.Error("You must upload an initial image.")
if not prompt:
raise gr.Error("Prompt cannot be empty.")
# Use provided seed or generate a random one
if seed == -1:
seed = random.randint(0, 2**32 - 1)
generator = torch.Generator(device="cuda").manual_seed(seed)
# Prepare the input image (resize and crop)
image = Image.fromarray(input_image).convert("RGB")
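    # resizecrop (from skyreels_v2_infer) center-crops to the target aspect
    # ratio, then resizes to exactly HEIGHT x WIDTH.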
processed_image = resizecrop(image, HEIGHT, WIDTH)
# Define a default negative prompt
negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, worst quality, low quality, JPEG compression residue, ugly, deformed."
# Set up generation parameters
kwargs = {
"image": processed_image,
"prompt": prompt,
"negative_prompt": negative_prompt,
"num_frames": num_frames,
"num_inference_steps": inference_steps,
"guidance_scale": guidance_scale,
"shift": 8.0, # Default value from original script
"generator": generator,
"height": HEIGHT,
"width": WIDTH,
}
print(f"Generating video with seed: {seed}")
start_time = time.time()
# Run inference
    # Run the denoising loop in mixed precision to reduce VRAM use.
    with torch.amp.autocast("cuda", dtype=pipe.transformer.dtype), torch.no_grad():
video_frames = pipe(**kwargs)[0]
end_time = time.time()
print(f"Inference took {end_time - start_time:.2f} seconds.")
# Save the output video
# Sanitize prompt for filename
    safe_prompt = "".join(c for c in prompt if c.isalnum() or c in " _-").strip()[:50] or "video"
output_filename = f"{safe_prompt}_{seed}.mp4"
output_path = os.path.join(OUTPUT_DIR, output_filename)
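    # .mp4 output goes through imageio's ffmpeg plugin (the imageio-ffmpeg
    # package must be installed); quality is on imageio's 0-10 scale.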
imageio.mimwrite(output_path, video_frames, fps=fps, quality=8, output_params=["-loglevel", "error"])
print(f"Video saved to {output_path}")
# Clean up memory
gc.collect()
torch.cuda.empty_cache()
return output_path
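
# Minimal local smoke test, assuming a CUDA machine outside Spaces (where the
# `spaces` decorator is a no-op) and that ./examples/car.png from the Examples
# section below exists. Kept commented out so the Space itself never runs it.
# import numpy as np
# test_image = np.asarray(Image.open("./examples/car.png").convert("RGB"))
# print(generate_video(test_image, "A car driving on a rainy street at night.",
#                      guidance_scale=6.0, inference_steps=10, num_frames=25,
#                      fps=24, seed=42))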
# --- Gradio UI ---
with gr.Blocks(css="footer {display: none !important}") as demo:
gr.Markdown(
"""
# SkyReels-V2 Image-to-Video Generator
### Model: Skywork/SkyReels-V2-I2V-14B-720P
This Space demonstrates the SkyReels V2 model for generating video from a single starting image and a text prompt.
**Note:** This is a very large model. Generation can take several minutes, even on powerful GPUs.
"""
)
with gr.Row():
with gr.Column():
input_image = gr.Image(type="numpy", label="Initial Image")
prompt = gr.Textbox(label="Prompt", placeholder="e.g., A cinematic shot of a car driving on a rainy street at night.")
with gr.Accordion("Advanced Settings", open=False):
guidance_scale = gr.Slider(minimum=1.0, maximum=15.0, value=6.0, step=0.5, label="Guidance Scale")
inference_steps = gr.Slider(minimum=10, maximum=100, value=30, step=1, label="Inference Steps")
num_frames = gr.Slider(minimum=25, maximum=145, value=97, step=8, label="Number of Frames")
fps = gr.Slider(minimum=8, maximum=30, value=24, step=1, label="Frames Per Second (FPS)")
seed = gr.Number(value=-1, label="Seed (-1 for random)")
with gr.Column():
output_video = gr.Video(label="Generated Video")
run_button = gr.Button("Generate Video", variant="primary")
gr.Examples(
examples=[
["./examples/car.png", "A cinematic shot of a car driving on a rainy street at night, neon lights reflecting on the wet pavement.", 7.0, 30, 97, 24, 12345],
["./examples/castle.png", "An epic fantasy castle in the mountains, dragons flying in the sky, cinematic lighting.", 6.0, 40, 97, 12, 54321],
],
inputs=[input_image, prompt, guidance_scale, inference_steps, num_frames, fps, seed],
outputs=output_video,
fn=generate_video,
        cache_examples=False,  # Set to True to pre-compute example outputs (requires a GPU at build time)
)
# Add example images to your space in a folder named 'examples' for this to work
# Or simply remove the gr.Examples block.
run_button.click(fn=generate_video, inputs=[input_image, prompt, guidance_scale, inference_steps, num_frames, fps, seed], outputs=output_video)
if __name__ == "__main__":
    demo.launch()