import spaces
import os

import torch

import subprocess
import sys

# Uninstall flash-attn before any module can import it (rationale below).
package_to_uninstall = "flash-attn"
command = [sys.executable, "-m", "pip", "uninstall", "-y", package_to_uninstall]
subprocess.run(command, check=True, capture_output=True, text=True)
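# Why uninstall flash-attn? Prebuilt flash-attn wheels are frequently compiled
# against a different torch/CUDA combination than the Space image provides,
# and importing a mismatched build crashes the process. Without it, the model
# falls back to PyTorch's built-in attention, which is slower but reliable.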

import gradio as gr
import imageio
import time
import random
import gc
from PIL import Image

# Import necessary components from the cloned repository
from skyreels_v2_infer.modules import download_model
from skyreels_v2_infer.pipelines import Image2VideoPipeline, resizecrop

# --- Global Configuration & Model Loading ---
#MODEL_ID = "Skywork/SkyReels-V2-I2V-14B-720P"
MODEL_ID = "Skywork/SkyReels-V2-I2V-1.3B-540P"
HEIGHT = 540
WIDTH = 540
OUTPUT_DIR = "video_out"

# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("Downloading and loading model... This may take a while.")
# Download model files to the cache
cached_model_path = download_model(MODEL_ID)
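# download_model resolves MODEL_ID to a local snapshot directory; in the
# upstream repo this appears to wrap a Hugging Face Hub download, so restarts
# reuse the local cache instead of re-fetching the weights.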

# Load the pipeline. This is done once when the Space starts.
# We enable offload by default to be compatible with GPUs like A10G-Large (24GB VRAM)
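# Offloading keeps each sub-model (text encoder, DiT, VAE) in CPU RAM and
# moves it to the GPU only while it is needed, trading throughput for a much
# smaller peak VRAM footprint.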
pipe = Image2VideoPipeline(
    model_path=cached_model_path,
    dit_path=cached_model_path,
    use_usp=False,
    offload=True  # Enable CPU offload to save VRAM
)
print("Model loaded successfully.")

# --- Inference Function ---
@spaces.GPU(required=True)
def generate_video(input_image, prompt, guidance_scale, inference_steps, num_frames, fps, seed):
    """
    Main function to generate video from an image and a prompt.
    """
    if input_image is None:
        raise gr.Error("You must upload an initial image.")
    if not prompt:
        raise gr.Error("Prompt cannot be empty.")

    # Use the provided seed, or draw a random one when the user passes -1.
    seed = int(seed)  # gr.Number delivers a float; manual_seed needs an int
    if seed == -1:
        seed = random.randint(0, 2**32 - 1)
    
    generator = torch.Generator(device="cuda").manual_seed(seed)
    
    # Prepare the input image (resize and crop)
    image = Image.fromarray(input_image).convert("RGB")
    processed_image = resizecrop(image, HEIGHT, WIDTH)
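    # resizecrop scales the image to cover the target size while preserving
    # its aspect ratio, then center-crops the overhang, so the subject is
    # never stretched or squashed.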

    # Define a default negative prompt
    negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, worst quality, low quality, JPEG compression residue, ugly, deformed."

    # Set up generation parameters
    kwargs = {
        "image": processed_image,
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "num_frames": num_frames,
        "num_inference_steps": inference_steps,
        "guidance_scale": guidance_scale,
        "shift": 8.0, # Default value from original script
        "generator": generator,
        "height": HEIGHT,
        "width": WIDTH,
    }
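    # "shift" skews the flow-matching timestep schedule toward the noisier
    # steps; 8.0 mirrors the default used by the upstream generate_video.py.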
    
    print(f"Generating video with seed: {seed}")
    start_time = time.time()
    
    # Run inference
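    # autocast matches the transformer's own dtype (fp16/bf16) so activations
    # stay in reduced precision, and no_grad skips building the autograd
    # graph, which is pure overhead at inference time.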
    with torch.amp.autocast("cuda", dtype=pipe.transformer.dtype), torch.no_grad():
        video_frames = pipe(**kwargs)[0]

    end_time = time.time()
    print(f"Inference took {end_time - start_time:.2f} seconds.")

    # Save the output video
    # Sanitize prompt for filename
    safe_prompt = "".join(c for c in prompt if c.isalnum() or c in " _-").strip()[:50]
    output_filename = f"{safe_prompt}_{seed}.mp4"
    output_path = os.path.join(OUTPUT_DIR, output_filename)
    
    imageio.mimwrite(output_path, video_frames, fps=fps, quality=8, output_params=["-loglevel", "error"])
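    # quality is imageio's 0-10 scale for the ffmpeg writer (higher is better
    # but larger); "-loglevel error" keeps ffmpeg's banner out of the logs.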
    
    print(f"Video saved to {output_path}")
    
    # Clean up memory
    gc.collect()
    torch.cuda.empty_cache()
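    # empty_cache returns cached CUDA blocks to the driver so the next
    # request starts from a clean allocation pool.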
    
    return output_path

# --- Gradio UI ---
with gr.Blocks(css="footer {display: none !important}") as demo:
    gr.Markdown(
        f"""
        # SkyReels-V2 Image-to-Video Generator
        ### Model: {MODEL_ID}
        This Space demonstrates the SkyReels-V2 model, which generates a video from a single starting image and a text prompt.
        **Note:** Video generation is compute-heavy. A single run can take several minutes, even on powerful GPUs.
        """
    )
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="numpy", label="Initial Image")
            prompt = gr.Textbox(label="Prompt", placeholder="e.g., A cinematic shot of a car driving on a rainy street at night.")
            
            with gr.Accordion("Advanced Settings", open=False):
                guidance_scale = gr.Slider(minimum=1.0, maximum=15.0, value=6.0, step=0.5, label="Guidance Scale")
                inference_steps = gr.Slider(minimum=10, maximum=100, value=30, step=1, label="Inference Steps")
                num_frames = gr.Slider(minimum=25, maximum=145, value=97, step=8, label="Number of Frames")
                fps = gr.Slider(minimum=8, maximum=30, value=24, step=1, label="Frames Per Second (FPS)")
                seed = gr.Number(value=-1, label="Seed (-1 for random)")
        
        with gr.Column():
            output_video = gr.Video(label="Generated Video")
            run_button = gr.Button("Generate Video", variant="primary")

    gr.Examples(
        examples=[
            ["./examples/car.png", "A cinematic shot of a car driving on a rainy street at night, neon lights reflecting on the wet pavement.", 7.0, 30, 97, 24, 12345],
            ["./examples/castle.png", "An epic fantasy castle in the mountains, dragons flying in the sky, cinematic lighting.", 6.0, 40, 97, 12, 54321],
        ],
        inputs=[input_image, prompt, guidance_scale, inference_steps, num_frames, fps, seed],
        outputs=output_video,
        fn=generate_video,
        cache_examples=False,  # Set to True to pre-render the examples at startup (uses GPU time)
    )

    # Add example images to your space in a folder named 'examples' for this to work
    # Or simply remove the gr.Examples block.

    run_button.click(fn=generate_video, inputs=[input_image, prompt, guidance_scale, inference_steps, num_frames, fps, seed], outputs=output_video)
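    # Note: recent Gradio versions queue events by default, which matters
    # here because a single generation can hold the GPU for minutes, so
    # concurrent users wait in line instead of colliding.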

if __name__ == "__main__":
    demo.launch()