# SkyReelsV2 / app.py
import spaces
import os
import torch
import subprocess
# Uninstall any pre-installed flash-attn before importing the pipeline (likely
# because a prebuilt flash-attn wheel can clash with the torch build in this
# image). Guarded so a failed uninstall does not crash startup.
package_to_uninstall = "flash-attn"
command = ["python", "-m", "pip", "uninstall", "-y", package_to_uninstall]
try:
    result = subprocess.run(command, check=True, capture_output=True, text=True)
except subprocess.CalledProcessError as e:
    print(f"Could not uninstall {package_to_uninstall}: {e.stderr}")
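# Optional follow-up check, sketched here with the stdlib only;
# importlib.metadata raises PackageNotFoundError once the package is gone:
#   import importlib.metadata
#   try:
#       importlib.metadata.version("flash-attn")
#       print("warning: flash-attn is still installed")
#   except importlib.metadata.PackageNotFoundError:
#       pass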
import gradio as gr
import imageio
import time
import random
import gc
from PIL import Image
# Import necessary components from the cloned repository
from skyreels_v2_infer.modules import download_model
from skyreels_v2_infer.pipelines import Image2VideoPipeline, resizecrop
# --- Global Configuration & Model Loading ---
# Swap in the 14B/720P checkpoint below for higher quality (needs far more VRAM):
# MODEL_ID = "Skywork/SkyReels-V2-I2V-14B-720P"
MODEL_ID = "Skywork/SkyReels-V2-I2V-1.3B-540P"
HEIGHT = 540
WIDTH = 540
OUTPUT_DIR = "video_out"
# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)
print("Downloading and loading model... This may take a while.")
# Download model files to the cache
cached_model_path = download_model(MODEL_ID)
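# download_model (from skyreels_v2_infer.modules) is assumed here to resolve the
# repo id to a local snapshot in the HF cache and return that local path.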
# Load the pipeline. This is done once when the Space starts.
# We enable offload by default to be compatible with GPUs like A10G-Large (24GB VRAM)
pipe = Image2VideoPipeline(
    model_path=cached_model_path,
    dit_path=cached_model_path,
    use_usp=False,
    offload=True,  # Enable CPU offload to save VRAM
)
print("Model loaded successfully.")
# --- Inference Function ---
@spaces.GPU(required=True)
def generate_video(input_image, prompt, guidance_scale, inference_steps, num_frames, fps, seed):
"""
Main function to generate video from an image and a prompt.
"""
if input_image is None:
raise gr.Error("You must upload an initial image.")
if not prompt:
raise gr.Error("Prompt cannot be empty.")
# Use provided seed or generate a random one
if seed == -1:
seed = random.randint(0, 2**32 - 1)
generator = torch.Generator(device="cuda").manual_seed(seed)
    # Prepare the input image (resize and crop to the target resolution)
    image = Image.fromarray(input_image).convert("RGB")
    processed_image = resizecrop(image, HEIGHT, WIDTH)
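    # For intuition only: a rough PIL stand-in for resizecrop, assuming it
    # center-crops to the target aspect ratio and then resizes (see
    # skyreels_v2_infer.pipelines for the authoritative implementation):
    #   from PIL import ImageOps
    #   processed_image = ImageOps.fit(image, (WIDTH, HEIGHT), Image.LANCZOS)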
    # Define a default negative prompt
    negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, worst quality, low quality, JPEG compression residue, ugly, deformed."
    # Set up generation parameters
    kwargs = {
        "image": processed_image,
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "num_frames": num_frames,
        "num_inference_steps": inference_steps,
        "guidance_scale": guidance_scale,
        "shift": 8.0,  # Default value from the original script
        "generator": generator,
        "height": HEIGHT,
        "width": WIDTH,
    }
print(f"Generating video with seed: {seed}")
start_time = time.time()
# Run inference
with torch.cuda.amp.autocast(dtype=pipe.transformer.dtype), torch.no_grad():
video_frames = pipe(**kwargs)[0]
end_time = time.time()
print(f"Inference took {end_time - start_time:.2f} seconds.")
    # Save the output video; sanitize the prompt for use in the filename
    # (fall back to "video" if no filename-safe characters remain)
    safe_prompt = "".join(c for c in prompt if c.isalnum() or c in " _-").strip()[:50] or "video"
    output_filename = f"{safe_prompt}_{seed}.mp4"
    output_path = os.path.join(OUTPUT_DIR, output_filename)
    imageio.mimwrite(output_path, video_frames, fps=fps, quality=8, output_params=["-loglevel", "error"])
    print(f"Video saved to {output_path}")
    # Clean up memory
    gc.collect()
    torch.cuda.empty_cache()
    return output_path
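# Hypothetical local smoke test (not run in the Space; assumes examples/car.png
# exists and a CUDA GPU is attached):
#   import numpy as np
#   out_path = generate_video(
#       np.array(Image.open("examples/car.png").convert("RGB")),
#       "A car driving on a rainy street at night", 6.0, 30, 97, 24, 42)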
# --- Gradio UI ---
with gr.Blocks(css="footer {display: none !important}") as demo:
    gr.Markdown(
        """
        # SkyReels-V2 Image-to-Video Generator
        ### Model: Skywork/SkyReels-V2-I2V-1.3B-540P
        This Space demonstrates the SkyReels V2 model for generating video from a single starting image and a text prompt.
        **Note:** Generation can take several minutes, even on powerful GPUs.
        """
    )
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="numpy", label="Initial Image")
            prompt = gr.Textbox(label="Prompt", placeholder="e.g., A cinematic shot of a car driving on a rainy street at night.")
            with gr.Accordion("Advanced Settings", open=False):
                guidance_scale = gr.Slider(minimum=1.0, maximum=15.0, value=6.0, step=0.5, label="Guidance Scale")
                inference_steps = gr.Slider(minimum=10, maximum=100, value=30, step=1, label="Inference Steps")
                num_frames = gr.Slider(minimum=25, maximum=145, value=97, step=8, label="Number of Frames")
                fps = gr.Slider(minimum=8, maximum=30, value=24, step=1, label="Frames Per Second (FPS)")
                seed = gr.Number(value=-1, label="Seed (-1 for random)")
        with gr.Column():
            output_video = gr.Video(label="Generated Video")
            run_button = gr.Button("Generate Video", variant="primary")
    gr.Examples(
        examples=[
            ["./examples/car.png", "A cinematic shot of a car driving on a rainy street at night, neon lights reflecting on the wet pavement.", 7.0, 30, 97, 24, 12345],
            ["./examples/castle.png", "An epic fantasy castle in the mountains, dragons flying in the sky, cinematic lighting.", 6.0, 40, 97, 12, 54321],
        ],
        inputs=[input_image, prompt, guidance_scale, inference_steps, num_frames, fps, seed],
        outputs=output_video,
        fn=generate_video,
        cache_examples=False,  # Set to True to pre-compute examples (requires a GPU at build time)
    )
    # Add example images to the Space in a folder named 'examples' for the block
    # above to work, or simply remove the gr.Examples block.
    run_button.click(fn=generate_video, inputs=[input_image, prompt, guidance_scale, inference_steps, num_frames, fps, seed], outputs=output_video)
if __name__ == "__main__":
    demo.launch()