# Wan2.2 text-to-video demo (Hugging Face Space, running on ZeroGPU)
import os
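# Install a pre-release PyTorch (CUDA 12.6 nightly index, pinned below 2.9) and the
# `spaces` SDK at startup, before `torch` and `spaces` are imported below.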
os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch<2.9" spaces')
import torch
import gradio as gr
import tempfile
import random
import numpy as np
import spaces
from diffusers import WanPipeline, AutoencoderKLWan
from diffusers.utils import export_to_video
# Constants
MODEL_ID = "Wan-AI/Wan2.2-T2V-A14B-Diffusers"
MAX_SEED = np.iinfo(np.int32).max
FIXED_FPS = 16
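# The default negative prompt below is kept in Chinese, as commonly shipped with Wan models.
# Rough English translation: "garish colors, overexposed, static, blurry details, subtitles,
# style, artwork, painting, frame, motionless, overall gray, worst quality, low quality,
# JPEG compression artifacts, ugly, mutilated, extra fingers, poorly drawn hands, poorly
# drawn face, deformed, disfigured, malformed limbs, fused fingers, static frame, cluttered
# background, three legs, crowded background, walking backwards".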
DEFAULT_NEGATIVE_PROMPT = (
    "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,"
    "最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,"
    "画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
)
# Setup
dtype = torch.float16 # using float16 for broader compatibility
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Load model components on correct device
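# The VAE is kept in float32 (as recommended for AutoencoderKLWan), while the rest of the
# pipeline runs in the half-precision dtype chosen above.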
vae = AutoencoderKLWan.from_pretrained(
    MODEL_ID, subfolder="vae", torch_dtype=torch.float32
).to(device)
pipe = WanPipeline.from_pretrained(
    MODEL_ID, vae=vae, torch_dtype=dtype
).to(device)
# Warm-up call to reduce cold-start latency
_ = pipe(
    prompt="warmup",
    negative_prompt=DEFAULT_NEGATIVE_PROMPT,
    height=512,
    width=768,
    num_frames=9,  # Wan expects num_frames of the form 4k + 1
    num_inference_steps=2,
    generator=torch.Generator(device=device).manual_seed(0),
).frames[0]
# Estimate duration for Hugging Face Spaces GPU usage
def get_duration(prompt, negative_prompt, height, width, num_frames, guidance_scale, guidance_scale_2, num_steps, seed, randomize_seed):
    return int(num_steps * 15)
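# `spaces.GPU` accepts a callable for `duration`: ZeroGPU calls it with the same arguments
# as the decorated function and uses the returned number of seconds as the GPU reservation.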
@spaces.GPU(duration=get_duration)
def generate_video(
    prompt,
    negative_prompt,
    height,
    width,
    num_frames,
    guidance_scale,
    guidance_scale_2,
    num_steps,
    seed,
    randomize_seed,
):
    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    generator = torch.Generator(device=device).manual_seed(current_seed)
    output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_frames=num_frames,
        guidance_scale=guidance_scale,
        guidance_scale_2=guidance_scale_2,
        num_inference_steps=num_steps,
        generator=generator,
    ).frames[0]
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        export_to_video(output, tmpfile.name, fps=FIXED_FPS)
    return tmpfile.name, current_seed
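# Example of calling the generator directly for a local smoke test (illustrative values only,
# assuming a CUDA GPU is available outside the Spaces/ZeroGPU environment):
#   video_path, used_seed = generate_video(
#       "A corgi surfing a small wave at sunset", DEFAULT_NEGATIVE_PROMPT,
#       height=480, width=832, num_frames=33,
#       guidance_scale=4.0, guidance_scale_2=3.0,
#       num_steps=20, seed=0, randomize_seed=False,
#   )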
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🎬 Wan2.2 Text-to-Video Generator with Hugging Face Spaces GPU")
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Prompt", value="Two anthropomorphic cats in comfy boxing gear fight intensely.")
            negative_prompt = gr.Textbox(label="Negative Prompt", value=DEFAULT_NEGATIVE_PROMPT, lines=3)
            height = gr.Slider(352, 1024, value=720, step=16, label="Height")  # keep dimensions at multiples of 16, as the pipeline requires
            width = gr.Slider(352, 1920, value=1280, step=16, label="Width")
            num_frames = gr.Slider(8, 81, value=81, step=1, label="Number of Frames")
            num_steps = gr.Slider(10, 60, value=40, step=1, label="Inference Steps")
            guidance_scale = gr.Slider(1.0, 10.0, value=4.0, step=0.5, label="Guidance Scale")
            guidance_scale_2 = gr.Slider(1.0, 10.0, value=3.0, step=0.5, label="Guidance Scale 2")
            seed = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed")
            randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
            generate_button = gr.Button("🎥 Generate Video")
        with gr.Column():
            video_output = gr.Video(label="Generated Video", autoplay=True, interactive=False)
            final_seed_display = gr.Number(label="Used Seed", interactive=False)
    generate_button.click(
        fn=generate_video,
        inputs=[prompt, negative_prompt, height, width, num_frames, guidance_scale, guidance_scale_2, num_steps, seed, randomize_seed],
        outputs=[video_output, final_seed_display],
    )
if __name__ == "__main__":
    demo.queue().launch()