import gradio as gr
import numpy as np
import torch

from video_diffusion.stable_diffusion_video.stable_diffusion_pipeline import StableDiffusionWalkPipeline
from video_diffusion.utils.model_list import stable_model_list


class StableDiffusionText2VideoGenerator:
    def __init__(self):
        self.pipe = None

    def load_model(self, model_path):
        # Load the pipeline once and cache it; later calls reuse self.pipe.
        if self.pipe is None:
            self.pipe = StableDiffusionWalkPipeline.from_pretrained(
                model_path,
                torch_dtype=torch.float16,
                revision="fp16",
            )

        self.pipe.to("cuda")
        self.pipe.enable_xformers_memory_efficient_attention()
        self.pipe.enable_attention_slicing()
        return self.pipe

    def generate_video(
        self,
        model_path: str,
        first_prompts: str,
        second_prompts: str,
        negative_prompt: str,
        num_interpolation_steps: int,
        guidance_scale: float,
        num_inference_step: int,
        height: int,
        width: int,
        upsample: bool,
        fps: int,
    ):
        # Draw a random seed for each endpoint prompt so every run walks a
        # different path through latent space.
        first_seed = np.random.randint(0, 100000)
        second_seed = np.random.randint(0, 100000)
        seeds = [first_seed, second_seed]
        prompts = [first_prompts, second_prompts]

        pipe = self.load_model(model_path=model_path)
        output_video = pipe.walk(
            prompts=prompts,
            num_interpolation_steps=int(num_interpolation_steps),
            height=height,
            width=width,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_step,
            negative_prompt=negative_prompt,
            seeds=seeds,
            upsample=upsample,
            fps=fps,
        )
        return output_video
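
# Headless usage sketch (assumption, not part of the original file): the
# generator can also be driven without the Gradio UI. The prompt strings and
# settings below are illustrative only.
#
#   generator = StableDiffusionText2VideoGenerator()
#   video_path = generator.generate_video(
#       model_path=stable_model_list[0],
#       first_prompts="a sunrise over the ocean",
#       second_prompts="a starry night sky",
#       negative_prompt="blurry, low quality",
#       num_interpolation_steps=10,
#       guidance_scale=8.5,
#       num_inference_step=30,
#       height=512,
#       width=512,
#       upsample=False,
#       fps=10,
#   )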


def app():
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                stable_text2video_first_prompt = gr.Textbox(
                    lines=1,
                    placeholder="First Prompt",
                    show_label=False,
                )
                stable_text2video_second_prompt = gr.Textbox(
                    lines=1,
                    placeholder="Second Prompt",
                    show_label=False,
                )
                stable_text2video_negative_prompt = gr.Textbox(
                    lines=1,
                    placeholder="Negative Prompt",
                    show_label=False,
                )
                with gr.Row():
                    with gr.Column():
                        stable_text2video_model_path = gr.Dropdown(
                            choices=stable_model_list,
                            label="Stable Model List",
                            value=stable_model_list[0],
                        )
                        stable_text2video_guidance_scale = gr.Slider(
                            minimum=0,
                            maximum=15,
                            step=0.5,
                            value=8.5,
                            label="Guidance Scale",
                        )
                        stable_text2video_num_inference_steps = gr.Slider(
                            minimum=1,
                            maximum=100,
                            step=1,
                            value=30,
                            label="Number of Inference Steps",
                        )
                        stable_text2video_fps = gr.Slider(
                            minimum=1,
                            maximum=60,
                            step=1,
                            value=10,
                            label="FPS",
                        )
                with gr.Row():
                    with gr.Column():
                        stable_text2video_num_interpolation_steps = gr.Number(
                            value=10,
                            label="Number of Interpolation Steps",
                        )
                        stable_text2video_height = gr.Slider(
                            minimum=1,
                            maximum=1000,
                            step=1,
                            value=512,
                            label="Height",
                        )
                        stable_text2video_width = gr.Slider(
                            minimum=1,
                            maximum=1000,
                            step=1,
                            value=512,
                            label="Width",
                        )
                        stable_text2video_upsample = gr.Checkbox(
                            label="Upsample",
                            value=False,
                        )

                text2video_generate = gr.Button(value="Generate")

            with gr.Column():
                text2video_output = gr.Video(label="Output")

        text2video_generate.click(
            fn=StableDiffusionText2VideoGenerator().generate_video,
            inputs=[
                stable_text2video_model_path,
                stable_text2video_first_prompt,
                stable_text2video_second_prompt,
                stable_text2video_negative_prompt,
                stable_text2video_num_interpolation_steps,
                stable_text2video_guidance_scale,
                stable_text2video_num_inference_steps,
                stable_text2video_height,
                stable_text2video_width,
                stable_text2video_upsample,
                stable_text2video_fps,
            ],
            outputs=text2video_output,
        )

    return demo
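

# Usage sketch (assumption: the __main__ guard below is not part of the
# original file). app() returns the Blocks it builds, so the interface can
# be launched standalone or mounted inside a larger Gradio app.
if __name__ == "__main__":
    app().launch()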