- Tuned on:
- https://huggingface.co/datasets/svjack/video-dataset-genshin-impact-ep-landscape-organized
- https://huggingface.co/datasets/svjack/video-dataset-genshin-impact-ep-character-organized
- Goal: generate video in the game's visual style from a tiny dataset.
Installation
pip install git+https://github.com/huggingface/diffusers.git peft transformers torch sentencepiece opencv-python
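Optionally, a quick sanity check that the environment is ready (this snippet is only illustrative; it assumes a CUDA-capable machine and a diffusers build that ships MochiPipeline):
# Optional check: confirm the key packages import and CUDA is visible
import torch, diffusers
print(diffusers.__version__, torch.cuda.is_available())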
Example
Landscape Example
from diffusers import MochiPipeline
from diffusers.utils import export_to_video
import torch
# Load the base Mochi pipeline and apply the game-style LoRA
pipe = MochiPipeline.from_pretrained("genmo/mochi-1-preview", torch_dtype=torch.float16)
pipe.load_lora_weights("svjack/mochi_game_mix_early_lora")
# Reduce VRAM usage: offload weights to CPU and slice/tile the VAE
pipe.enable_model_cpu_offload()
pipe.enable_sequential_cpu_offload()
pipe.vae.enable_slicing()
pipe.vae.enable_tiling()
i = 50
generator = torch.Generator("cpu").manual_seed(i)
pipeline_args = {
"prompt": "The video presents a tranquil scene of a small, isolated island with a rocky outcrop. The island is covered in lush greenery and dotted with vibrant pink cherry blossom trees in full bloom. A traditional-style building with a pagoda-like roof stands prominently on the highest point of the island, suggesting a cultural or historical significance. The sky above is a gradient of soft pastel colors, transitioning from light blue to pink, indicating either dawn or dusk. The water surrounding the island is calm, reflecting the colors of the sky and the island's features. There are no visible people or moving objects, giving the scene a serene and untouched quality.",
"guidance_scale": 6.0,
"num_inference_steps": 64,
"height": 480,
"width": 848,
"max_sequence_length": 1024,
"output_type": "np",
"num_frames": 19,
"generator": generator
}
video = pipe(**pipeline_args).frames[0]
export_to_video(video, "Island_scene_mix.mp4")
from IPython import display
display.clear_output(wait = True)
display.Video("Island_scene_mix.mp4")
- Without LoRA
- With LoRA
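The without-LoRA baseline above can be reproduced by unloading the adapter and rerunning the same arguments. A minimal sketch, assuming the pipe, pipeline_args, and seed i from the example are still in scope and that unload_lora_weights is available on this pipeline (the output filename is arbitrary):
# Sketch: regenerate the same scene without the LoRA for comparison
pipe.unload_lora_weights()
pipeline_args["generator"] = torch.Generator("cpu").manual_seed(i)  # re-seed so the run matches
baseline = pipe(**pipeline_args).frames[0]
export_to_video(baseline, "Island_scene_base.mp4")
# Re-attach the LoRA before running the later examples
pipe.load_lora_weights("svjack/mochi_game_mix_early_lora")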
Character Example
prompt = "The video opens with a close-up of a woman in a white and purple outfit, holding a glowing purple butterfly. She has dark hair and walks gracefully through a traditional Japanese-style village at night."
i = 50
generator = torch.Generator("cpu").manual_seed(i)
pipeline_args = {
"prompt": prompt,
"guidance_scale": 6.0,
"num_inference_steps": 64,
"height": 480,
"width": 848,
"max_sequence_length": 1024,
"output_type": "np",
"num_frames": 19,
"generator": generator
}
video = pipe(**pipeline_args).frames[0]
export_to_video(video, "char_scene_mix_{}.mp4".format(i))
from IPython import display
display.clear_output(wait = True)
display.Video("char_scene_mix_{}.mp4".format(i))
- Without LoRA
- With LoRA
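The seed-suffixed filename above suggests trying several seeds for the same prompt. A minimal sketch of such a sweep, assuming pipe and pipeline_args from the character example (seed values here are arbitrary):
# Sketch: render the character prompt with a few different seeds
for seed in [50, 51, 52]:
    pipeline_args["generator"] = torch.Generator("cpu").manual_seed(seed)
    video = pipe(**pipeline_args).frames[0]
    export_to_video(video, "char_scene_mix_{}.mp4".format(seed))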
STG Example (using STG to produce better video)
Additional Installation
git clone https://github.com/svjack/STGuidance
cd STGuidance/diffusers/mochi
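pipeline_stg_mochi is a local module inside the cloned repository, so either run the script from STGuidance/diffusers/mochi (as the cd above implies) or add that directory to the Python path first. A minimal sketch, assuming the repository was cloned into the current working directory:
import sys
# Make the local STG Mochi pipeline module importable
sys.path.append("STGuidance/diffusers/mochi")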
Landscape Example
import torch
from pipeline_stg_mochi import MochiSTGPipeline
from diffusers.utils import export_to_video
# Load the STG-enabled Mochi pipeline and apply the game-style LoRA
pipe = MochiSTGPipeline.from_pretrained("genmo/mochi-1-preview", variant="bf16", torch_dtype=torch.bfloat16)
pipe.load_lora_weights("svjack/mochi_game_mix_early_lora")
# Reduce VRAM usage: offload weights to CPU and slice/tile the VAE
pipe.enable_model_cpu_offload()
pipe.enable_sequential_cpu_offload()
pipe.vae.enable_slicing()
pipe.vae.enable_tiling()
pipe.enable_vae_tiling()
#pipe = pipe.to("cuda")
#--------Option--------#
prompt = "The video presents a tranquil scene of a small, isolated island with a rocky outcrop. The island is covered in lush greenery and dotted with vibrant pink cherry blossom trees in full bloom. A traditional-style building with a pagoda-like roof stands prominently on the highest point of the island, suggesting a cultural or historical significance. The sky above is a gradient of soft pastel colors, transitioning from light blue to pink, indicating either dawn or dusk. The water surrounding the island is calm, reflecting the colors of the sky and the island's features. There are no visible people or moving objects, giving the scene a serene and untouched quality."
stg_mode = "STG-R"
stg_applied_layers_idx = [35]
stg_scale = 0.8 # 0.0 for CFG (default)
do_rescaling = True # False (default)
#----------------------#
# Generate video frames
frames = pipe(
prompt,
num_frames=84,
stg_mode=stg_mode,
stg_applied_layers_idx=stg_applied_layers_idx,
stg_scale=stg_scale,
do_rescaling=do_rescaling
).frames[0]
export_to_video(frames, "Island_scene_mix_stg.mp4")
from IPython import display
display.clear_output(wait = True)
display.Video("Island_scene_mix_stg.mp4")
- With LoRA and STG
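For a side-by-side comparison with plain CFG, the option comment above notes that stg_scale = 0.0 falls back to CFG. A minimal sketch reusing the pipeline and options already defined (the output filename is arbitrary):
# Sketch: CFG baseline, with STG disabled via stg_scale=0.0
frames_cfg = pipe(
    prompt,
    num_frames=84,
    stg_mode=stg_mode,
    stg_applied_layers_idx=stg_applied_layers_idx,
    stg_scale=0.0,
    do_rescaling=False,
).frames[0]
export_to_video(frames_cfg, "Island_scene_mix_cfg.mp4")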