FramePack

Running

App Files Files Community

Fabrice-TIERCELIN committed on 20 days ago

Commit

b214256

verified ·

1 Parent(s): 6ec566a

Improve examples

Browse files

Files changed (1) hide show

app.py +32 -26

app.py CHANGED Viewed

@@ -4,7 +4,10 @@ import os
 os.environ['HF_HOME'] = os.path.abspath(os.path.realpath(os.path.join(os.path.dirname(__file__), './hf_download')))
-import spaces
 import gradio as gr
 import torch
 import traceback
@@ -113,7 +116,6 @@ default_local_storage = {
         "generation-mode": "image",
     }
-@spaces.GPU()
 @torch.no_grad()
 def video_encode(video_path, resolution, no_resize, vae, vae_batch_size=16, device="cuda", width=None, height=None):
     """
@@ -449,8 +451,12 @@ def worker(input_image, image_position, prompts, n_prompt, seed, resolution, tot
                 section_latent_frames = latent_window_size * 2
                 overlapped_frames = latent_window_size * 4 - 3
-                real_history_latents = history_latents[:, :, :min(section_latent_frames, total_generated_latent_frames), :, :] if is_last_frame else history_latents[:, :, -min(section_latent_frames, total_generated_latent_frames):, :, :]
-                history_pixels = soft_append_bcthw(vae_decode(real_history_latents, vae).cpu(), history_pixels, overlapped_frames) if is_last_frame else soft_append_bcthw(history_pixels, vae_decode(real_history_latents, vae).cpu(), overlapped_frames)
             if not high_vram:
                 unload_complete_models()
@@ -535,7 +541,6 @@ def worker(input_image, image_position, prompts, n_prompt, seed, resolution, tot
     return
 # 20250506 pftq: Modified worker to accept video input and clean frame count
-@spaces.GPU()
 @torch.no_grad()
 def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
     def encode_prompt(prompt, n_prompt):
@@ -807,6 +812,7 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
 def get_duration(input_image, image_position, prompt, generation_mode, n_prompt, randomize_seed, seed, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, mp4_crf):
     return total_second_length * 60 * (0.9 if use_teacache else 1.5) * (1 + ((steps - 25) / 100))
 @spaces.GPU(duration=get_duration)
 def process(input_image,
             image_position=0,
@@ -846,7 +852,7 @@ def process(input_image,
         input_image = np.ones((default_height, default_width, 3), dtype=np.uint8) * 255
         print("No input image provided. Using a blank white image.")
-    yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True), gr.update()
     stream = AsyncStream()
@@ -859,11 +865,11 @@ def process(input_image,
         if flag == 'file':
             output_filename = data
-            yield output_filename, gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True), gr.update()
         if flag == 'progress':
             preview, desc, html = data
-            yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True), gr.update()
         if flag == 'end':
             end = time.time()
@@ -872,7 +878,7 @@ def process(input_image,
             secondes = secondes - (minutes * 60)
             hours = math.floor(minutes / 60)
             minutes = minutes - (hours * 60)
-            yield output_filename, gr.update(visible=False), gr.update(), "The process has lasted " + \
             ((str(hours) + " h, ") if hours != 0 else "") + \
             ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
             str(secondes) + " sec. " + \
@@ -882,7 +888,7 @@ def process(input_image,
 def get_duration_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
     return total_second_length * 60 * (1.5 if use_teacache else 2.5) * (1 + ((steps - 25) / 100))
-# 20250506 pftq: Modified process to pass clean frame count, etc from video_encode
 @spaces.GPU(duration=get_duration_video)
 def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
     start = time.time()
@@ -901,7 +907,7 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
     # 20250506 pftq: Updated assertion for video input
     assert input_video is not None, 'No input video!'
-    yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True), gr.update()
     # 20250507 pftq: Even the H100 needs offloading if the video dimensions are 720p or higher
     if high_vram and (no_resize or resolution>640):
@@ -928,11 +934,11 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
         if flag == 'file':
             output_filename = data
-            yield output_filename, gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True), gr.update()
         if flag == 'progress':
             preview, desc, html = data
-            yield output_filename, gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True), gr.update() # 20250506 pftq: Keep refreshing the video in case it got hidden when the tab was in the background
         if flag == 'end':
             end = time.time()
@@ -941,7 +947,7 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
             secondes = secondes - (minutes * 60)
             hours = math.floor(minutes / 60)
             minutes = minutes - (hours * 60)
-            yield output_filename, gr.update(visible=False), desc + \
             " The process has lasted " + \
             ((str(hours) + " h, ") if hours != 0 else "") + \
             ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
@@ -1048,7 +1054,7 @@ with block:
                 n_prompt = gr.Textbox(label="Negative Prompt", value="Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", info='Requires using normal CFG (undistilled) instead of Distilled (set Distilled=1 and CFG > 1).')
                 latent_window_size = gr.Slider(label="Latent Window Size", minimum=1, maximum=33, value=9, step=1, info='Generate more frames at a time (larger chunks). Less degradation and better blending but higher VRAM cost. Should not change.')
-                steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=25, step=1, info='Increase for more quality, especially if using high non-distilled CFG. If your animation has very few motion, you may have brutal brightness change; this can be fixed increasing the steps.')
                 with gr.Row():
                     no_resize = gr.Checkbox(label='Force Original Video Resolution (no Resizing)', value=False, info='Might run out of VRAM (720p requires > 24GB VRAM).')
@@ -1090,8 +1096,8 @@ with block:
         with gr.Column():
             warning = gr.HTML(value = "<center><big>Your computer must <u>not</u> enter into standby mode.</big><br/>On Chrome, you can force to keep a tab alive in <code>chrome://discards/</code></center>", visible = False)
             preview_image = gr.Image(label="Next Latents", height=200, visible=False)
-            result_video = gr.Video(label="Finished Frames", autoplay=True, show_share_button=False, height=512, loop=True)
             progress_desc = gr.Markdown('', elem_classes='no-generating-animation')
             progress_bar = gr.HTML('', elem_classes='no-generating-animation')
@@ -1113,7 +1119,7 @@ with block:
                     672, # resolution
                     1, # total_second_length
                     9, # latent_window_size
-                    25, # steps
                     1.0, # cfg
                     10.0, # gs
                     0.0, # rs
@@ -1125,7 +1131,7 @@ with block:
                 [
                     "./img_examples/Example2.webp", # input_image
                     0, # image_position
-                    "A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens",
                     "image", # generation_mode
                     "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
                     True, # randomize_seed
@@ -1133,7 +1139,7 @@ with block:
                     672, # resolution
                     2, # total_second_length
                     9, # latent_window_size
-                    25, # steps
                     1.0, # cfg
                     10.0, # gs
                     0.0, # rs
@@ -1145,7 +1151,7 @@ with block:
                 [
                     "./img_examples/Example2.webp", # input_image
                     0, # image_position
-                    "A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens",
                     "image", # generation_mode
                     "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
                     True, # randomize_seed
@@ -1153,7 +1159,7 @@ with block:
                     672, # resolution
                     2, # total_second_length
                     9, # latent_window_size
-                    25, # steps
                     1.0, # cfg
                     10.0, # gs
                     0.0, # rs
@@ -1173,7 +1179,7 @@ with block:
                     672, # resolution
                     1, # total_second_length
                     9, # latent_window_size
-                    25, # steps
                     1.0, # cfg
                     10.0, # gs
                     0.0, # rs
@@ -1193,7 +1199,7 @@ with block:
                     672, # resolution
                     1, # total_second_length
                     9, # latent_window_size
-                    25, # steps
                     1.0, # cfg
                     10.0, # gs
                     0.0, # rs
@@ -1223,7 +1229,7 @@ with block:
                     672, # resolution
                     1, # total_second_length
                     9, # latent_window_size
-                    25, # steps
                     1.0, # cfg
                     10.0, # gs
                     0.0, # rs
@@ -1275,10 +1281,10 @@ with block:
     timeless_prompt.change(fn=handle_timeless_prompt_change, inputs=[timeless_prompt], outputs=[final_prompt])
     start_button.click(fn = check_parameters, inputs = [
         generation_mode, input_image, input_video
-    ], outputs = [end_button, warning], queue = False, show_progress = False).success(fn=process, inputs=ips, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button, end_button, warning])
     start_button_video.click(fn = check_parameters, inputs = [
         generation_mode, input_image, input_video
-    ], outputs = [end_button, warning], queue = False, show_progress = False).success(fn=process_video, inputs=ips_video, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button_video, end_button, warning])
     end_button.click(fn=end_process)
     generation_mode.change(fn = save_preferences, inputs = [

 os.environ['HF_HOME'] = os.path.abspath(os.path.realpath(os.path.join(os.path.dirname(__file__), './hf_download')))
+try:
+    import spaces
+except:
+    print("Not on HuggingFace")
 import gradio as gr
 import torch
 import traceback
         "generation-mode": "image",
     }
 @torch.no_grad()
 def video_encode(video_path, resolution, no_resize, vae, vae_batch_size=16, device="cuda", width=None, height=None):
     """
                 section_latent_frames = latent_window_size * 2
                 overlapped_frames = latent_window_size * 4 - 3
+                if is_last_frame:
+                    real_history_latents = history_latents[:, :, :min(section_latent_frames, total_generated_latent_frames), :, :]
+                    history_pixels = soft_append_bcthw(vae_decode(real_history_latents, vae).cpu(), history_pixels, overlapped_frames)
+                else:
+                    real_history_latents = history_latents[:, :, -min(section_latent_frames, total_generated_latent_frames):, :, :]
+                    history_pixels = soft_append_bcthw(history_pixels, vae_decode(real_history_latents, vae).cpu(), overlapped_frames)
             if not high_vram:
                 unload_complete_models()
     return
 # 20250506 pftq: Modified worker to accept video input and clean frame count
 @torch.no_grad()
 def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
     def encode_prompt(prompt, n_prompt):
 def get_duration(input_image, image_position, prompt, generation_mode, n_prompt, randomize_seed, seed, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, mp4_crf):
     return total_second_length * 60 * (0.9 if use_teacache else 1.5) * (1 + ((steps - 25) / 100))
+# Remove this decorator if you run on local
 @spaces.GPU(duration=get_duration)
 def process(input_image,
             image_position=0,
         input_image = np.ones((default_height, default_width, 3), dtype=np.uint8) * 255
         print("No input image provided. Using a blank white image.")
+    yield gr.update(label="Previewed Frames"), None, '', '', gr.update(interactive=False), gr.update(interactive=True), gr.update()
     stream = AsyncStream()
         if flag == 'file':
             output_filename = data
+            yield gr.update(value=output_filename, label="Previewed Frames"), gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True), gr.update()
         if flag == 'progress':
             preview, desc, html = data
+            yield gr.update(label="Previewed Frames"), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True), gr.update()
         if flag == 'end':
             end = time.time()
             secondes = secondes - (minutes * 60)
             hours = math.floor(minutes / 60)
             minutes = minutes - (hours * 60)
+            yield gr.update(value=output_filename, label="Finished Frames"), gr.update(visible=False), gr.update(), "The process has lasted " + \
             ((str(hours) + " h, ") if hours != 0 else "") + \
             ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
             str(secondes) + " sec. " + \
 def get_duration_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
     return total_second_length * 60 * (1.5 if use_teacache else 2.5) * (1 + ((steps - 25) / 100))
+# Remove this decorator if you run on local
 @spaces.GPU(duration=get_duration_video)
 def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
     start = time.time()
     # 20250506 pftq: Updated assertion for video input
     assert input_video is not None, 'No input video!'
+    yield gr.update(label="Previewed Frames"), None, '', '', gr.update(interactive=False), gr.update(interactive=True), gr.update()
     # 20250507 pftq: Even the H100 needs offloading if the video dimensions are 720p or higher
     if high_vram and (no_resize or resolution>640):
         if flag == 'file':
             output_filename = data
+            yield gr.update(value=output_filename, label="Previewed Frames"), gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True), gr.update()
         if flag == 'progress':
             preview, desc, html = data
+            yield gr.update(label="Previewed Frames"), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True), gr.update() # 20250506 pftq: Keep refreshing the video in case it got hidden when the tab was in the background
         if flag == 'end':
             end = time.time()
             secondes = secondes - (minutes * 60)
             hours = math.floor(minutes / 60)
             minutes = minutes - (hours * 60)
+            yield gr.update(value=output_filename, label="Finished Frames"), gr.update(visible=False), desc + \
             " The process has lasted " + \
             ((str(hours) + " h, ") if hours != 0 else "") + \
             ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
                 n_prompt = gr.Textbox(label="Negative Prompt", value="Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", info='Requires using normal CFG (undistilled) instead of Distilled (set Distilled=1 and CFG > 1).')
                 latent_window_size = gr.Slider(label="Latent Window Size", minimum=1, maximum=33, value=9, step=1, info='Generate more frames at a time (larger chunks). Less degradation and better blending but higher VRAM cost. Should not change.')
+                steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=30, step=1, info='Increase for more quality, especially if using high non-distilled CFG. If your animation has very few motion, you may have brutal brightness change; this can be fixed increasing the steps.')
                 with gr.Row():
                     no_resize = gr.Checkbox(label='Force Original Video Resolution (no Resizing)', value=False, info='Might run out of VRAM (720p requires > 24GB VRAM).')
         with gr.Column():
             warning = gr.HTML(value = "<center><big>Your computer must <u>not</u> enter into standby mode.</big><br/>On Chrome, you can force to keep a tab alive in <code>chrome://discards/</code></center>", visible = False)
+            result_video = gr.Video(label="Generated Frames", autoplay=True, show_share_button=False, height=512, loop=True)
             preview_image = gr.Image(label="Next Latents", height=200, visible=False)
             progress_desc = gr.Markdown('', elem_classes='no-generating-animation')
             progress_bar = gr.HTML('', elem_classes='no-generating-animation')
                     672, # resolution
                     1, # total_second_length
                     9, # latent_window_size
+                    30, # steps
                     1.0, # cfg
                     10.0, # gs
                     0.0, # rs
                 [
                     "./img_examples/Example2.webp", # input_image
                     0, # image_position
+                    "A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks, the man stops talking and the man listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens",
                     "image", # generation_mode
                     "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
                     True, # randomize_seed
                     672, # resolution
                     2, # total_second_length
                     9, # latent_window_size
+                    30, # steps
                     1.0, # cfg
                     10.0, # gs
                     0.0, # rs
                 [
                     "./img_examples/Example2.webp", # input_image
                     0, # image_position
+                    "A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks, the woman stops talking and the woman listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens",
                     "image", # generation_mode
                     "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
                     True, # randomize_seed
                     672, # resolution
                     2, # total_second_length
                     9, # latent_window_size
+                    30, # steps
                     1.0, # cfg
                     10.0, # gs
                     0.0, # rs
                     672, # resolution
                     1, # total_second_length
                     9, # latent_window_size
+                    30, # steps
                     1.0, # cfg
                     10.0, # gs
                     0.0, # rs
                     672, # resolution
                     1, # total_second_length
                     9, # latent_window_size
+                    30, # steps
                     1.0, # cfg
                     10.0, # gs
                     0.0, # rs
                     672, # resolution
                     1, # total_second_length
                     9, # latent_window_size
+                    30, # steps
                     1.0, # cfg
                     10.0, # gs
                     0.0, # rs
     timeless_prompt.change(fn=handle_timeless_prompt_change, inputs=[timeless_prompt], outputs=[final_prompt])
     start_button.click(fn = check_parameters, inputs = [
         generation_mode, input_image, input_video
+    ], outputs = [end_button, warning], queue = False, show_progress = False).success(fn=process, inputs=ips, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button, end_button, warning], scroll_to_output = True)
     start_button_video.click(fn = check_parameters, inputs = [
         generation_mode, input_image, input_video
+    ], outputs = [end_button, warning], queue = False, show_progress = False).success(fn=process_video, inputs=ips_video, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button_video, end_button, warning], scroll_to_output = True)
     end_button.click(fn=end_process)
     generation_mode.change(fn = save_preferences, inputs = [