Fabrice-TIERCELIN committed
Commit 7df9b79 (verified) · Parent(s): 9b03991

Optimize GPU time

Files changed (1): app.py (+119 −47)
app.py CHANGED
@@ -809,17 +809,16 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
      stream.output_queue.push(('end', None))
      return
 
- def get_duration(input_image, image_position, prompt, generation_mode, n_prompt, randomize_seed, seed, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, mp4_crf):
+ def get_duration(input_image, image_position, prompts, generation_mode, n_prompt, seed, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, mp4_crf):
      return total_second_length * 60 * (0.9 if use_teacache else 1.5) * (1 + ((steps - 25) / 100))
 
  # Remove this decorator if you run on local
  @spaces.GPU(duration=get_duration)
- def process(input_image,
+ def process_on_gpu(input_image,
              image_position=0,
-             prompt="",
+             prompts=[""],
              generation_mode="image",
              n_prompt="",
-             randomize_seed=True,
              seed=31337,
              resolution=640,
              total_second_length=5,
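The duration callback above is where the GPU time is saved: @spaces.GPU accepts a callable, as this commit itself shows, so the reserved GPU window can be sized per job instead of using a fixed worst case. As a quick sanity check of the budget it computes, a standalone sketch mirroring the committed formula (gpu_seconds is a hypothetical name; the printed figures follow from the arithmetic, not from the commit):

def gpu_seconds(total_second_length, steps, use_teacache):
    # 0.9 s of GPU per second of video with TeaCache, 1.5 s without,
    # nudged by 1% per sampling step away from the 25-step baseline
    return total_second_length * 60 * (0.9 if use_teacache else 1.5) * (1 + ((steps - 25) / 100))

print(gpu_seconds(5, 25, True))   # 270.0 -> 4.5 GPU-minutes for a 5 s clip
print(gpu_seconds(5, 25, False))  # 450.0 for the same clip without TeaCache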
@@ -835,25 +834,6 @@ def process(input_image,
              ):
      start = time.time()
      global stream
- 
-     if torch.cuda.device_count() == 0:
-         gr.Warning('Set this space to GPU config to make it work.')
-         yield gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(visible = False)
-         return
- 
-     if randomize_seed:
-         seed = random.randint(0, np.iinfo(np.int32).max)
- 
-     prompts = prompt.split(";")
- 
-     # assert input_image is not None, 'No input image!'
-     if generation_mode == "text":
-         default_height, default_width = 640, 640
-         input_image = np.ones((default_height, default_width, 3), dtype=np.uint8) * 255
-         print("No input image provided. Using a blank white image.")
- 
-     yield gr.update(label="Previewed Frames"), None, '', '', gr.update(interactive=False), gr.update(interactive=True), gr.update()
- 
      stream = AsyncStream()
 
      async_run(worker, input_image, image_position, prompts, n_prompt, seed, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, mp4_crf)
@@ -885,14 +865,25 @@ def process(input_image,
      "You can upscale the result with RIFE. To make all your generated scenes consistent, you can then apply a face swap on the main character. If you do not see the generated video above, the process may have failed. See the logs for more information. If you see an error like ''NVML_SUCCESS == r INTERNAL ASSERT FAILED'', you probably haven't enough VRAM. Test an example or other options to compare. You can share your inputs to the original space or set your space in public for a peer review.", gr.update(interactive=True), gr.update(interactive=False), gr.update(visible = False)
      break
 
- def get_duration_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
-     return total_second_length * 60 * (1.5 if use_teacache else 2.5) * (1 + ((steps - 25) / 100))
- 
- # Remove this decorator if you run on local
- @spaces.GPU(duration=get_duration_video)
- def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
-     start = time.time()
-     global stream, high_vram
+ def process(input_image,
+             image_position=0,
+             prompt="",
+             generation_mode="image",
+             n_prompt="",
+             randomize_seed=True,
+             seed=31337,
+             resolution=640,
+             total_second_length=5,
+             latent_window_size=9,
+             steps=25,
+             cfg=1.0,
+             gs=10.0,
+             rs=0.0,
+             gpu_memory_preservation=6,
+             enable_preview=True,
+             use_teacache=False,
+             mp4_crf=16
+             ):
 
      if torch.cuda.device_count() == 0:
          gr.Warning('Set this space to GPU config to make it work.')
@@ -904,24 +895,41 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
 
      prompts = prompt.split(";")
 
-     # 20250506 pftq: Updated assertion for video input
-     assert input_video is not None, 'No input video!'
+     # assert input_image is not None, 'No input image!'
+     if generation_mode == "text":
+         default_height, default_width = 640, 640
+         input_image = np.ones((default_height, default_width, 3), dtype=np.uint8) * 255
+         print("No input image provided. Using a blank white image.")
 
      yield gr.update(label="Previewed Frames"), None, '', '', gr.update(interactive=False), gr.update(interactive=True), gr.update()
 
-     # 20250507 pftq: Even the H100 needs offloading if the video dimensions are 720p or higher
-     if high_vram and (no_resize or resolution>640):
-         print("Disabling high vram mode due to no resize and/or potentially higher resolution...")
-         high_vram = False
-         vae.enable_slicing()
-         vae.enable_tiling()
-         DynamicSwapInstaller.install_model(transformer, device=gpu)
-         DynamicSwapInstaller.install_model(text_encoder, device=gpu)
- 
-     # 20250508 pftq: automatically set distilled cfg to 1 if cfg is used
-     if cfg > 1:
-         gs = 1
+     yield from process_on_gpu(input_image,
+                               image_position,
+                               prompts,
+                               generation_mode,
+                               n_prompt,
+                               seed,
+                               resolution,
+                               total_second_length,
+                               latent_window_size,
+                               steps,
+                               cfg,
+                               gs,
+                               rs,
+                               gpu_memory_preservation,
+                               enable_preview,
+                               use_teacache,
+                               mp4_crf
+     )
+ 
+ def get_duration_video(input_video, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
+     return total_second_length * 60 * (1.5 if use_teacache else 2.5) * (1 + ((steps - 25) / 100))
+ 
+ # Remove this decorator if you run on local
+ @spaces.GPU(duration=get_duration_video)
+ def process_video_on_gpu(input_video, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
+     start = time.time()
+     global stream
      stream = AsyncStream()
 
      # 20250506 pftq: Pass num_clean_frames, vae_batch, etc
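The hunks above carry the actual optimization: each public handler is split into an undecorated wrapper (process, and process_video further down) that does the CPU-only preparation, and an inner *_on_gpu generator that carries the @spaces.GPU decorator and is entered with yield from. Validation, seed randomization and prompt splitting no longer run inside the billed GPU window. A minimal sketch of the pattern, with simplified names and a toy duration function rather than the app's real signatures:

import spaces

def budget(items):
    # duration callback: same role as get_duration above,
    # called with the same arguments as the decorated function
    return 10 + 2 * len(items)

@spaces.GPU(duration=budget)
def run_on_gpu(items):
    # only this generator is billed as GPU time
    for item in items:
        yield item.upper()  # stand-in for the sampling loop

def run(raw):
    # Gradio-facing wrapper: cheap prep stays outside the GPU window
    items = raw.split(";")
    yield from run_on_gpu(items)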
@@ -955,6 +963,39 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
      " You can upscale the result with RIFE. To make all your generated scenes consistent, you can then apply a face swap on the main character. If you do not see the generated video above, the process may have failed. See the logs for more information. If you see an error like ''NVML_SUCCESS == r INTERNAL ASSERT FAILED'', you probably haven't enough VRAM. Test an example or other options to compare. You can share your inputs to the original space or set your space in public for a peer review.", '', gr.update(interactive=True), gr.update(interactive=False), gr.update(visible = False)
      break
 
+ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
+     global high_vram
+ 
+     if torch.cuda.device_count() == 0:
+         gr.Warning('Set this space to GPU config to make it work.')
+         yield gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(visible = False)
+         return
+ 
+     if randomize_seed:
+         seed = random.randint(0, np.iinfo(np.int32).max)
+ 
+     prompts = prompt.split(";")
+ 
+     # 20250506 pftq: Updated assertion for video input
+     assert input_video is not None, 'No input video!'
+ 
+     yield gr.update(label="Previewed Frames"), None, '', '', gr.update(interactive=False), gr.update(interactive=True), gr.update()
+ 
+     # 20250507 pftq: Even the H100 needs offloading if the video dimensions are 720p or higher
+     if high_vram and (no_resize or resolution>640):
+         print("Disabling high vram mode due to no resize and/or potentially higher resolution...")
+         high_vram = False
+         vae.enable_slicing()
+         vae.enable_tiling()
+         DynamicSwapInstaller.install_model(transformer, device=gpu)
+         DynamicSwapInstaller.install_model(text_encoder, device=gpu)
+ 
+     # 20250508 pftq: automatically set distilled cfg to 1 if cfg is used
+     if cfg > 1:
+         gs = 1
+ 
+     yield from process_video_on_gpu(input_video, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch)
+ 
  def end_process():
      stream.input_queue.push('end')
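Note that in the new process_video wrapper the pre-GPU work keeps its original order: the high-VRAM fallback (VAE slicing and tiling plus DynamicSwapInstaller offloading) and the cfg > 1 override of gs still run before process_video_on_gpu opens the GPU window. The two VAE calls are the standard diffusers memory levers; a minimal illustration of the same trade-off, assuming a diffusers-style VAE (fit_decode_in_memory is a hypothetical helper):

def fit_decode_in_memory(vae, high_vram, no_resize, resolution):
    # At original resolution or above 640 px, decode latents
    # sample-by-sample (slicing) and tile-by-tile (tiling)
    # instead of in one pass, trading speed for peak VRAM.
    if high_vram and (no_resize or resolution > 640):
        vae.enable_slicing()
        vae.enable_tiling()
        return False  # the high-VRAM fast path is off from here on
    return high_vram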
@@ -1038,7 +1079,7 @@ with block:
      timed_prompt = gr.Textbox(label="Timed prompt #" + str(digit + 1), elem_id="timed_prompt_" + str(digit), value="")
      timed_prompt.change(fn=handle_timed_prompt_change, inputs=[timed_prompt_id, timed_prompt], outputs=[final_prompt])
 
- final_prompt = gr.Textbox(label="Final prompt", value='', info='Use ; to separate in time')
+ final_prompt = gr.Textbox(label="Final prompt", value='', info='Use ; to separate in time; beware to write to stop the previous action')
  prompt_hint = gr.HTML("Video extension barely follows the prompt; to force to follow the prompt, you have to set the Distilled CFG Scale to 3.0 and the Context Frames to 2 but the video quality will be poor.")
  total_second_length = gr.Slider(label="Video Length to Generate (seconds)", minimum=1, maximum=120, value=2, step=0.1)
@@ -1054,7 +1095,7 @@ with block:
      n_prompt = gr.Textbox(label="Negative Prompt", value="Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", info='Requires using normal CFG (undistilled) instead of Distilled (set Distilled=1 and CFG > 1).')
 
      latent_window_size = gr.Slider(label="Latent Window Size", minimum=1, maximum=33, value=9, step=1, info='Generate more frames at a time (larger chunks). Less degradation and better blending but higher VRAM cost. Should not change.')
- steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=30, step=1, info='Increase for more quality, especially if using high non-distilled CFG. If your animation has very few motion, you may have brutal brightness change; this can be fixed increasing the steps.')
+ steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=25, step=1, info='Increase for more quality, especially if using high non-distilled CFG. If your animation has very few motion, you may have brutal brightness change; this can be fixed increasing the steps.')
 
      with gr.Row():
          no_resize = gr.Checkbox(label='Force Original Video Resolution (no Resizing)', value=False, info='Might run out of VRAM (720p requires > 24GB VRAM).')
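Lowering the Steps default from 30 to 25 also lines the UI up with the duration callbacks earlier in the diff: at steps = 25 the (1 + ((steps - 25) / 100)) factor is exactly 1.0, so a default run requests the base GPU rate.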
@@ -1105,6 +1146,37 @@ with block:
      ips = [input_image, image_position, final_prompt, generation_mode, n_prompt, randomize_seed, seed, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, mp4_crf]
      ips_video = [input_video, final_prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch]
 
+ gr.Examples(
+     label = "Examples from text",
+     examples = [
+         [
+             None,  # input_image
+             0,  # image_position
+             "Overcrowed street in Japan, photorealistic, realistic, intricate details, 8k, insanely detailed",
+             "text",  # generation_mode
+             "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry",  # n_prompt
+             True,  # randomize_seed
+             42,  # seed
+             672,  # resolution
+             1,  # total_second_length
+             9,  # latent_window_size
+             30,  # steps
+             1.0,  # cfg
+             10.0,  # gs
+             0.0,  # rs
+             6,  # gpu_memory_preservation
+             False,  # enable_preview
+             False,  # use_teacache
+             16  # mp4_crf
+         ]
+     ],
+     run_on_click = True,
+     fn = process,
+     inputs = ips,
+     outputs = [result_video, preview_image, progress_desc, progress_bar, start_button, end_button],
+     cache_examples = False,
+ )
+ 
  gr.Examples(
      label = "Examples from image",
      examples = [
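One detail the new example block depends on: gr.Examples matches each example row positionally against inputs, so the 18 values here must stay in the same order as ips, and run_on_click = True makes clicking the example invoke process directly instead of merely filling the form. A cut-down sketch of the same wiring with toy components (not the app's):

import gradio as gr

def greet(name, excited):
    return name + ("!" if excited else ".")

with gr.Blocks() as demo:
    name = gr.Textbox(label="Name")
    excited = gr.Checkbox(label="Excited")
    out = gr.Textbox(label="Greeting")
    gr.Examples(
        examples=[["Ada", True]],  # row order mirrors the inputs list
        run_on_click=True,
        fn=greet,
        inputs=[name, excited],
        outputs=[out],
        cache_examples=False,
    )

demo.launch()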
 