FramePack

Running

App Files Files Community

Fabrice-TIERCELIN committed on Jun 4

Commit

bd6b924

verified ·

1 Parent(s): 5e2a644

Timed prompts

Browse files

Files changed (1) hide show

app.py +51 -2

app.py CHANGED Viewed

@@ -778,12 +778,24 @@ def worker_video(input_video, prompt, n_prompt, seed, batch, resolution, total_s
                 total_generated_latent_frames += int(generated_latents.shape[2])
                 history_latents = torch.cat([history_latents, generated_latents.to(history_latents)], dim=2)
                 if not high_vram:
                     offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8)
                     load_model_as_complete(vae, target_device=gpu)
                 real_history_latents = history_latents[:, :, -total_generated_latent_frames:, :, :]
                 if history_pixels is None:
                     history_pixels = vae_decode(real_history_latents, vae).cpu()
@@ -798,9 +810,17 @@ def worker_video(input_video, prompt, n_prompt, seed, batch, resolution, total_s
                   current_pixels = vae_decode(real_history_latents[:, :, -section_latent_frames:], vae).cpu()
                   history_pixels = soft_append_bcthw(history_pixels, current_pixels, overlapped_frames)
                 if not high_vram:
                     unload_complete_models()
                 output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
@@ -810,6 +830,10 @@ def worker_video(input_video, prompt, n_prompt, seed, batch, resolution, total_s
                 # 20250508 pftq: Save prompt to mp4 metadata comments
                 set_mp4_comments_imageio_ffmpeg(output_filename, f"Prompt: {prompt} | Negative Prompt: {n_prompt}");
                 print(f"Prompt saved to mp4 metadata comments: {output_filename}")
                 # 20250506 pftq: Clean up previous partial files
                 if previous_video is not None and os.path.exists(previous_video):
@@ -819,13 +843,17 @@ def worker_video(input_video, prompt, n_prompt, seed, batch, resolution, total_s
                     except Exception as e:
                         print(f"Error deleting previous partial video {previous_video}: {e}")
                 previous_video = output_filename
                 print(f'Decoded. Current latent shape {real_history_latents.shape}; pixel shape {history_pixels.shape}')
                 stream.output_queue.push(('file', output_filename))
                 end = time.time()
                 secondes = int(end - start)
-                print("££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
             seed = (seed + 1) % np.iinfo(np.int32).max
@@ -908,6 +936,15 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
 def end_process():
     """Push the 'end' message onto the stream's input queue."""
     # NOTE(review): presumably the worker polls this queue to abort generation - confirm.
     stop_message = 'end'
     stream.input_queue.push(stop_message)
 css = make_progress_bar_css()
 block = gr.Blocks(css=css).queue()
@@ -933,7 +970,19 @@ adapted from the official code repo [FramePack](https://github.com/lllyasviel/Fr
             text_to_video_hint = gr.HTML("I discourage to use the Text-to-Video feature. You should rather generate an image with Flux and use Image-to-Video. You will save time.", visible=False)
             input_image = gr.Image(sources='upload', type="numpy", label="Image", height=320)
             input_video = gr.Video(sources='upload', label="Input Video", height=320, visible=False)
-            prompt = gr.Textbox(label="Prompt", value='')
             total_second_length = gr.Slider(label="Video Length to Generate (seconds)", minimum=1, maximum=120, value=2, step=0.1)
             with gr.Row():

                 total_generated_latent_frames += int(generated_latents.shape[2])
                 history_latents = torch.cat([history_latents, generated_latents.to(history_latents)], dim=2)
+                end = time.time()
+                secondes = int(end - start)
+                print("1 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
+                start = time.time()
                 if not high_vram:
                     offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8)
                     load_model_as_complete(vae, target_device=gpu)
+                end = time.time()
+                secondes = int(end - start)
+                print("2 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
+                start = time.time()
                 real_history_latents = history_latents[:, :, -total_generated_latent_frames:, :, :]
+                end = time.time()
+                secondes = int(end - start)
+                print("3 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
+                start = time.time()
                 if history_pixels is None:
                     history_pixels = vae_decode(real_history_latents, vae).cpu()
                   current_pixels = vae_decode(real_history_latents[:, :, -section_latent_frames:], vae).cpu()
                   history_pixels = soft_append_bcthw(history_pixels, current_pixels, overlapped_frames)
+                end = time.time()
+                secondes = int(end - start)
+                print("4 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
+                start = time.time()
                 if not high_vram:
                     unload_complete_models()
+                end = time.time()
+                secondes = int(end - start)
+                print("5 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
+                start = time.time()
                 output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
                 # 20250508 pftq: Save prompt to mp4 metadata comments
                 set_mp4_comments_imageio_ffmpeg(output_filename, f"Prompt: {prompt} | Negative Prompt: {n_prompt}");
                 print(f"Prompt saved to mp4 metadata comments: {output_filename}")
+                end = time.time()
+                secondes = int(end - start)
+                print("6 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
+                start = time.time()
                 # 20250506 pftq: Clean up previous partial files
                 if previous_video is not None and os.path.exists(previous_video):
                     except Exception as e:
                         print(f"Error deleting previous partial video {previous_video}: {e}")
                 previous_video = output_filename
+                end = time.time()
+                secondes = int(end - start)
+                print("7 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
+                start = time.time()
                 print(f'Decoded. Current latent shape {real_history_latents.shape}; pixel shape {history_pixels.shape}')
                 stream.output_queue.push(('file', output_filename))
                 end = time.time()
                 secondes = int(end - start)
+                print("8 ££££££££££££££££££££££££££££££££££££££££ " + str(secondes))
             seed = (seed + 1) % np.iinfo(np.int32).max
 def end_process():
     """Push the 'end' message onto the stream's input queue."""
     # NOTE(review): presumably the worker polls this queue to abort generation - confirm.
     stop_message = 'end'
     stream.input_queue.push(stop_message)
+timed_prompts = {}
+def handle_prompt_number_change():
+    timed_prompts = {}
+    return []
+def handle_generation_mode_change(timed_prompt_id, timed_prompt):
+    timed_prompts[timed_prompt_id] = timed_prompt
+    return ";".join(list(timed_prompts.values()))
 css = make_progress_bar_css()
 block = gr.Blocks(css=css).queue()
             text_to_video_hint = gr.HTML("I discourage to use the Text-to-Video feature. You should rather generate an image with Flux and use Image-to-Video. You will save time.", visible=False)
             input_image = gr.Image(sources='upload', type="numpy", label="Image", height=320)
             input_video = gr.Video(sources='upload', label="Input Video", height=320, visible=False)
+            prompt = gr.Textbox(label="Prompt", value='', info='Use ; to separate in time', placeholder="The creature starts to move, fast motion, fixed camera")
+            prompt_number = gr.Slider(label="Timed prompt number", minimum=0, maximum=1000, value=0, step=1, info='Not for video extension')
+            prompt_number.change(fn=handle_prompt_number_change, inputs=[], outputs=[])
+            @gr.render(inputs=prompt_number)
+            def show_split(prompt_number):
+                timed_prompts = {}
+                for digit in range(prompt_number):
+                    timed_prompt_id = gr.Textbox(value="timed_prompt_" + str(digit), visible=False)
+                    timed_prompt = gr.Textbox(label="Timed prompt #" + str(digit + 1), elem_id="timed_prompt_" + str(digit), value="")
+                    timed_prompt.change(fn=handle_generation_mode_change, inputs=[timed_prompt_id, timed_prompt], outputs=[prompt])
             total_second_length = gr.Slider(label="Video Length to Generate (seconds)", minimum=1, maximum=120, value=2, step=0.1)
             with gr.Row():