Fabrice-TIERCELIN committed on
Commit ce1c404 · verified · 1 Parent(s): 27a6551

Display resolution and save preferences

Files changed (1):
  1. app.py +290 -127
app.py CHANGED
@@ -108,12 +108,9 @@ stream = AsyncStream()
 outputs_folder = './outputs/'
 os.makedirs(outputs_folder, exist_ok=True)
 
-def check_parameters(generation_mode, input_image, input_video):
-    if generation_mode == "image" and input_image is None:
-        raise gr.Error("Please provide an image to extend.")
-    if generation_mode == "video" and input_video is None:
-        raise gr.Error("Please provide a video to extend.")
-    return [gr.update(interactive=True)]
+default_local_storage = {
+    "generation-mode": "image",
+}
 
 @spaces.GPU()
 @torch.no_grad()
@@ -306,7 +303,7 @@ def set_mp4_comments_imageio_ffmpeg(input_file, comments):
         return False
 
 @torch.no_grad()
-def worker(input_image, prompts, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf):
+def worker(input_image, prompts, n_prompt, seed, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, mp4_crf):
     def encode_prompt(prompt, n_prompt):
         llama_vec, clip_l_pooler = encode_prompt_conds(prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)
 
@@ -356,7 +353,7 @@ def worker(input_image, prompts, n_prompt, seed, total_second_length, latent_win
         stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Image processing ...'))))
 
         H, W, C = input_image.shape
-        height, width = find_nearest_bucket(H, W, resolution=640)
+        height, width = find_nearest_bucket(H, W, resolution=resolution)
         input_image_np = resize_and_center_crop(input_image, target_width=width, target_height=height)
 
         Image.fromarray(input_image_np).save(os.path.join(outputs_folder, f'{job_id}.png'))
@@ -399,23 +396,27 @@ def worker(input_image, prompts, n_prompt, seed, total_second_length, latent_win
         history_latents = torch.cat([history_latents, start_latent.to(history_latents)], dim=2)
         total_generated_latent_frames = 1
 
-        def callback(d):
-            preview = d['denoised']
-            preview = vae_decode_fake(preview)
-
-            preview = (preview * 255.0).detach().cpu().numpy().clip(0, 255).astype(np.uint8)
-            preview = einops.rearrange(preview, 'b c t h w -> (b h) (t w) c')
-
-            if stream.input_queue.top() == 'end':
-                stream.output_queue.push(('end', None))
-                raise KeyboardInterrupt('User ends the task.')
-
-            current_step = d['i'] + 1
-            percentage = int(100.0 * current_step / steps)
-            hint = f'Sampling {current_step}/{steps}'
-            desc = f'Total generated frames: {int(max(0, total_generated_latent_frames * 4 - 3))}, Video length: {max(0, (total_generated_latent_frames * 4 - 3) / 30) :.2f} seconds (FPS-30). The video is being extended now ...'
-            stream.output_queue.push(('progress', (preview, desc, make_progress_bar_html(percentage, hint))))
-            return
+        if enable_preview:
+            def callback(d):
+                preview = d['denoised']
+                preview = vae_decode_fake(preview)
+
+                preview = (preview * 255.0).detach().cpu().numpy().clip(0, 255).astype(np.uint8)
+                preview = einops.rearrange(preview, 'b c t h w -> (b h) (t w) c')
+
+                if stream.input_queue.top() == 'end':
+                    stream.output_queue.push(('end', None))
+                    raise KeyboardInterrupt('User ends the task.')
+
+                current_step = d['i'] + 1
+                percentage = int(100.0 * current_step / steps)
+                hint = f'Sampling {current_step}/{steps}'
+                desc = f'Total generated frames: {int(max(0, total_generated_latent_frames * 4 - 3))}, Video length: {max(0, (total_generated_latent_frames * 4 - 3) / 30) :.2f} seconds (FPS-30), Resolution: {height}px * {width}px. The video is being extended now ...'
+                stream.output_queue.push(('progress', (preview, desc, make_progress_bar_html(percentage, hint))))
+                return
+        else:
+            def callback(d):
+                return
 
         indices = torch.arange(0, sum([1, 16, 2, 1, latent_window_size])).unsqueeze(0)
         clean_latent_indices_start, clean_latent_4x_indices, clean_latent_2x_indices, clean_latent_1x_indices, latent_indices = indices.split([1, 16, 2, 1, latent_window_size], dim=1)
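
Note: the preview path is unchanged in substance, but it is now built only when enable_preview is on. The einops pattern 'b c t h w -> (b h) (t w) c' tiles the decoded latent frames into a single image, batch rows stacked vertically and time steps left to right. A small sketch of what that rearrange does, with illustrative shapes only:

    import numpy as np
    import einops

    fake = np.zeros((1, 3, 4, 64, 64), dtype=np.uint8)  # (batch, channel, time, height, width)
    grid = einops.rearrange(fake, 'b c t h w -> (b h) (t w) c')
    print(grid.shape)  # (64, 256, 3): the 4 frames sit side by side in one preview image
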
@@ -495,13 +496,14 @@ def worker(input_image, prompts, n_prompt, seed, total_second_length, latent_win
             if not high_vram:
                 unload_complete_models()
 
-            output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
-
-            save_bcthw_as_mp4(history_pixels, output_filename, fps=30, crf=mp4_crf)
-
-            print(f'Decoded. Current latent shape {real_history_latents.shape}; pixel shape {history_pixels.shape}')
-
-            stream.output_queue.push(('file', output_filename))
+            if enable_preview or section_index == total_latent_sections - 1:
+                output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
+
+                save_bcthw_as_mp4(history_pixels, output_filename, fps=30, crf=mp4_crf)
+
+                print(f'Decoded. Current latent shape {real_history_latents.shape}; pixel shape {history_pixels.shape}')
+
+                stream.output_queue.push(('file', output_filename))
     except:
         traceback.print_exc()
 
@@ -513,8 +515,8 @@ def worker(input_image, prompts, n_prompt, seed, total_second_length, latent_win
     stream.output_queue.push(('end', None))
     return
 
-def get_duration(input_image, prompt, generation_mode, n_prompt, randomize_seed, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf):
-    return total_second_length * 60 * (0.7 if use_teacache else 1.3)
+def get_duration(input_image, prompt, generation_mode, n_prompt, randomize_seed, seed, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, mp4_crf):
+    return total_second_length * 60 * (0.7 if use_teacache else 1.3) * (2**((resolution - 640) / 640)) * (1 + ((steps - 25) / 100))
 
 
 @spaces.GPU(duration=get_duration)
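
Note: get_duration now scales the requested ZeroGPU slot with resolution and step count on top of the TeaCache factor; get_duration_video further down applies the same two factors with a base multiplier of 2 instead of 1.3. A quick sanity check of the formula with illustrative values:

    # 5 s video, 672 px, 30 steps, TeaCache off:
    duration = 5 * 60 * 1.3 * (2 ** ((672 - 640) / 640)) * (1 + (30 - 25) / 100)
    print(round(duration))  # 300 * 1.3 * 2**0.05 * 1.05 ≈ 424 seconds requested
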
@@ -523,6 +525,7 @@ def process(input_image, prompt,
             n_prompt="",
             randomize_seed=True,
             seed=31337,
+            resolution=640,
             total_second_length=5,
             latent_window_size=9,
             steps=25,
@@ -530,14 +533,16 @@ def process(input_image, prompt,
             gs=10.0,
             rs=0.0,
             gpu_memory_preservation=6,
+            enable_preview=True,
             use_teacache=False,
             mp4_crf=16
             ):
-    global stream
+    global stream, input_image_debug_value, prompt_debug_value, total_second_length_debug_value
 
     if torch.cuda.device_count() == 0:
         gr.Warning('Set this space to GPU config to make it work.')
-        return None, None, None, None, None, None
+        yield gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
+        return
 
     if randomize_seed:
         seed = random.randint(0, np.iinfo(np.int32).max)
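
Note: process (and process_video below) is a generator that streams UI updates to Gradio, so the old no-GPU branch that did `return None, None, ...` never actually delivered those outputs; the fix yields one final update per output component and then returns. The pattern, reduced to a sketch:

    import torch
    import gradio as gr

    def process_sketch():
        if torch.cuda.device_count() == 0:
            gr.Warning('Set this space to GPU config to make it work.')
            yield gr.update()  # push one last update to the output components
            return             # a bare return then ends the generator cleanly
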
@@ -554,7 +559,7 @@ def process(input_image, prompt,
 
     stream = AsyncStream()
 
-    async_run(worker, input_image, prompts, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf)
+    async_run(worker, input_image, prompts, n_prompt, seed, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, mp4_crf)
 
     output_filename = None
 
@@ -570,12 +575,13 @@ def process(input_image, prompt,
             yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True)
 
         if flag == 'end':
-            return output_filename, gr.update(visible=False), gr.update(), '', gr.update(interactive=True), gr.update(interactive=False)
+            yield output_filename, gr.update(visible=False), gr.update(), 'To make all your generated scenes consistent, you can then apply a face swap on the main character.', gr.update(interactive=True), gr.update(interactive=False)
+            break
 
 # 20250506 pftq: Modified worker to accept video input and clean frame count
 @spaces.GPU()
 @torch.no_grad()
-def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
+def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
     def encode_prompt(prompt, n_prompt):
         llama_vec, clip_l_pooler = encode_prompt_conds(prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)
 
@@ -618,13 +624,8 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
         stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Video processing ...'))))
 
         # 20250506 pftq: Encode video
-        #H, W = 640, 640 # Default resolution, will be adjusted
-        #height, width = find_nearest_bucket(H, W, resolution=640)
-        #start_latent, input_image_np, history_latents, fps = video_encode(input_video, vae, height, width, vae_batch_size=16, device=gpu)
         start_latent, input_image_np, video_latents, fps, height, width, input_video_pixels = video_encode(input_video, resolution, no_resize, vae, vae_batch_size=vae_batch, device=gpu)
 
-        #Image.fromarray(input_image_np).save(os.path.join(outputs_folder, f'{job_id}.png'))
-
         # CLIP Vision
         stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))
 
@@ -640,23 +641,27 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
         total_latent_sections = (total_second_length * fps) / (latent_window_size * 4)
         total_latent_sections = int(max(round(total_latent_sections), 1))
 
-        def callback(d):
-            preview = d['denoised']
-            preview = vae_decode_fake(preview)
-
-            preview = (preview * 255.0).detach().cpu().numpy().clip(0, 255).astype(np.uint8)
-            preview = einops.rearrange(preview, 'b c t h w -> (b h) (t w) c')
-
-            if stream.input_queue.top() == 'end':
-                stream.output_queue.push(('end', None))
-                raise KeyboardInterrupt('User ends the task.')
-
-            current_step = d['i'] + 1
-            percentage = int(100.0 * current_step / steps)
-            hint = f'Sampling {current_step}/{steps}'
-            desc = f'Total frames: {int(max(0, total_generated_latent_frames * 4 - 3))}, Video length: {max(0, (total_generated_latent_frames * 4 - 3) / fps) :.2f} seconds (FPS-{fps}), Seed: {seed}, Video {idx+1} of {batch}. The video is generating part {section_index+1} of {total_latent_sections}...'
-            stream.output_queue.push(('progress', (preview, desc, make_progress_bar_html(percentage, hint))))
-            return
+        if enable_preview:
+            def callback(d):
+                preview = d['denoised']
+                preview = vae_decode_fake(preview)
+
+                preview = (preview * 255.0).detach().cpu().numpy().clip(0, 255).astype(np.uint8)
+                preview = einops.rearrange(preview, 'b c t h w -> (b h) (t w) c')
+
+                if stream.input_queue.top() == 'end':
+                    stream.output_queue.push(('end', None))
+                    raise KeyboardInterrupt('User ends the task.')
+
+                current_step = d['i'] + 1
+                percentage = int(100.0 * current_step / steps)
+                hint = f'Sampling {current_step}/{steps}'
+                desc = f'Total frames: {int(max(0, total_generated_latent_frames * 4 - 3))}, Video length: {max(0, (total_generated_latent_frames * 4 - 3) / fps) :.2f} seconds (FPS-{fps}), Resolution: {height}px * {width}px, Seed: {seed}, Video {idx+1} of {batch}. The video is generating part {section_index+1} of {total_latent_sections}...'
+                stream.output_queue.push(('progress', (preview, desc, make_progress_bar_html(percentage, hint))))
+                return
+        else:
+            def callback(d):
+                return
 
         for idx in range(batch):
             if batch > 1:
@@ -677,10 +682,6 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
             history_pixels = None
             previous_video = None
 
-            # 20250507 pftq: hot fix for initial video being corrupted by vae encoding, issue with ghosting because of slight differences
-            #history_pixels = input_video_pixels
-            #save_bcthw_as_mp4(vae_decode(video_latents, vae).cpu(), os.path.join(outputs_folder, f'{job_id}_input_video.mp4'), fps=fps, crf=mp4_crf) # 20250507 pftq: test fast movement corrupted by vae encoding if vae batch size too low
-
             for section_index in range(total_latent_sections):
                 if stream.input_queue.top() == 'end':
                     stream.output_queue.push(('end', None))
@@ -735,12 +736,14 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
                     clean_latents_4x = splits[split_idx]
                     split_idx = 1
                     if clean_latents_4x.shape[2] < 2:  # 20250507 pftq: edge case for <=1 sec videos
-                        clean_latents_4x = torch.cat([clean_latents_4x, clean_latents_4x], dim=2)
+                        print("Edge case for <=1 sec videos 4x")
+                        clean_latents_4x = clean_latents_4x.expand(-1, -1, 2, -1, -1)
 
                 if num_2x_frames > 0 and split_idx < len(splits):
                     clean_latents_2x = splits[split_idx]
                     if clean_latents_2x.shape[2] < 2:  # 20250507 pftq: edge case for <=1 sec videos
-                        clean_latents_2x = torch.cat([clean_latents_2x, clean_latents_2x], dim=2)
+                        print("Edge case for <=1 sec videos 2x")
+                        clean_latents_2x = clean_latents_2x.expand(-1, -1, 2, -1, -1)
                     split_idx += 1
                 elif clean_latents_2x.shape[2] < 2:  # 20250507 pftq: edge case for <=1 sec videos
                     clean_latents_2x = clean_latents_4x
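
Note: the <=1 sec edge case now duplicates the lone context frame with expand instead of torch.cat. expand returns a broadcast view over the size-1 time dimension, so nothing is copied, whereas cat allocated a new tensor. A minimal illustration, assuming the same (b, c, t, h, w) layout as the latents above:

    import torch

    x = torch.zeros(1, 4, 1, 8, 8)                 # a single latent frame, t == 1
    y = x.expand(-1, -1, 2, -1, -1)                # a view with t == 2, no copy made
    print(y.shape, y.data_ptr() == x.data_ptr())   # torch.Size([1, 4, 2, 8, 8]) True
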
@@ -804,27 +807,28 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
                 if not high_vram:
                     unload_complete_models()
 
-                output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
-
-                # 20250506 pftq: Use input video FPS for output
-                save_bcthw_as_mp4(history_pixels, output_filename, fps=fps, crf=mp4_crf)
-                print(f"Latest video saved: {output_filename}")
-                # 20250508 pftq: Save prompt to mp4 metadata comments
-                set_mp4_comments_imageio_ffmpeg(output_filename, f"Prompt: {prompts} | Negative Prompt: {n_prompt}");
-                print(f"Prompt saved to mp4 metadata comments: {output_filename}")
-
-                # 20250506 pftq: Clean up previous partial files
-                if previous_video is not None and os.path.exists(previous_video):
-                    try:
-                        os.remove(previous_video)
-                        print(f"Previous partial video deleted: {previous_video}")
-                    except Exception as e:
-                        print(f"Error deleting previous partial video {previous_video}: {e}")
-                previous_video = output_filename
-
-                print(f'Decoded. Current latent shape {real_history_latents.shape}; pixel shape {history_pixels.shape}')
-
-                stream.output_queue.push(('file', output_filename))
+                if enable_preview or section_index == total_latent_sections - 1:
+                    output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
+
+                    # 20250506 pftq: Use input video FPS for output
+                    save_bcthw_as_mp4(history_pixels, output_filename, fps=fps, crf=mp4_crf)
+                    print(f"Latest video saved: {output_filename}")
+                    # 20250508 pftq: Save prompt to mp4 metadata comments
+                    set_mp4_comments_imageio_ffmpeg(output_filename, f"Prompt: {prompts} | Negative Prompt: {n_prompt}");
+                    print(f"Prompt saved to mp4 metadata comments: {output_filename}")
+
+                    # 20250506 pftq: Clean up previous partial files
+                    if previous_video is not None and os.path.exists(previous_video):
+                        try:
+                            os.remove(previous_video)
+                            print(f"Previous partial video deleted: {previous_video}")
+                        except Exception as e:
+                            print(f"Error deleting previous partial video {previous_video}: {e}")
+                    previous_video = output_filename
+
+                    print(f'Decoded. Current latent shape {real_history_latents.shape}; pixel shape {history_pixels.shape}')
+
+                    stream.output_queue.push(('file', output_filename))
 
             seed = (seed + 1) % np.iinfo(np.int32).max
 
@@ -839,17 +843,18 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
         stream.output_queue.push(('end', None))
         return
 
-def get_duration_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
-    return total_second_length * 60 * (0.7 if use_teacache else 2)
+def get_duration_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
+    return total_second_length * 60 * (0.7 if use_teacache else 2) * (2**((resolution - 640) / 640)) * (1 + ((steps - 25) / 100))
 
 # 20250506 pftq: Modified process to pass clean frame count, etc from video_encode
 @spaces.GPU(duration=get_duration_video)
-def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
+def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
     global stream, high_vram
 
     if torch.cuda.device_count() == 0:
         gr.Warning('Set this space to GPU config to make it work.')
-        return None, None, None, None, None, None
+        yield gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
+        return
 
     if randomize_seed:
         seed = random.randint(0, np.iinfo(np.int32).max)
@@ -877,7 +882,7 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
     stream = AsyncStream()
 
     # 20250506 pftq: Pass num_clean_frames, vae_batch, etc
-    async_run(worker_video, input_video, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch)
+    async_run(worker_video, input_video, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch)
 
     output_filename = None
 
@@ -894,7 +899,8 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
             yield output_filename, gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True) # 20250506 pftq: Keep refreshing the video in case it got hidden when the tab was in the background
 
         if flag == 'end':
-            return output_filename, gr.update(visible=False), desc+' Video complete.', '', gr.update(interactive=True), gr.update(interactive=False)
+            yield output_filename, gr.update(visible=False), desc+' Video complete. To make all your generated scenes consistent, you can then apply a face swap on the main character.', '', gr.update(interactive=True), gr.update(interactive=False)
+            break
 
 def end_process():
     stream.input_queue.push('end')
@@ -934,8 +940,23 @@ title_html = """
 <p>This space is ready to work on ZeroGPU and GPU and has been tested successfully on ZeroGPU. Please leave a <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/FramePack/discussions/new">message in discussion</a> if you encounter issues.</p>
 """
 
+js = """
+function createGradioAnimation() {
+    window.addEventListener("beforeunload", function (e) {
+        if (document.getElementById('end-button') && !document.getElementById('end-button').disabled) {
+            var confirmationMessage = 'A process is still running. '
+                + 'If you leave before saving, your changes will be lost.';
+
+            (e || window.event).returnValue = confirmationMessage;
+            return confirmationMessage;
+        }
+    });
+    return 'Animation created';
+}
+"""
+
 css = make_progress_bar_css()
-block = gr.Blocks(css=css).queue()
+block = gr.Blocks(css=css, js=js).queue()
 with block:
     if torch.cuda.device_count() == 0:
         with gr.Row():
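
Note: the js string is handed to gr.Blocks(js=...), which runs the function once when the page loads; the beforeunload handler it registers then warns before navigating away whenever the button with elem_id="end-button" is enabled, i.e. while a generation is still running. A minimal sketch of the same hook, assuming a recent Gradio release:

    import gradio as gr

    js = "function onLoad() { console.log('page ready'); }"

    with gr.Blocks(js=js) as demo:  # the js function runs once, on page load
        gr.Markdown("Hello")
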
@@ -946,12 +967,13 @@ with block:
             </big></big></big></p>
             """)
     gr.HTML(title_html)
+    local_storage = gr.BrowserState(default_local_storage)
     with gr.Row():
         with gr.Column():
-            generation_mode = gr.Radio([["Text-to-Video", "text"], ["Image-to-Video", "image"], ["Video Extension", "video"]], label="Generation mode", value = "image")
-            text_to_video_hint = gr.HTML("I discourage to use the Text-to-Video feature. You should rather generate an image with Flux and use Image-to-Video. You will save time.", visible=False)
+            generation_mode = gr.Radio([["Text-to-Video", "text"], ["Image-to-Video", "image"], ["Video Extension", "video"]], elem_id="generation-mode", label="Generation mode", value = "image")
+            text_to_video_hint = gr.HTML("I discourage using the Text-to-Video feature. Instead, generate an image with Flux and use Image-to-Video; you will save time.")
            input_image = gr.Image(sources='upload', type="numpy", label="Image", height=320)
-            input_video = gr.Video(sources='upload', label="Input Video", height=320, visible=False)
+            input_video = gr.Video(sources='upload', label="Input Video", height=320)
             timeless_prompt = gr.Textbox(label="Timeless prompt", info='Used on the whole duration of the generation', value='', placeholder="The creature starts to move, fast motion, fixed camera, focus motion, consistent arm, consistent position, mute colors, insanely detailed")
             prompt_number = gr.Slider(label="Timed prompt number", minimum=0, maximum=1000, value=0, step=1, info='Prompts will automatically appear')
 
@@ -967,23 +989,29 @@ with block:
 
             with gr.Row():
                 start_button = gr.Button(value="🎥 Generate", variant="primary")
-                start_button_video = gr.Button(value="🎥 Generate", variant="primary", visible=False)
-                end_button = gr.Button(value="End Generation", variant="stop", interactive=False, visible=False)
+                start_button_video = gr.Button(value="🎥 Generate", variant="primary")
+                end_button = gr.Button(elem_id="end-button", value="End Generation", variant="stop", interactive=False)
 
             with gr.Accordion("Advanced settings", open=False):
-                with gr.Row():
-                    use_teacache = gr.Checkbox(label='Use TeaCache', value=False, info='Faster speed, but often makes hands and fingers slightly worse.')
-                    no_resize = gr.Checkbox(label='Force Original Video Resolution (no Resizing)', value=False, info='Might run out of VRAM (720p requires > 24GB VRAM).', visible=False)
+                enable_preview = gr.Checkbox(label='Enable preview', value=True, info='Display a preview around each second generated, but it costs about 2 sec. per second generated.')
+                use_teacache = gr.Checkbox(label='Use TeaCache', value=False, info='Faster speed, but often makes hands and fingers slightly worse.')
 
-                n_prompt = gr.Textbox(label="Negative Prompt", value="Missing arm, unrealistic position, blurred, blurry", info='Requires using normal CFG (undistilled) instead of Distilled (set Distilled=1 and CFG > 1).')
-                randomize_seed = gr.Checkbox(label='Randomize seed', value=True, info='If checked, the seed is always different')
-                seed = gr.Slider(label="Seed", minimum=0, maximum=np.iinfo(np.int32).max, step=1, randomize=True)
+                n_prompt = gr.Textbox(label="Negative Prompt", value="Missing arm, unrealistic position, impossible contortion, blurred, blurry", info='Requires using normal CFG (undistilled) instead of Distilled (set Distilled=1 and CFG > 1).')
 
                 latent_window_size = gr.Slider(label="Latent Window Size", minimum=1, maximum=33, value=9, step=1, info='Generate more frames at a time (larger chunks). Less degradation and better blending, but higher VRAM cost. Should not change.')
                 steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=25, step=1, info='Increase for more quality, especially if using high non-distilled CFG. Changing this value is not recommended.')
-                batch = gr.Slider(label="Batch Size (Number of Videos)", minimum=1, maximum=1000, value=1, step=1, info='Generate multiple videos each with a different seed.', visible=False)
 
-                resolution = gr.Number(label="Resolution (max width or height)", value=640, precision=0, visible=False)
+                with gr.Row():
+                    no_resize = gr.Checkbox(label='Force Original Video Resolution (no Resizing)', value=False, info='Might run out of VRAM (720p requires > 24GB VRAM).')
+                    resolution = gr.Dropdown([
+                        640,
+                        672,
+                        704,
+                        768,
+                        832,
+                        864,
+                        960
+                    ], value=640, label="Resolution (max width or height)")
 
                 # 20250506 pftq: Reduced default distilled guidance scale to improve adherence to input video
                 cfg = gr.Slider(label="CFG Scale", minimum=1.0, maximum=32.0, value=1.0, step=0.01, info='Use this instead of Distilled for more detail/control + Negative Prompt (make sure Distilled is set to 1). Doubles render time. Should not change.')
@@ -992,7 +1020,7 @@ with block:
 
 
                 # 20250506 pftq: Renamed slider to Number of Context Frames and updated description
-                num_clean_frames = gr.Slider(label="Number of Context Frames", minimum=2, maximum=10, value=5, step=1, info="Retain more video details but increase memory use. Reduce to 2 to avoid memory issues or to give more weight to the prompt.", visible=False)
+                num_clean_frames = gr.Slider(label="Number of Context Frames", minimum=2, maximum=10, value=5, step=1, info="Retain more video details but increase memory use. Reduce to 2 to avoid memory issues or to give more weight to the prompt.")
 
                 default_vae = 32
                 if high_vram:
@@ -1000,12 +1028,16 @@ with block:
                 elif free_mem_gb>=20:
                     default_vae = 64
 
-                vae_batch = gr.Slider(label="VAE Batch Size for Input Video", minimum=4, maximum=256, value=default_vae, step=4, info="Reduce if running out of memory. Increase for better quality frames during fast motion.", visible=False)
+                vae_batch = gr.Slider(label="VAE Batch Size for Input Video", minimum=4, maximum=256, value=default_vae, step=4, info="Reduce if running out of memory. Increase for better quality frames during fast motion.")
 
 
                 gpu_memory_preservation = gr.Slider(label="GPU Inference Preserved Memory (GB) (larger means slower)", minimum=6, maximum=128, value=6, step=0.1, info="Set this number to a larger value if you encounter OOM. Larger value causes slower speed.")
 
                 mp4_crf = gr.Slider(label="MP4 Compression", minimum=0, maximum=100, value=16, step=1, info="Lower means better quality. 0 is uncompressed. Change to 16 if you get black outputs.")
+                batch = gr.Slider(label="Batch Size (Number of Videos)", minimum=1, maximum=1000, value=1, step=1, info='Generate multiple videos each with a different seed.')
+                with gr.Row():
+                    randomize_seed = gr.Checkbox(label='Randomize seed', value=True, info='If checked, the seed is always different')
+                    seed = gr.Slider(label="Seed", minimum=0, maximum=np.iinfo(np.int32).max, step=1, randomize=True)
 
             with gr.Column():
                 preview_image = gr.Image(label="Next Latents", height=200, visible=False)
@@ -1014,8 +1046,28 @@ with block:
                 progress_bar = gr.HTML('', elem_classes='no-generating-animation')
 
     # 20250506 pftq: Updated inputs to include num_clean_frames
-    ips = [input_image, final_prompt, generation_mode, n_prompt, randomize_seed, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf]
-    ips_video = [input_video, final_prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch]
+    ips = [input_image, final_prompt, generation_mode, n_prompt, randomize_seed, seed, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, mp4_crf]
+    ips_video = [input_video, final_prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch]
+
+    def save_preferences(preferences, value):
+        preferences["generation-mode"] = value
+        return preferences
+
+    def load_preferences(saved_prefs):
+        saved_prefs = init_preferences(saved_prefs)
+        return saved_prefs["generation-mode"]
+
+    def init_preferences(saved_prefs):
+        if saved_prefs is None:
+            saved_prefs = default_local_storage
+        return saved_prefs
+
+    def check_parameters(generation_mode, input_image, input_video):
+        if generation_mode == "image" and input_image is None:
+            raise gr.Error("Please provide an image to extend.")
+        if generation_mode == "video" and input_video is None:
+            raise gr.Error("Please provide a video to extend.")
+        return gr.update(interactive=True)
 
     prompt_number.change(fn=handle_prompt_number_change, inputs=[], outputs=[])
     timeless_prompt.change(fn=handle_timeless_prompt_change, inputs=[timeless_prompt], outputs=[final_prompt])
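
Note: the preference round trip pairs gr.BrowserState (persisted in the browser's localStorage) with a save on generation_mode.change and a load on block.load further down; init_preferences guards the first visit, when the stored value is still None. The same mechanism reduced to a sketch, assuming a Gradio release that provides gr.BrowserState:

    import gradio as gr

    defaults = {"generation-mode": "image"}

    with gr.Blocks() as demo:
        prefs = gr.BrowserState(defaults)  # survives page reloads
        mode = gr.Radio(["image", "video"], value="image")

        def save(p, v):
            p = p or dict(defaults)        # first visit: the stored state may be None
            p["generation-mode"] = v
            return p

        mode.change(save, [prefs, mode], [prefs])
        demo.load(lambda p: (p or defaults)["generation-mode"], [prefs], [mode])
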
@@ -1027,32 +1079,127 @@ with block:
     ], outputs = [end_button], queue = False, show_progress = False).success(fn=process_video, inputs=ips_video, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button_video, end_button])
     end_button.click(fn=end_process)
 
+    generation_mode.change(fn = save_preferences, inputs = [
+        local_storage,
+        generation_mode,
+    ], outputs = [
+        local_storage
+    ])
+
-    gr.Examples(
+    with gr.Row(elem_id="image_examples", visible=False):
+        gr.Examples(
             examples = [
+                [
+                    "./img_examples/Example1.png", # input_image
+                    "A dolphin emerges from the water, photorealistic, realistic, intricate details, 8k, insanely detailed",
+                    "image", # generation_mode
+                    "Missing arm, unrealistic position, impossible contortion, blurred, blurry", # n_prompt
+                    True, # randomize_seed
+                    42, # seed
+                    672, # resolution
+                    1, # total_second_length
+                    9, # latent_window_size
+                    50, # steps
+                    1.0, # cfg
+                    10.0, # gs
+                    0.0, # rs
+                    6, # gpu_memory_preservation
+                    False, # enable_preview
+                    False, # use_teacache
+                    16 # mp4_crf
+                ],
                 [
                     "./img_examples/Example1.png", # input_image
                     "View of the sea as far as the eye can see, from the seaside, a piece of land is barely visible on the horizon at the middle, the sky is radiant, reflections of the sun in the water, photorealistic, realistic, intricate details, 8k, insanely detailed",
                     "image", # generation_mode
-                    "Missing arm, unrealistic position, blurred, blurry", # n_prompt
+                    "Missing arm, unrealistic position, impossible contortion, blurred, blurry", # n_prompt
                     True, # randomize_seed
                     42, # seed
+                    672, # resolution
                     1, # total_second_length
                     9, # latent_window_size
-                    25, # steps
+                    35, # steps
                     1.0, # cfg
                     10.0, # gs
                     0.0, # rs
                     6, # gpu_memory_preservation
+                    False, # enable_preview
                     False, # use_teacache
                     16 # mp4_crf
                 ],
+            ],
+            run_on_click = True,
+            fn = process,
+            inputs = ips,
+            outputs = [result_video, preview_image, progress_desc, progress_bar, start_button, end_button],
+            cache_examples = torch.cuda.device_count() > 0,
+        )
+
+    with gr.Row(elem_id="video_examples", visible=False):
+        gr.Examples(
+            examples = [
+                [
+                    "./img_examples/Example1.mp4", # input_video
+                    "View of the sea as far as the eye can see, from the seaside, a piece of land is barely visible on the horizon at the middle, the sky is radiant, reflections of the sun in the water, photorealistic, realistic, intricate details, 8k, insanely detailed",
+                    "Missing arm, unrealistic position, blurred, blurry", # n_prompt
+                    True, # randomize_seed
+                    42, # seed
+                    1, # batch
+                    672, # resolution
+                    1, # total_second_length
+                    9, # latent_window_size
+                    50, # steps
+                    1.0, # cfg
+                    10.0, # gs
+                    0.0, # rs
+                    6, # gpu_memory_preservation
+                    False, # enable_preview
+                    False, # use_teacache
+                    False, # no_resize
+                    16, # mp4_crf
+                    5, # num_clean_frames
+                    default_vae
+                ],
+                [
+                    "./img_examples/Example1.mp4", # input_video
+                    "View of the sea as far as the eye can see, from the seaside, a piece of land is barely visible on the horizon at the middle, the sky is radiant, reflections of the sun in the water, photorealistic, realistic, intricate details, 8k, insanely detailed",
+                    "Missing arm, unrealistic position, blurred, blurry", # n_prompt
+                    True, # randomize_seed
+                    42, # seed
+                    1, # batch
+                    672, # resolution
+                    1, # total_second_length
+                    9, # latent_window_size
+                    35, # steps
+                    1.0, # cfg
+                    10.0, # gs
+                    0.0, # rs
+                    6, # gpu_memory_preservation
+                    False, # enable_preview
+                    False, # use_teacache
+                    False, # no_resize
+                    16, # mp4_crf
+                    5, # num_clean_frames
+                    default_vae
+                ],
+            ],
+            run_on_click = True,
+            fn = process_video,
+            inputs = ips_video,
+            outputs = [result_video, preview_image, progress_desc, progress_bar, start_button_video, end_button],
+            cache_examples = torch.cuda.device_count() > 0,
+        )
+
+    gr.Examples(
+        examples = [
             [
                 "./img_examples/Example1.png", # input_image
                 "A dolphin emerges from the water, photorealistic, realistic, intricate details, 8k, insanely detailed",
                 "image", # generation_mode
-                "Missing arm, unrealistic position, blurred, blurry", # n_prompt
+                "Missing arm, unrealistic position, impossible contortion, blurred, blurry", # n_prompt
                 True, # randomize_seed
                 42, # seed
+                672, # resolution
                 1, # total_second_length
                 9, # latent_window_size
                 25, # steps
@@ -1060,7 +1207,8 @@ with block:
                 10.0, # gs
                 0.0, # rs
                 6, # gpu_memory_preservation
-                True, # use_teacache
+                False, # enable_preview
+                False, # use_teacache
                 16 # mp4_crf
             ]
         ],
@@ -1068,7 +1216,7 @@ with block:
         fn = process,
         inputs = ips,
         outputs = [result_video, preview_image, progress_desc, progress_bar, start_button, end_button],
-        cache_examples = torch.cuda.device_count() > 0,
+        cache_examples = False,
     )
 
     gr.Examples(
@@ -1080,7 +1228,7 @@ with block:
             True, # randomize_seed
             42, # seed
             1, # batch
-            640, # resolution
+            672, # resolution
             1, # total_second_length
             9, # latent_window_size
             25, # steps
@@ -1088,37 +1236,52 @@ with block:
             10.0, # gs
             0.0, # rs
             6, # gpu_memory_preservation
+            False, # enable_preview
             False, # use_teacache
             False, # no_resize
             16, # mp4_crf
             5, # num_clean_frames
             default_vae
-            ],
+            ]
         ],
         run_on_click = True,
         fn = process_video,
         inputs = ips_video,
         outputs = [result_video, preview_image, progress_desc, progress_bar, start_button_video, end_button],
-        cache_examples = torch.cuda.device_count() > 0,
+        cache_examples = False,
     )
-
-    gr.Markdown('''
-    # Guide
-    To make all your generated scenes consistent, you can then apply a face swap on the main character.
-    ''')
 
     def handle_generation_mode_change(generation_mode_data):
         if generation_mode_data == "text":
-            return [gr.update(visible = True), gr.update(visible = False), gr.update(visible = False), gr.update(visible = True), gr.update(visible = False), gr.update(visible = False), gr.update(visible = False), gr.update(visible = False), gr.update(visible = False), gr.update(visible = False)]
+            return [gr.update(visible = True), gr.update(visible = False), gr.update(visible = False), gr.update(visible = True), gr.update(visible = False), gr.update(visible = False), gr.update(visible = False), gr.update(visible = False), gr.update(visible = False)]
        elif generation_mode_data == "image":
-            return [gr.update(visible = False), gr.update(visible = True), gr.update(visible = False), gr.update(visible = True), gr.update(visible = False), gr.update(visible = False), gr.update(visible = False), gr.update(visible = False), gr.update(visible = False), gr.update(visible = False)]
+            return [gr.update(visible = False), gr.update(visible = True), gr.update(visible = False), gr.update(visible = True), gr.update(visible = False), gr.update(visible = False), gr.update(visible = False), gr.update(visible = False), gr.update(visible = False)]
         elif generation_mode_data == "video":
-            return [gr.update(visible = False), gr.update(visible = False), gr.update(visible = True), gr.update(visible = False), gr.update(visible = True), gr.update(visible = True), gr.update(visible = True), gr.update(visible = True), gr.update(visible = True), gr.update(visible = True)]
+            return [gr.update(visible = False), gr.update(visible = False), gr.update(visible = True), gr.update(visible = False), gr.update(visible = True), gr.update(visible = True), gr.update(visible = True), gr.update(visible = True), gr.update(visible = True)]
 
+
     generation_mode.change(
         fn=handle_generation_mode_change,
         inputs=[generation_mode],
-        outputs=[text_to_video_hint, input_image, input_video, start_button, start_button_video, no_resize, batch, resolution, num_clean_frames, vae_batch]
+        outputs=[text_to_video_hint, input_image, input_video, start_button, start_button_video, no_resize, batch, num_clean_frames, vae_batch]
+    )
+
+    # Update display when the page loads
+    block.load(
+        fn=handle_generation_mode_change, inputs = [
+            generation_mode
+        ], outputs = [
+            text_to_video_hint, input_image, input_video, start_button, start_button_video, no_resize, batch, num_clean_frames, vae_batch
+        ]
+    )
+
+    # Load saved preferences when the page loads
+    block.load(
+        fn=load_preferences, inputs = [
+            local_storage
+        ], outputs = [
+            generation_mode
+        ]
     )
 
 block.launch(mcp_server=True, ssr_mode=False)