Fabrice-TIERCELIN committed
Commit 7df9b79 (verified) · Parent(s): 9b03991

Optimize GPU time

Files changed (1): app.py (+119 −47)
app.py CHANGED
@@ -809,17 +809,16 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
      stream.output_queue.push(('end', None))
      return
 
- def get_duration(input_image, image_position, prompt, generation_mode, n_prompt, randomize_seed, seed, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, mp4_crf):
+ def get_duration(input_image, image_position, prompts, generation_mode, n_prompt, seed, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, mp4_crf):
      return total_second_length * 60 * (0.9 if use_teacache else 1.5) * (1 + ((steps - 25) / 100))
 
  # Remove this decorator if you run on local
  @spaces.GPU(duration=get_duration)
- def process(input_image,
+ def process_on_gpu(input_image,
              image_position=0,
-             prompt="",
+             prompts=[""],
              generation_mode="image",
              n_prompt="",
-             randomize_seed=True,
              seed=31337,
              resolution=640,
              total_second_length=5,
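The duration callback above is where the GPU time is saved: @spaces.GPU accepts a callable, as this commit itself shows, so the reserved GPU window can be sized per job instead of using a fixed worst case. As a quick sanity check of the budget it computes, a standalone sketch mirroring the committed formula (gpu_seconds is a hypothetical name; the printed figures follow from the arithmetic, not from the commit):

def gpu_seconds(total_second_length, steps, use_teacache):
    # 0.9 s of GPU per second of video with TeaCache, 1.5 s without,
    # nudged by 1% per sampling step away from the 25-step baseline
    return total_second_length * 60 * (0.9 if use_teacache else 1.5) * (1 + ((steps - 25) / 100))

print(gpu_seconds(5, 25, True))   # 270.0 -> 4.5 GPU-minutes for a 5 s clip
print(gpu_seconds(5, 25, False))  # 450.0 for the same clip without TeaCache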
@@ -835,25 +834,6 @@ def process(input_image,
              ):
      start = time.time()
      global stream
- 
-     if torch.cuda.device_count() == 0:
-         gr.Warning('Set this space to GPU config to make it work.')
-         yield gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(visible = False)
-         return
- 
-     if randomize_seed:
-         seed = random.randint(0, np.iinfo(np.int32).max)
- 
-     prompts = prompt.split(";")
- 
-     # assert input_image is not None, 'No input image!'
-     if generation_mode == "text":
-         default_height, default_width = 640, 640
-         input_image = np.ones((default_height, default_width, 3), dtype=np.uint8) * 255
-         print("No input image provided. Using a blank white image.")
- 
-     yield gr.update(label="Previewed Frames"), None, '', '', gr.update(interactive=False), gr.update(interactive=True), gr.update()
- 
      stream = AsyncStream()
 
      async_run(worker, input_image, image_position, prompts, n_prompt, seed, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, mp4_crf)
@@ -885,14 +865,25 @@ def process(input_image,
      "You can upscale the result with RIFE. To make all your generated scenes consistent, you can then apply a face swap on the main character. If you do not see the generated video above, the process may have failed. See the logs for more information. If you see an error like ''NVML_SUCCESS == r INTERNAL ASSERT FAILED'', you probably haven't enough VRAM. Test an example or other options to compare. You can share your inputs to the original space or set your space in public for a peer review.", gr.update(interactive=True), gr.update(interactive=False), gr.update(visible = False)
      break
 
- def get_duration_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
-     return total_second_length * 60 * (1.5 if use_teacache else 2.5) * (1 + ((steps - 25) / 100))
- 
- # Remove this decorator if you run on local
- @spaces.GPU(duration=get_duration_video)
- def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
-     start = time.time()
-     global stream, high_vram
+ def process(input_image,
+             image_position=0,
+             prompt="",
+             generation_mode="image",
+             n_prompt="",
+             randomize_seed=True,
+             seed=31337,
+             resolution=640,
+             total_second_length=5,
+             latent_window_size=9,
+             steps=25,
+             cfg=1.0,
+             gs=10.0,
+             rs=0.0,
+             gpu_memory_preservation=6,
+             enable_preview=True,
+             use_teacache=False,
+             mp4_crf=16
+             ):
 
      if torch.cuda.device_count() == 0:
          gr.Warning('Set this space to GPU config to make it work.')
@@ -904,24 +895,41 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
 
      prompts = prompt.split(";")
 
-     # 20250506 pftq: Updated assertion for video input
-     assert input_video is not None, 'No input video!'
+     # assert input_image is not None, 'No input image!'
+     if generation_mode == "text":
+         default_height, default_width = 640, 640
+         input_image = np.ones((default_height, default_width, 3), dtype=np.uint8) * 255
+         print("No input image provided. Using a blank white image.")
 
      yield gr.update(label="Previewed Frames"), None, '', '', gr.update(interactive=False), gr.update(interactive=True), gr.update()
 
-     # 20250507 pftq: Even the H100 needs offloading if the video dimensions are 720p or higher
-     if high_vram and (no_resize or resolution>640):
-         print("Disabling high vram mode due to no resize and/or potentially higher resolution...")
-         high_vram = False
-         vae.enable_slicing()
-         vae.enable_tiling()
-         DynamicSwapInstaller.install_model(transformer, device=gpu)
-         DynamicSwapInstaller.install_model(text_encoder, device=gpu)
- 
-     # 20250508 pftq: automatically set distilled cfg to 1 if cfg is used
-     if cfg > 1:
-         gs = 1
+     yield from process_on_gpu(input_image,
+                               image_position,
+                               prompts,
+                               generation_mode,
+                               n_prompt,
+                               seed,
+                               resolution,
+                               total_second_length,
+                               latent_window_size,
+                               steps,
+                               cfg,
+                               gs,
+                               rs,
+                               gpu_memory_preservation,
+                               enable_preview,
+                               use_teacache,
+                               mp4_crf
+     )
+ 
+ def get_duration_video(input_video, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
+     return total_second_length * 60 * (1.5 if use_teacache else 2.5) * (1 + ((steps - 25) / 100))
+ 
+ # Remove this decorator if you run on local
+ @spaces.GPU(duration=get_duration_video)
+ def process_video_on_gpu(input_video, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
+     start = time.time()
+     global stream
      stream = AsyncStream()
 
      # 20250506 pftq: Pass num_clean_frames, vae_batch, etc
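The hunks above carry the actual optimization: each public handler is split into an undecorated wrapper (process, and process_video further down) that does the CPU-only preparation, and an inner *_on_gpu generator that carries the @spaces.GPU decorator and is entered with yield from. Validation, seed randomization and prompt splitting no longer run inside the billed GPU window. A minimal sketch of the pattern, with simplified names and a toy duration function rather than the app's real signatures:

import spaces

def budget(items):
    # duration callback: same role as get_duration above,
    # called with the same arguments as the decorated function
    return 10 + 2 * len(items)

@spaces.GPU(duration=budget)
def run_on_gpu(items):
    # only this generator is billed as GPU time
    for item in items:
        yield item.upper()  # stand-in for the sampling loop

def run(raw):
    # Gradio-facing wrapper: cheap prep stays outside the GPU window
    items = raw.split(";")
    yield from run_on_gpu(items)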
@@ -955,6 +963,39 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
      " You can upscale the result with RIFE. To make all your generated scenes consistent, you can then apply a face swap on the main character. If you do not see the generated video above, the process may have failed. See the logs for more information. If you see an error like ''NVML_SUCCESS == r INTERNAL ASSERT FAILED'', you probably haven't enough VRAM. Test an example or other options to compare. You can share your inputs to the original space or set your space in public for a peer review.", '', gr.update(interactive=True), gr.update(interactive=False), gr.update(visible = False)
      break
 
+ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
+     global high_vram
+ 
+     if torch.cuda.device_count() == 0:
+         gr.Warning('Set this space to GPU config to make it work.')
+         yield gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(visible = False)
+         return
+ 
+     if randomize_seed:
+         seed = random.randint(0, np.iinfo(np.int32).max)
+ 
+     prompts = prompt.split(";")
+ 
+     # 20250506 pftq: Updated assertion for video input
+     assert input_video is not None, 'No input video!'
+ 
+     yield gr.update(label="Previewed Frames"), None, '', '', gr.update(interactive=False), gr.update(interactive=True), gr.update()
+ 
+     # 20250507 pftq: Even the H100 needs offloading if the video dimensions are 720p or higher
+     if high_vram and (no_resize or resolution>640):
+         print("Disabling high vram mode due to no resize and/or potentially higher resolution...")
+         high_vram = False
+         vae.enable_slicing()
+         vae.enable_tiling()
+         DynamicSwapInstaller.install_model(transformer, device=gpu)
+         DynamicSwapInstaller.install_model(text_encoder, device=gpu)
+ 
+     # 20250508 pftq: automatically set distilled cfg to 1 if cfg is used
+     if cfg > 1:
+         gs = 1
+ 
+     yield from process_video_on_gpu(input_video, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch)
+ 
  def end_process():
      stream.input_queue.push('end')
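Note that in the new process_video wrapper the pre-GPU work keeps its original order: the high-VRAM fallback (VAE slicing and tiling plus DynamicSwapInstaller offloading) and the cfg > 1 override of gs still run before process_video_on_gpu opens the GPU window. The two VAE calls are the standard diffusers memory levers; a minimal illustration of the same trade-off, assuming a diffusers-style VAE (fit_decode_in_memory is a hypothetical helper):

def fit_decode_in_memory(vae, high_vram, no_resize, resolution):
    # At original resolution or above 640 px, decode latents
    # sample-by-sample (slicing) and tile-by-tile (tiling)
    # instead of in one pass, trading speed for peak VRAM.
    if high_vram and (no_resize or resolution > 640):
        vae.enable_slicing()
        vae.enable_tiling()
        return False  # the high-VRAM fast path is off from here on
    return high_vram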
@@ -1038,7 +1079,7 @@ with block:
      timed_prompt = gr.Textbox(label="Timed prompt #" + str(digit + 1), elem_id="timed_prompt_" + str(digit), value="")
      timed_prompt.change(fn=handle_timed_prompt_change, inputs=[timed_prompt_id, timed_prompt], outputs=[final_prompt])
 
- final_prompt = gr.Textbox(label="Final prompt", value='', info='Use ; to separate in time')
+ final_prompt = gr.Textbox(label="Final prompt", value='', info='Use ; to separate in time; beware to write to stop the previous action')
  prompt_hint = gr.HTML("Video extension barely follows the prompt; to force to follow the prompt, you have to set the Distilled CFG Scale to 3.0 and the Context Frames to 2 but the video quality will be poor.")
  total_second_length = gr.Slider(label="Video Length to Generate (seconds)", minimum=1, maximum=120, value=2, step=0.1)
@@ -1054,7 +1095,7 @@ with block:
      n_prompt = gr.Textbox(label="Negative Prompt", value="Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", info='Requires using normal CFG (undistilled) instead of Distilled (set Distilled=1 and CFG > 1).')
 
      latent_window_size = gr.Slider(label="Latent Window Size", minimum=1, maximum=33, value=9, step=1, info='Generate more frames at a time (larger chunks). Less degradation and better blending but higher VRAM cost. Should not change.')
- steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=30, step=1, info='Increase for more quality, especially if using high non-distilled CFG. If your animation has very few motion, you may have brutal brightness change; this can be fixed increasing the steps.')
+ steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=25, step=1, info='Increase for more quality, especially if using high non-distilled CFG. If your animation has very few motion, you may have brutal brightness change; this can be fixed increasing the steps.')
 
      with gr.Row():
          no_resize = gr.Checkbox(label='Force Original Video Resolution (no Resizing)', value=False, info='Might run out of VRAM (720p requires > 24GB VRAM).')
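Lowering the Steps default from 30 to 25 also lines the UI up with the duration callbacks earlier in the diff: at steps = 25 the (1 + ((steps - 25) / 100)) factor is exactly 1.0, so a default run requests the base GPU rate.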
@@ -1105,6 +1146,37 @@ with block:
      ips = [input_image, image_position, final_prompt, generation_mode, n_prompt, randomize_seed, seed, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, mp4_crf]
      ips_video = [input_video, final_prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch]
 
+ gr.Examples(
+     label = "Examples from text",
+     examples = [
+         [
+             None,  # input_image
+             0,  # image_position
+             "Overcrowed street in Japan, photorealistic, realistic, intricate details, 8k, insanely detailed",
+             "text",  # generation_mode
+             "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry",  # n_prompt
+             True,  # randomize_seed
+             42,  # seed
+             672,  # resolution
+             1,  # total_second_length
+             9,  # latent_window_size
+             30,  # steps
+             1.0,  # cfg
+             10.0,  # gs
+             0.0,  # rs
+             6,  # gpu_memory_preservation
+             False,  # enable_preview
+             False,  # use_teacache
+             16  # mp4_crf
+         ]
+     ],
+     run_on_click = True,
+     fn = process,
+     inputs = ips,
+     outputs = [result_video, preview_image, progress_desc, progress_bar, start_button, end_button],
+     cache_examples = False,
+ )
+ 
  gr.Examples(
      label = "Examples from image",
      examples = [
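One detail the new example block depends on: gr.Examples matches each example row positionally against inputs, so the 18 values here must stay in the same order as ips, and run_on_click = True makes clicking the example invoke process directly instead of merely filling the form. A cut-down sketch of the same wiring with toy components (not the app's):

import gradio as gr

def greet(name, excited):
    return name + ("!" if excited else ".")

with gr.Blocks() as demo:
    name = gr.Textbox(label="Name")
    excited = gr.Checkbox(label="Excited")
    out = gr.Textbox(label="Greeting")
    gr.Examples(
        examples=[["Ada", True]],  # row order mirrors the inputs list
        run_on_click=True,
        fn=greet,
        inputs=[name, excited],
        outputs=[out],
        cache_examples=False,
    )

demo.launch()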
 