Fabrice-TIERCELIN committed on
Commit
b214256
·
verified ·
1 Parent(s): 6ec566a

Improve examples

Browse files
Files changed (1) hide show
  1. app.py +32 -26
app.py CHANGED
@@ -4,7 +4,10 @@ import os
4
 
5
  os.environ['HF_HOME'] = os.path.abspath(os.path.realpath(os.path.join(os.path.dirname(__file__), './hf_download')))
6
 
7
- import spaces
 
 
 
8
  import gradio as gr
9
  import torch
10
  import traceback
@@ -113,7 +116,6 @@ default_local_storage = {
113
  "generation-mode": "image",
114
  }
115
 
116
- @spaces.GPU()
117
  @torch.no_grad()
118
  def video_encode(video_path, resolution, no_resize, vae, vae_batch_size=16, device="cuda", width=None, height=None):
119
  """
@@ -449,8 +451,12 @@ def worker(input_image, image_position, prompts, n_prompt, seed, resolution, tot
449
  section_latent_frames = latent_window_size * 2
450
  overlapped_frames = latent_window_size * 4 - 3
451
 
452
- real_history_latents = history_latents[:, :, :min(section_latent_frames, total_generated_latent_frames), :, :] if is_last_frame else history_latents[:, :, -min(section_latent_frames, total_generated_latent_frames):, :, :]
453
- history_pixels = soft_append_bcthw(vae_decode(real_history_latents, vae).cpu(), history_pixels, overlapped_frames) if is_last_frame else soft_append_bcthw(history_pixels, vae_decode(real_history_latents, vae).cpu(), overlapped_frames)
 
 
 
 
454
 
455
  if not high_vram:
456
  unload_complete_models()
@@ -535,7 +541,6 @@ def worker(input_image, image_position, prompts, n_prompt, seed, resolution, tot
535
  return
536
 
537
  # 20250506 pftq: Modified worker to accept video input and clean frame count
538
- @spaces.GPU()
539
  @torch.no_grad()
540
  def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
541
  def encode_prompt(prompt, n_prompt):
@@ -807,6 +812,7 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
807
  def get_duration(input_image, image_position, prompt, generation_mode, n_prompt, randomize_seed, seed, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, mp4_crf):
808
  return total_second_length * 60 * (0.9 if use_teacache else 1.5) * (1 + ((steps - 25) / 100))
809
 
 
810
  @spaces.GPU(duration=get_duration)
811
  def process(input_image,
812
  image_position=0,
@@ -846,7 +852,7 @@ def process(input_image,
846
  input_image = np.ones((default_height, default_width, 3), dtype=np.uint8) * 255
847
  print("No input image provided. Using a blank white image.")
848
 
849
- yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True), gr.update()
850
 
851
  stream = AsyncStream()
852
 
@@ -859,11 +865,11 @@ def process(input_image,
859
 
860
  if flag == 'file':
861
  output_filename = data
862
- yield output_filename, gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True), gr.update()
863
 
864
  if flag == 'progress':
865
  preview, desc, html = data
866
- yield gr.update(), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True), gr.update()
867
 
868
  if flag == 'end':
869
  end = time.time()
@@ -872,7 +878,7 @@ def process(input_image,
872
  secondes = secondes - (minutes * 60)
873
  hours = math.floor(minutes / 60)
874
  minutes = minutes - (hours * 60)
875
- yield output_filename, gr.update(visible=False), gr.update(), "The process has lasted " + \
876
  ((str(hours) + " h, ") if hours != 0 else "") + \
877
  ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
878
  str(secondes) + " sec. " + \
@@ -882,7 +888,7 @@ def process(input_image,
882
  def get_duration_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
883
  return total_second_length * 60 * (1.5 if use_teacache else 2.5) * (1 + ((steps - 25) / 100))
884
 
885
- # 20250506 pftq: Modified process to pass clean frame count, etc from video_encode
886
  @spaces.GPU(duration=get_duration_video)
887
  def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
888
  start = time.time()
@@ -901,7 +907,7 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
901
  # 20250506 pftq: Updated assertion for video input
902
  assert input_video is not None, 'No input video!'
903
 
904
- yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True), gr.update()
905
 
906
  # 20250507 pftq: Even the H100 needs offloading if the video dimensions are 720p or higher
907
  if high_vram and (no_resize or resolution>640):
@@ -928,11 +934,11 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
928
 
929
  if flag == 'file':
930
  output_filename = data
931
- yield output_filename, gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True), gr.update()
932
 
933
  if flag == 'progress':
934
  preview, desc, html = data
935
- yield output_filename, gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True), gr.update() # 20250506 pftq: Keep refreshing the video in case it got hidden when the tab was in the background
936
 
937
  if flag == 'end':
938
  end = time.time()
@@ -941,7 +947,7 @@ def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, re
941
  secondes = secondes - (minutes * 60)
942
  hours = math.floor(minutes / 60)
943
  minutes = minutes - (hours * 60)
944
- yield output_filename, gr.update(visible=False), desc + \
945
  " The process has lasted " + \
946
  ((str(hours) + " h, ") if hours != 0 else "") + \
947
  ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
@@ -1048,7 +1054,7 @@ with block:
1048
  n_prompt = gr.Textbox(label="Negative Prompt", value="Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", info='Requires using normal CFG (undistilled) instead of Distilled (set Distilled=1 and CFG > 1).')
1049
 
1050
  latent_window_size = gr.Slider(label="Latent Window Size", minimum=1, maximum=33, value=9, step=1, info='Generate more frames at a time (larger chunks). Less degradation and better blending but higher VRAM cost. Should not change.')
1051
- steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=25, step=1, info='Increase for more quality, especially if using high non-distilled CFG. If your animation has very few motion, you may have brutal brightness change; this can be fixed increasing the steps.')
1052
 
1053
  with gr.Row():
1054
  no_resize = gr.Checkbox(label='Force Original Video Resolution (no Resizing)', value=False, info='Might run out of VRAM (720p requires > 24GB VRAM).')
@@ -1090,8 +1096,8 @@ with block:
1090
 
1091
  with gr.Column():
1092
  warning = gr.HTML(value = "<center><big>Your computer must <u>not</u> enter into standby mode.</big><br/>On Chrome, you can force to keep a tab alive in <code>chrome://discards/</code></center>", visible = False)
 
1093
  preview_image = gr.Image(label="Next Latents", height=200, visible=False)
1094
- result_video = gr.Video(label="Finished Frames", autoplay=True, show_share_button=False, height=512, loop=True)
1095
  progress_desc = gr.Markdown('', elem_classes='no-generating-animation')
1096
  progress_bar = gr.HTML('', elem_classes='no-generating-animation')
1097
 
@@ -1113,7 +1119,7 @@ with block:
1113
  672, # resolution
1114
  1, # total_second_length
1115
  9, # latent_window_size
1116
- 25, # steps
1117
  1.0, # cfg
1118
  10.0, # gs
1119
  0.0, # rs
@@ -1125,7 +1131,7 @@ with block:
1125
  [
1126
  "./img_examples/Example2.webp", # input_image
1127
  0, # image_position
1128
- "A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens",
1129
  "image", # generation_mode
1130
  "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
1131
  True, # randomize_seed
@@ -1133,7 +1139,7 @@ with block:
1133
  672, # resolution
1134
  2, # total_second_length
1135
  9, # latent_window_size
1136
- 25, # steps
1137
  1.0, # cfg
1138
  10.0, # gs
1139
  0.0, # rs
@@ -1145,7 +1151,7 @@ with block:
1145
  [
1146
  "./img_examples/Example2.webp", # input_image
1147
  0, # image_position
1148
- "A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens",
1149
  "image", # generation_mode
1150
  "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
1151
  True, # randomize_seed
@@ -1153,7 +1159,7 @@ with block:
1153
  672, # resolution
1154
  2, # total_second_length
1155
  9, # latent_window_size
1156
- 25, # steps
1157
  1.0, # cfg
1158
  10.0, # gs
1159
  0.0, # rs
@@ -1173,7 +1179,7 @@ with block:
1173
  672, # resolution
1174
  1, # total_second_length
1175
  9, # latent_window_size
1176
- 25, # steps
1177
  1.0, # cfg
1178
  10.0, # gs
1179
  0.0, # rs
@@ -1193,7 +1199,7 @@ with block:
1193
  672, # resolution
1194
  1, # total_second_length
1195
  9, # latent_window_size
1196
- 25, # steps
1197
  1.0, # cfg
1198
  10.0, # gs
1199
  0.0, # rs
@@ -1223,7 +1229,7 @@ with block:
1223
  672, # resolution
1224
  1, # total_second_length
1225
  9, # latent_window_size
1226
- 25, # steps
1227
  1.0, # cfg
1228
  10.0, # gs
1229
  0.0, # rs
@@ -1275,10 +1281,10 @@ with block:
1275
  timeless_prompt.change(fn=handle_timeless_prompt_change, inputs=[timeless_prompt], outputs=[final_prompt])
1276
  start_button.click(fn = check_parameters, inputs = [
1277
  generation_mode, input_image, input_video
1278
- ], outputs = [end_button, warning], queue = False, show_progress = False).success(fn=process, inputs=ips, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button, end_button, warning])
1279
  start_button_video.click(fn = check_parameters, inputs = [
1280
  generation_mode, input_image, input_video
1281
- ], outputs = [end_button, warning], queue = False, show_progress = False).success(fn=process_video, inputs=ips_video, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button_video, end_button, warning])
1282
  end_button.click(fn=end_process)
1283
 
1284
  generation_mode.change(fn = save_preferences, inputs = [
 
4
 
5
  os.environ['HF_HOME'] = os.path.abspath(os.path.realpath(os.path.join(os.path.dirname(__file__), './hf_download')))
6
 
7
+ try:
8
+ import spaces
9
+ except:
10
+ print("Not on HuggingFace")
11
  import gradio as gr
12
  import torch
13
  import traceback
 
116
  "generation-mode": "image",
117
  }
118
 
 
119
  @torch.no_grad()
120
  def video_encode(video_path, resolution, no_resize, vae, vae_batch_size=16, device="cuda", width=None, height=None):
121
  """
 
451
  section_latent_frames = latent_window_size * 2
452
  overlapped_frames = latent_window_size * 4 - 3
453
 
454
+ if is_last_frame:
455
+ real_history_latents = history_latents[:, :, :min(section_latent_frames, total_generated_latent_frames), :, :]
456
+ history_pixels = soft_append_bcthw(vae_decode(real_history_latents, vae).cpu(), history_pixels, overlapped_frames)
457
+ else:
458
+ real_history_latents = history_latents[:, :, -min(section_latent_frames, total_generated_latent_frames):, :, :]
459
+ history_pixels = soft_append_bcthw(history_pixels, vae_decode(real_history_latents, vae).cpu(), overlapped_frames)
460
 
461
  if not high_vram:
462
  unload_complete_models()
 
541
  return
542
 
543
  # 20250506 pftq: Modified worker to accept video input and clean frame count
 
544
  @torch.no_grad()
545
  def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
546
  def encode_prompt(prompt, n_prompt):
 
812
  def get_duration(input_image, image_position, prompt, generation_mode, n_prompt, randomize_seed, seed, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, mp4_crf):
813
  return total_second_length * 60 * (0.9 if use_teacache else 1.5) * (1 + ((steps - 25) / 100))
814
 
815
+ # Remove this decorator when running locally (i.e. when `import spaces` is unavailable)
816
  @spaces.GPU(duration=get_duration)
817
  def process(input_image,
818
  image_position=0,
 
852
  input_image = np.ones((default_height, default_width, 3), dtype=np.uint8) * 255
853
  print("No input image provided. Using a blank white image.")
854
 
855
+ yield gr.update(label="Previewed Frames"), None, '', '', gr.update(interactive=False), gr.update(interactive=True), gr.update()
856
 
857
  stream = AsyncStream()
858
 
 
865
 
866
  if flag == 'file':
867
  output_filename = data
868
+ yield gr.update(value=output_filename, label="Previewed Frames"), gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True), gr.update()
869
 
870
  if flag == 'progress':
871
  preview, desc, html = data
872
+ yield gr.update(label="Previewed Frames"), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True), gr.update()
873
 
874
  if flag == 'end':
875
  end = time.time()
 
878
  secondes = secondes - (minutes * 60)
879
  hours = math.floor(minutes / 60)
880
  minutes = minutes - (hours * 60)
881
+ yield gr.update(value=output_filename, label="Finished Frames"), gr.update(visible=False), gr.update(), "The process has lasted " + \
882
  ((str(hours) + " h, ") if hours != 0 else "") + \
883
  ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
884
  str(secondes) + " sec. " + \
 
888
  def get_duration_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
889
  return total_second_length * 60 * (1.5 if use_teacache else 2.5) * (1 + ((steps - 25) / 100))
890
 
891
+ # Remove this decorator when running locally (i.e. when `import spaces` is unavailable)
892
  @spaces.GPU(duration=get_duration_video)
893
  def process_video(input_video, prompt, n_prompt, randomize_seed, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
894
  start = time.time()
 
907
  # 20250506 pftq: Updated assertion for video input
908
  assert input_video is not None, 'No input video!'
909
 
910
+ yield gr.update(label="Previewed Frames"), None, '', '', gr.update(interactive=False), gr.update(interactive=True), gr.update()
911
 
912
  # 20250507 pftq: Even the H100 needs offloading if the video dimensions are 720p or higher
913
  if high_vram and (no_resize or resolution>640):
 
934
 
935
  if flag == 'file':
936
  output_filename = data
937
+ yield gr.update(value=output_filename, label="Previewed Frames"), gr.update(), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True), gr.update()
938
 
939
  if flag == 'progress':
940
  preview, desc, html = data
941
+ yield gr.update(label="Previewed Frames"), gr.update(visible=True, value=preview), desc, html, gr.update(interactive=False), gr.update(interactive=True), gr.update() # 20250506 pftq: Keep refreshing the video in case it got hidden when the tab was in the background
942
 
943
  if flag == 'end':
944
  end = time.time()
 
947
  secondes = secondes - (minutes * 60)
948
  hours = math.floor(minutes / 60)
949
  minutes = minutes - (hours * 60)
950
+ yield gr.update(value=output_filename, label="Finished Frames"), gr.update(visible=False), desc + \
951
  " The process has lasted " + \
952
  ((str(hours) + " h, ") if hours != 0 else "") + \
953
  ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
 
1054
  n_prompt = gr.Textbox(label="Negative Prompt", value="Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", info='Requires using normal CFG (undistilled) instead of Distilled (set Distilled=1 and CFG > 1).')
1055
 
1056
  latent_window_size = gr.Slider(label="Latent Window Size", minimum=1, maximum=33, value=9, step=1, info='Generate more frames at a time (larger chunks). Less degradation and better blending but higher VRAM cost. Should not change.')
1057
+ steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=30, step=1, info='Increase for more quality, especially if using high non-distilled CFG. If your animation has very few motion, you may have brutal brightness change; this can be fixed increasing the steps.')
1058
 
1059
  with gr.Row():
1060
  no_resize = gr.Checkbox(label='Force Original Video Resolution (no Resizing)', value=False, info='Might run out of VRAM (720p requires > 24GB VRAM).')
 
1096
 
1097
  with gr.Column():
1098
  warning = gr.HTML(value = "<center><big>Your computer must <u>not</u> enter into standby mode.</big><br/>On Chrome, you can force to keep a tab alive in <code>chrome://discards/</code></center>", visible = False)
1099
+ result_video = gr.Video(label="Generated Frames", autoplay=True, show_share_button=False, height=512, loop=True)
1100
  preview_image = gr.Image(label="Next Latents", height=200, visible=False)
 
1101
  progress_desc = gr.Markdown('', elem_classes='no-generating-animation')
1102
  progress_bar = gr.HTML('', elem_classes='no-generating-animation')
1103
 
 
1119
  672, # resolution
1120
  1, # total_second_length
1121
  9, # latent_window_size
1122
+ 30, # steps
1123
  1.0, # cfg
1124
  10.0, # gs
1125
  0.0, # rs
 
1131
  [
1132
  "./img_examples/Example2.webp", # input_image
1133
  0, # image_position
1134
+ "A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks, the man stops talking and the man listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens",
1135
  "image", # generation_mode
1136
  "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
1137
  True, # randomize_seed
 
1139
  672, # resolution
1140
  2, # total_second_length
1141
  9, # latent_window_size
1142
+ 30, # steps
1143
  1.0, # cfg
1144
  10.0, # gs
1145
  0.0, # rs
 
1151
  [
1152
  "./img_examples/Example2.webp", # input_image
1153
  0, # image_position
1154
+ "A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks, the woman stops talking and the woman listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens",
1155
  "image", # generation_mode
1156
  "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
1157
  True, # randomize_seed
 
1159
  672, # resolution
1160
  2, # total_second_length
1161
  9, # latent_window_size
1162
+ 30, # steps
1163
  1.0, # cfg
1164
  10.0, # gs
1165
  0.0, # rs
 
1179
  672, # resolution
1180
  1, # total_second_length
1181
  9, # latent_window_size
1182
+ 30, # steps
1183
  1.0, # cfg
1184
  10.0, # gs
1185
  0.0, # rs
 
1199
  672, # resolution
1200
  1, # total_second_length
1201
  9, # latent_window_size
1202
+ 30, # steps
1203
  1.0, # cfg
1204
  10.0, # gs
1205
  0.0, # rs
 
1229
  672, # resolution
1230
  1, # total_second_length
1231
  9, # latent_window_size
1232
+ 30, # steps
1233
  1.0, # cfg
1234
  10.0, # gs
1235
  0.0, # rs
 
1281
  timeless_prompt.change(fn=handle_timeless_prompt_change, inputs=[timeless_prompt], outputs=[final_prompt])
1282
  start_button.click(fn = check_parameters, inputs = [
1283
  generation_mode, input_image, input_video
1284
+ ], outputs = [end_button, warning], queue = False, show_progress = False).success(fn=process, inputs=ips, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button, end_button, warning], scroll_to_output = True)
1285
  start_button_video.click(fn = check_parameters, inputs = [
1286
  generation_mode, input_image, input_video
1287
+ ], outputs = [end_button, warning], queue = False, show_progress = False).success(fn=process_video, inputs=ips_video, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button_video, end_button, warning], scroll_to_output = True)
1288
  end_button.click(fn=end_process)
1289
 
1290
  generation_mode.change(fn = save_preferences, inputs = [