Fabrice-TIERCELIN committed on
Commit
c994303
·
verified ·
1 Parent(s): e6fa5f4

Better allocation estimation

Browse files
Files changed (1) hide show
  1. app.py +24 -15
app.py CHANGED
@@ -390,9 +390,13 @@ def worker(input_image, image_position, prompts, n_prompt, seed, resolution, tot
390
  stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))
391
 
392
  if not high_vram:
 
393
  load_model_as_complete(image_encoder, target_device=gpu)
394
 
395
  image_encoder_last_hidden_state = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder).last_hidden_state
 
 
 
396
 
397
  return [start_latent, image_encoder_last_hidden_state]
398
 
@@ -468,7 +472,7 @@ def worker(input_image, image_position, prompts, n_prompt, seed, resolution, tot
468
  history_pixels = soft_append_bcthw(vae_decode(real_history_latents, vae).cpu(), history_pixels, overlapped_frames)
469
 
470
  if not high_vram:
471
- unload_complete_models()
472
 
473
  if enable_preview or section_index == (0 if first_section_index == (total_latent_sections - 1) else (total_latent_sections - 1)):
474
  output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
@@ -636,6 +640,11 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
636
  load_model_as_complete(image_encoder, target_device=gpu)
637
 
638
  image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)
 
 
 
 
 
639
  image_encoder_last_hidden_state = image_encoder_output.last_hidden_state
640
 
641
  # Dtype
@@ -808,7 +817,7 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
808
  history_pixels = soft_append_bcthw(history_pixels, vae_decode(real_history_latents, vae).cpu(), overlapped_frames)
809
 
810
  if not high_vram:
811
- unload_complete_models()
812
 
813
  if enable_preview or section_index == total_latent_sections - 1:
814
  output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
@@ -909,7 +918,7 @@ def process(input_image,
909
  fps_number=30
910
  ):
911
  if auto_allocation:
912
- allocation_time = min(total_second_length * 60 * (0.9 if use_teacache else 3.0) * (1 + ((steps - 25) / 25)), 600)
913
 
914
  if torch.cuda.device_count() == 0:
915
  gr.Warning('Set this space to GPU config to make it work.')
@@ -994,7 +1003,7 @@ def process_video_on_gpu(input_video, prompts, n_prompt, seed, batch, resolution
994
  def process_video(input_video, prompt, n_prompt, randomize_seed, seed, auto_allocation, allocation_time, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
995
  global high_vram
996
  if auto_allocation:
997
- allocation_time = min(total_second_length * 60 * (1.5 if use_teacache else 3.0) * (1 + ((steps - 25) / 25)), 600)
998
 
999
  if torch.cuda.device_count() == 0:
1000
  gr.Warning('Set this space to GPU config to make it work.')
@@ -1066,7 +1075,7 @@ title_html = """
1066
 
1067
  js = """
1068
  function createGradioAnimation() {
1069
- window.addEventListener("beforeunload", function (e) {
1070
  if (document.getElementById('end-button') && !document.getElementById('end-button').disabled) {
1071
  var confirmationMessage = 'A process is still running. '
1072
  + 'If you leave before saving, your changes will be lost.';
@@ -1095,7 +1104,7 @@ with block:
1095
  with gr.Row():
1096
  with gr.Column():
1097
  generation_mode = gr.Radio([["Text-to-Video", "text"], ["Image-to-Video", "image"], ["Video Extension", "video"]], elem_id="generation-mode", label="Generation mode", value = "image")
1098
- text_to_video_hint = gr.HTML("Text-to-Video badly works. I discourage to use the Text-to-Video feature. You should rather generate an image with Flux and use Image-to-Video. You will save time.")
1099
  input_image = gr.Image(sources='upload', type="numpy", label="Image", height=320)
1100
  image_position = gr.Slider(label="Image position", minimum=0, maximum=100, value=0, step=1, info='0=Video start; 100=Video end (lower quality)')
1101
  input_video = gr.Video(sources='upload', label="Input Video", height=320)
@@ -1122,7 +1131,7 @@ with block:
1122
  enable_preview = gr.Checkbox(label='Enable preview', value=True, info='Display a preview around each second generated but it costs 2 sec. for each second generated.')
1123
  use_teacache = gr.Checkbox(label='Use TeaCache', value=False, info='Faster speed and no break in brightness, but often makes hands and fingers slightly worse.')
1124
 
1125
- n_prompt = gr.Textbox(label="Negative Prompt", value="Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", info='Requires using normal CFG (undistilled) instead of Distilled (set Distilled=1 and CFG > 1).')
1126
 
1127
  fps_number = gr.Slider(label="Frame per seconds", info="The model is trained for 30 fps so other fps may generate weird results", minimum=10, maximum=60, value=30, step=1)
1128
 
@@ -1171,7 +1180,7 @@ with block:
1171
  allocation_time = gr.Slider(label="GPU allocation time (in seconds)", info='lower=May abort run, higher=Quota penalty for next runs; only useful for ZeroGPU; for instance set to 88 when you have the message "You have exceeded your GPU quota (180s requested vs. 89s left)."', value=180, minimum=60, maximum=320, step=1)
1172
 
1173
  with gr.Column():
1174
- warning = gr.HTML(value = "<center><big>Your computer must <u>not</u> enter into standby mode.</big><br/>On Chrome, you can force to keep a tab alive in <code>chrome://discards/</code></center>", visible = False)
1175
  result_video = gr.Video(label="Generated Frames", autoplay=True, show_share_button=False, height=512, loop=True)
1176
  preview_image = gr.Image(label="Next Latents", height=200, visible=False)
1177
  progress_desc = gr.Markdown('', elem_classes='no-generating-animation')
@@ -1189,7 +1198,7 @@ with block:
1189
  0, # image_position
1190
  "Overcrowed street in Japan, photorealistic, realistic, intricate details, 8k, insanely detailed",
1191
  "text", # generation_mode
1192
- "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
1193
  True, # randomize_seed
1194
  42, # seed
1195
  True, # auto_allocation
@@ -1223,7 +1232,7 @@ with block:
1223
  0, # image_position
1224
  "A dolphin emerges from the water, photorealistic, realistic, intricate details, 8k, insanely detailed",
1225
  "image", # generation_mode
1226
- "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
1227
  True, # randomize_seed
1228
  42, # seed
1229
  True, # auto_allocation
@@ -1246,7 +1255,7 @@ with block:
1246
  0, # image_position
1247
  "A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks, the man stops talking and the man listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens",
1248
  "image", # generation_mode
1249
- "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
1250
  True, # randomize_seed
1251
  42, # seed
1252
  True, # auto_allocation
@@ -1269,7 +1278,7 @@ with block:
1269
  0, # image_position
1270
  "A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks, the woman stops talking and the woman listens A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens",
1271
  "image", # generation_mode
1272
- "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
1273
  True, # randomize_seed
1274
  42, # seed
1275
  True, # auto_allocation
@@ -1292,7 +1301,7 @@ with block:
1292
  0, # image_position
1293
  "A boy is walking to the right, full view, full-length view, cartoon",
1294
  "image", # generation_mode
1295
- "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
1296
  True, # randomize_seed
1297
  42, # seed
1298
  True, # auto_allocation
@@ -1315,7 +1324,7 @@ with block:
1315
  100, # image_position
1316
  "A building starting to explode, photorealistic, realisitc, 8k, insanely detailed",
1317
  "image", # generation_mode
1318
- "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
1319
  True, # randomize_seed
1320
  42, # seed
1321
  True, # auto_allocation
@@ -1347,7 +1356,7 @@ with block:
1347
  [
1348
  "./img_examples/Example1.mp4", # input_video
1349
  "View of the sea as far as the eye can see, from the seaside, a piece of land is barely visible on the horizon at the middle, the sky is radiant, reflections of the sun in the water, photorealistic, realistic, intricate details, 8k, insanely detailed",
1350
- "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
1351
  True, # randomize_seed
1352
  42, # seed
1353
  True, # auto_allocation
 
390
  stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))
391
 
392
  if not high_vram:
393
+ unload_complete_models(vae)
394
  load_model_as_complete(image_encoder, target_device=gpu)
395
 
396
  image_encoder_last_hidden_state = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder).last_hidden_state
397
+
398
+ if not high_vram:
399
+ unload_complete_models(image_encoder)
400
 
401
  return [start_latent, image_encoder_last_hidden_state]
402
 
 
472
  history_pixels = soft_append_bcthw(vae_decode(real_history_latents, vae).cpu(), history_pixels, overlapped_frames)
473
 
474
  if not high_vram:
475
+ unload_complete_models(text_encoder, text_encoder_2, image_encoder, vae, transformer)
476
 
477
  if enable_preview or section_index == (0 if first_section_index == (total_latent_sections - 1) else (total_latent_sections - 1)):
478
  output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
 
640
  load_model_as_complete(image_encoder, target_device=gpu)
641
 
642
  image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)
643
+
644
+ # Clean GPU
645
+ if not high_vram:
646
+ unload_complete_models(image_encoder)
647
+
648
  image_encoder_last_hidden_state = image_encoder_output.last_hidden_state
649
 
650
  # Dtype
 
817
  history_pixels = soft_append_bcthw(history_pixels, vae_decode(real_history_latents, vae).cpu(), overlapped_frames)
818
 
819
  if not high_vram:
820
+ unload_complete_models(text_encoder, text_encoder_2, image_encoder, vae, transformer)
821
 
822
  if enable_preview or section_index == total_latent_sections - 1:
823
  output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
 
918
  fps_number=30
919
  ):
920
  if auto_allocation:
921
+ allocation_time = min(total_second_length * 60 * (1.5 if use_teacache else 3.0) * (1 + ((steps - 25) / 25)), 600)
922
 
923
  if torch.cuda.device_count() == 0:
924
  gr.Warning('Set this space to GPU config to make it work.')
 
1003
  def process_video(input_video, prompt, n_prompt, randomize_seed, seed, auto_allocation, allocation_time, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
1004
  global high_vram
1005
  if auto_allocation:
1006
+ allocation_time = min(total_second_length * 60 * (2.5 if use_teacache else 3.5) * (1 + ((steps - 25) / 25)), 600)
1007
 
1008
  if torch.cuda.device_count() == 0:
1009
  gr.Warning('Set this space to GPU config to make it work.')
 
1075
 
1076
  js = """
1077
  function createGradioAnimation() {
1078
+ window.addEventListener("beforeunload", function(e) {
1079
  if (document.getElementById('end-button') && !document.getElementById('end-button').disabled) {
1080
  var confirmationMessage = 'A process is still running. '
1081
  + 'If you leave before saving, your changes will be lost.';
 
1104
  with gr.Row():
1105
  with gr.Column():
1106
  generation_mode = gr.Radio([["Text-to-Video", "text"], ["Image-to-Video", "image"], ["Video Extension", "video"]], elem_id="generation-mode", label="Generation mode", value = "image")
1107
+ text_to_video_hint = gr.HTML("Text-to-Video badly works with a flash effect at the start. I discourage to use the Text-to-Video feature. You should rather generate an image with Flux and use Image-to-Video. You will save time.")
1108
  input_image = gr.Image(sources='upload', type="numpy", label="Image", height=320)
1109
  image_position = gr.Slider(label="Image position", minimum=0, maximum=100, value=0, step=1, info='0=Video start; 100=Video end (lower quality)')
1110
  input_video = gr.Video(sources='upload', label="Input Video", height=320)
 
1131
  enable_preview = gr.Checkbox(label='Enable preview', value=True, info='Display a preview around each second generated but it costs 2 sec. for each second generated.')
1132
  use_teacache = gr.Checkbox(label='Use TeaCache', value=False, info='Faster speed and no break in brightness, but often makes hands and fingers slightly worse.')
1133
 
1134
+ n_prompt = gr.Textbox(label="Negative Prompt", value="Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", info='Requires using normal CFG (undistilled) instead of Distilled (set Distilled=1 and CFG > 1).')
1135
 
1136
  fps_number = gr.Slider(label="Frame per seconds", info="The model is trained for 30 fps so other fps may generate weird results", minimum=10, maximum=60, value=30, step=1)
1137
 
 
1180
  allocation_time = gr.Slider(label="GPU allocation time (in seconds)", info='lower=May abort run, higher=Quota penalty for next runs; only useful for ZeroGPU; for instance set to 88 when you have the message "You have exceeded your GPU quota (180s requested vs. 89s left)."', value=180, minimum=60, maximum=320, step=1)
1181
 
1182
  with gr.Column():
1183
+ warning = gr.HTML(elem_id="warning", value = "<center><big>Your computer must <u>not</u> enter into standby mode.</big><br/>On Chrome, you can force to keep a tab alive in <code>chrome://discards/</code></center>", visible = False)
1184
  result_video = gr.Video(label="Generated Frames", autoplay=True, show_share_button=False, height=512, loop=True)
1185
  preview_image = gr.Image(label="Next Latents", height=200, visible=False)
1186
  progress_desc = gr.Markdown('', elem_classes='no-generating-animation')
 
1198
  0, # image_position
1199
  "Overcrowed street in Japan, photorealistic, realistic, intricate details, 8k, insanely detailed",
1200
  "text", # generation_mode
1201
+ "Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
1202
  True, # randomize_seed
1203
  42, # seed
1204
  True, # auto_allocation
 
1232
  0, # image_position
1233
  "A dolphin emerges from the water, photorealistic, realistic, intricate details, 8k, insanely detailed",
1234
  "image", # generation_mode
1235
+ "Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
1236
  True, # randomize_seed
1237
  42, # seed
1238
  True, # auto_allocation
 
1255
  0, # image_position
1256
  "A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks, the man stops talking and the man listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens",
1257
  "image", # generation_mode
1258
+ "Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
1259
  True, # randomize_seed
1260
  42, # seed
1261
  True, # auto_allocation
 
1278
  0, # image_position
1279
  "A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks, the woman stops talking and the woman listens A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens",
1280
  "image", # generation_mode
1281
+ "Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
1282
  True, # randomize_seed
1283
  42, # seed
1284
  True, # auto_allocation
 
1301
  0, # image_position
1302
  "A boy is walking to the right, full view, full-length view, cartoon",
1303
  "image", # generation_mode
1304
+ "Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
1305
  True, # randomize_seed
1306
  42, # seed
1307
  True, # auto_allocation
 
1324
  100, # image_position
1325
  "A building starting to explode, photorealistic, realisitc, 8k, insanely detailed",
1326
  "image", # generation_mode
1327
+ "Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
1328
  True, # randomize_seed
1329
  42, # seed
1330
  True, # auto_allocation
 
1356
  [
1357
  "./img_examples/Example1.mp4", # input_video
1358
  "View of the sea as far as the eye can see, from the seaside, a piece of land is barely visible on the horizon at the middle, the sky is radiant, reflections of the sun in the water, photorealistic, realistic, intricate details, 8k, insanely detailed",
1359
+ "Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
1360
  True, # randomize_seed
1361
  42, # seed
1362
  True, # auto_allocation