Spaces:
Running
Running
Better allocation estimation
Browse files
app.py
CHANGED
@@ -390,9 +390,13 @@ def worker(input_image, image_position, prompts, n_prompt, seed, resolution, tot
|
|
390 |
stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))
|
391 |
|
392 |
if not high_vram:
|
|
|
393 |
load_model_as_complete(image_encoder, target_device=gpu)
|
394 |
|
395 |
image_encoder_last_hidden_state = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder).last_hidden_state
|
|
|
|
|
|
|
396 |
|
397 |
return [start_latent, image_encoder_last_hidden_state]
|
398 |
|
@@ -468,7 +472,7 @@ def worker(input_image, image_position, prompts, n_prompt, seed, resolution, tot
|
|
468 |
history_pixels = soft_append_bcthw(vae_decode(real_history_latents, vae).cpu(), history_pixels, overlapped_frames)
|
469 |
|
470 |
if not high_vram:
|
471 |
-
unload_complete_models()
|
472 |
|
473 |
if enable_preview or section_index == (0 if first_section_index == (total_latent_sections - 1) else (total_latent_sections - 1)):
|
474 |
output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
|
@@ -636,6 +640,11 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
|
|
636 |
load_model_as_complete(image_encoder, target_device=gpu)
|
637 |
|
638 |
image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)
|
|
|
|
|
|
|
|
|
|
|
639 |
image_encoder_last_hidden_state = image_encoder_output.last_hidden_state
|
640 |
|
641 |
# Dtype
|
@@ -808,7 +817,7 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
|
|
808 |
history_pixels = soft_append_bcthw(history_pixels, vae_decode(real_history_latents, vae).cpu(), overlapped_frames)
|
809 |
|
810 |
if not high_vram:
|
811 |
-
unload_complete_models()
|
812 |
|
813 |
if enable_preview or section_index == total_latent_sections - 1:
|
814 |
output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
|
@@ -909,7 +918,7 @@ def process(input_image,
|
|
909 |
fps_number=30
|
910 |
):
|
911 |
if auto_allocation:
|
912 |
-
allocation_time = min(total_second_length * 60 * (
|
913 |
|
914 |
if torch.cuda.device_count() == 0:
|
915 |
gr.Warning('Set this space to GPU config to make it work.')
|
@@ -994,7 +1003,7 @@ def process_video_on_gpu(input_video, prompts, n_prompt, seed, batch, resolution
|
|
994 |
def process_video(input_video, prompt, n_prompt, randomize_seed, seed, auto_allocation, allocation_time, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
|
995 |
global high_vram
|
996 |
if auto_allocation:
|
997 |
-
allocation_time = min(total_second_length * 60 * (
|
998 |
|
999 |
if torch.cuda.device_count() == 0:
|
1000 |
gr.Warning('Set this space to GPU config to make it work.')
|
@@ -1066,7 +1075,7 @@ title_html = """
|
|
1066 |
|
1067 |
js = """
|
1068 |
function createGradioAnimation() {
|
1069 |
-
window.addEventListener("beforeunload", function
|
1070 |
if (document.getElementById('end-button') && !document.getElementById('end-button').disabled) {
|
1071 |
var confirmationMessage = 'A process is still running. '
|
1072 |
+ 'If you leave before saving, your changes will be lost.';
|
@@ -1095,7 +1104,7 @@ with block:
|
|
1095 |
with gr.Row():
|
1096 |
with gr.Column():
|
1097 |
generation_mode = gr.Radio([["Text-to-Video", "text"], ["Image-to-Video", "image"], ["Video Extension", "video"]], elem_id="generation-mode", label="Generation mode", value = "image")
|
1098 |
-
text_to_video_hint = gr.HTML("Text-to-Video badly works. I discourage to use the Text-to-Video feature. You should rather generate an image with Flux and use Image-to-Video. You will save time.")
|
1099 |
input_image = gr.Image(sources='upload', type="numpy", label="Image", height=320)
|
1100 |
image_position = gr.Slider(label="Image position", minimum=0, maximum=100, value=0, step=1, info='0=Video start; 100=Video end (lower quality)')
|
1101 |
input_video = gr.Video(sources='upload', label="Input Video", height=320)
|
@@ -1122,7 +1131,7 @@ with block:
|
|
1122 |
enable_preview = gr.Checkbox(label='Enable preview', value=True, info='Display a preview around each second generated but it costs 2 sec. for each second generated.')
|
1123 |
use_teacache = gr.Checkbox(label='Use TeaCache', value=False, info='Faster speed and no break in brightness, but often makes hands and fingers slightly worse.')
|
1124 |
|
1125 |
-
n_prompt = gr.Textbox(label="Negative Prompt", value="Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", info='Requires using normal CFG (undistilled) instead of Distilled (set Distilled=1 and CFG > 1).')
|
1126 |
|
1127 |
fps_number = gr.Slider(label="Frame per seconds", info="The model is trained for 30 fps so other fps may generate weird results", minimum=10, maximum=60, value=30, step=1)
|
1128 |
|
@@ -1171,7 +1180,7 @@ with block:
|
|
1171 |
allocation_time = gr.Slider(label="GPU allocation time (in seconds)", info='lower=May abort run, higher=Quota penalty for next runs; only useful for ZeroGPU; for instance set to 88 when you have the message "You have exceeded your GPU quota (180s requested vs. 89s left)."', value=180, minimum=60, maximum=320, step=1)
|
1172 |
|
1173 |
with gr.Column():
|
1174 |
-
warning = gr.HTML(value = "<center><big>Your computer must <u>not</u> enter into standby mode.</big><br/>On Chrome, you can force to keep a tab alive in <code>chrome://discards/</code></center>", visible = False)
|
1175 |
result_video = gr.Video(label="Generated Frames", autoplay=True, show_share_button=False, height=512, loop=True)
|
1176 |
preview_image = gr.Image(label="Next Latents", height=200, visible=False)
|
1177 |
progress_desc = gr.Markdown('', elem_classes='no-generating-animation')
|
@@ -1189,7 +1198,7 @@ with block:
|
|
1189 |
0, # image_position
|
1190 |
"Overcrowed street in Japan, photorealistic, realistic, intricate details, 8k, insanely detailed",
|
1191 |
"text", # generation_mode
|
1192 |
-
"Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
|
1193 |
True, # randomize_seed
|
1194 |
42, # seed
|
1195 |
True, # auto_allocation
|
@@ -1223,7 +1232,7 @@ with block:
|
|
1223 |
0, # image_position
|
1224 |
"A dolphin emerges from the water, photorealistic, realistic, intricate details, 8k, insanely detailed",
|
1225 |
"image", # generation_mode
|
1226 |
-
"Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
|
1227 |
True, # randomize_seed
|
1228 |
42, # seed
|
1229 |
True, # auto_allocation
|
@@ -1246,7 +1255,7 @@ with block:
|
|
1246 |
0, # image_position
|
1247 |
"A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks, the man stops talking and the man listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens",
|
1248 |
"image", # generation_mode
|
1249 |
-
"Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
|
1250 |
True, # randomize_seed
|
1251 |
42, # seed
|
1252 |
True, # auto_allocation
|
@@ -1269,7 +1278,7 @@ with block:
|
|
1269 |
0, # image_position
|
1270 |
"A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks, the woman stops talking and the woman listens A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens",
|
1271 |
"image", # generation_mode
|
1272 |
-
"Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
|
1273 |
True, # randomize_seed
|
1274 |
42, # seed
|
1275 |
True, # auto_allocation
|
@@ -1292,7 +1301,7 @@ with block:
|
|
1292 |
0, # image_position
|
1293 |
"A boy is walking to the right, full view, full-length view, cartoon",
|
1294 |
"image", # generation_mode
|
1295 |
-
"Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
|
1296 |
True, # randomize_seed
|
1297 |
42, # seed
|
1298 |
True, # auto_allocation
|
@@ -1315,7 +1324,7 @@ with block:
|
|
1315 |
100, # image_position
|
1316 |
"A building starting to explode, photorealistic, realisitc, 8k, insanely detailed",
|
1317 |
"image", # generation_mode
|
1318 |
-
"Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
|
1319 |
True, # randomize_seed
|
1320 |
42, # seed
|
1321 |
True, # auto_allocation
|
@@ -1347,7 +1356,7 @@ with block:
|
|
1347 |
[
|
1348 |
"./img_examples/Example1.mp4", # input_video
|
1349 |
"View of the sea as far as the eye can see, from the seaside, a piece of land is barely visible on the horizon at the middle, the sky is radiant, reflections of the sun in the water, photorealistic, realistic, intricate details, 8k, insanely detailed",
|
1350 |
-
"Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
|
1351 |
True, # randomize_seed
|
1352 |
42, # seed
|
1353 |
True, # auto_allocation
|
|
|
390 |
stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))
|
391 |
|
392 |
if not high_vram:
|
393 |
+
unload_complete_models(vae)
|
394 |
load_model_as_complete(image_encoder, target_device=gpu)
|
395 |
|
396 |
image_encoder_last_hidden_state = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder).last_hidden_state
|
397 |
+
|
398 |
+
if not high_vram:
|
399 |
+
unload_complete_models(image_encoder)
|
400 |
|
401 |
return [start_latent, image_encoder_last_hidden_state]
|
402 |
|
|
|
472 |
history_pixels = soft_append_bcthw(vae_decode(real_history_latents, vae).cpu(), history_pixels, overlapped_frames)
|
473 |
|
474 |
if not high_vram:
|
475 |
+
unload_complete_models(text_encoder, text_encoder_2, image_encoder, vae, transformer)
|
476 |
|
477 |
if enable_preview or section_index == (0 if first_section_index == (total_latent_sections - 1) else (total_latent_sections - 1)):
|
478 |
output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
|
|
|
640 |
load_model_as_complete(image_encoder, target_device=gpu)
|
641 |
|
642 |
image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)
|
643 |
+
|
644 |
+
# Clean GPU
|
645 |
+
if not high_vram:
|
646 |
+
unload_complete_models(image_encoder)
|
647 |
+
|
648 |
image_encoder_last_hidden_state = image_encoder_output.last_hidden_state
|
649 |
|
650 |
# Dtype
|
|
|
817 |
history_pixels = soft_append_bcthw(history_pixels, vae_decode(real_history_latents, vae).cpu(), overlapped_frames)
|
818 |
|
819 |
if not high_vram:
|
820 |
+
unload_complete_models(text_encoder, text_encoder_2, image_encoder, vae, transformer)
|
821 |
|
822 |
if enable_preview or section_index == total_latent_sections - 1:
|
823 |
output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
|
|
|
918 |
fps_number=30
|
919 |
):
|
920 |
if auto_allocation:
|
921 |
+
allocation_time = min(total_second_length * 60 * (1.5 if use_teacache else 3.0) * (1 + ((steps - 25) / 25)), 600)
|
922 |
|
923 |
if torch.cuda.device_count() == 0:
|
924 |
gr.Warning('Set this space to GPU config to make it work.')
|
|
|
1003 |
def process_video(input_video, prompt, n_prompt, randomize_seed, seed, auto_allocation, allocation_time, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
|
1004 |
global high_vram
|
1005 |
if auto_allocation:
|
1006 |
+
allocation_time = min(total_second_length * 60 * (2.5 if use_teacache else 3.5) * (1 + ((steps - 25) / 25)), 600)
|
1007 |
|
1008 |
if torch.cuda.device_count() == 0:
|
1009 |
gr.Warning('Set this space to GPU config to make it work.')
|
|
|
1075 |
|
1076 |
js = """
|
1077 |
function createGradioAnimation() {
|
1078 |
+
window.addEventListener("beforeunload", function(e) {
|
1079 |
if (document.getElementById('end-button') && !document.getElementById('end-button').disabled) {
|
1080 |
var confirmationMessage = 'A process is still running. '
|
1081 |
+ 'If you leave before saving, your changes will be lost.';
|
|
|
1104 |
with gr.Row():
|
1105 |
with gr.Column():
|
1106 |
generation_mode = gr.Radio([["Text-to-Video", "text"], ["Image-to-Video", "image"], ["Video Extension", "video"]], elem_id="generation-mode", label="Generation mode", value = "image")
|
1107 |
+
text_to_video_hint = gr.HTML("Text-to-Video badly works with a flash effect at the start. I discourage to use the Text-to-Video feature. You should rather generate an image with Flux and use Image-to-Video. You will save time.")
|
1108 |
input_image = gr.Image(sources='upload', type="numpy", label="Image", height=320)
|
1109 |
image_position = gr.Slider(label="Image position", minimum=0, maximum=100, value=0, step=1, info='0=Video start; 100=Video end (lower quality)')
|
1110 |
input_video = gr.Video(sources='upload', label="Input Video", height=320)
|
|
|
1131 |
enable_preview = gr.Checkbox(label='Enable preview', value=True, info='Display a preview around each second generated but it costs 2 sec. for each second generated.')
|
1132 |
use_teacache = gr.Checkbox(label='Use TeaCache', value=False, info='Faster speed and no break in brightness, but often makes hands and fingers slightly worse.')
|
1133 |
|
1134 |
+
n_prompt = gr.Textbox(label="Negative Prompt", value="Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", info='Requires using normal CFG (undistilled) instead of Distilled (set Distilled=1 and CFG > 1).')
|
1135 |
|
1136 |
fps_number = gr.Slider(label="Frame per seconds", info="The model is trained for 30 fps so other fps may generate weird results", minimum=10, maximum=60, value=30, step=1)
|
1137 |
|
|
|
1180 |
allocation_time = gr.Slider(label="GPU allocation time (in seconds)", info='lower=May abort run, higher=Quota penalty for next runs; only useful for ZeroGPU; for instance set to 88 when you have the message "You have exceeded your GPU quota (180s requested vs. 89s left)."', value=180, minimum=60, maximum=320, step=1)
|
1181 |
|
1182 |
with gr.Column():
|
1183 |
+
warning = gr.HTML(elem_id="warning", value = "<center><big>Your computer must <u>not</u> enter into standby mode.</big><br/>On Chrome, you can force to keep a tab alive in <code>chrome://discards/</code></center>", visible = False)
|
1184 |
result_video = gr.Video(label="Generated Frames", autoplay=True, show_share_button=False, height=512, loop=True)
|
1185 |
preview_image = gr.Image(label="Next Latents", height=200, visible=False)
|
1186 |
progress_desc = gr.Markdown('', elem_classes='no-generating-animation')
|
|
|
1198 |
0, # image_position
|
1199 |
"Overcrowed street in Japan, photorealistic, realistic, intricate details, 8k, insanely detailed",
|
1200 |
"text", # generation_mode
|
1201 |
+
"Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
|
1202 |
True, # randomize_seed
|
1203 |
42, # seed
|
1204 |
True, # auto_allocation
|
|
|
1232 |
0, # image_position
|
1233 |
"A dolphin emerges from the water, photorealistic, realistic, intricate details, 8k, insanely detailed",
|
1234 |
"image", # generation_mode
|
1235 |
+
"Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
|
1236 |
True, # randomize_seed
|
1237 |
42, # seed
|
1238 |
True, # auto_allocation
|
|
|
1255 |
0, # image_position
|
1256 |
"A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks, the man stops talking and the man listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens",
|
1257 |
"image", # generation_mode
|
1258 |
+
"Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
|
1259 |
True, # randomize_seed
|
1260 |
42, # seed
|
1261 |
True, # auto_allocation
|
|
|
1278 |
0, # image_position
|
1279 |
"A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks, the woman stops talking and the woman listens A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens",
|
1280 |
"image", # generation_mode
|
1281 |
+
"Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
|
1282 |
True, # randomize_seed
|
1283 |
42, # seed
|
1284 |
True, # auto_allocation
|
|
|
1301 |
0, # image_position
|
1302 |
"A boy is walking to the right, full view, full-length view, cartoon",
|
1303 |
"image", # generation_mode
|
1304 |
+
"Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
|
1305 |
True, # randomize_seed
|
1306 |
42, # seed
|
1307 |
True, # auto_allocation
|
|
|
1324 |
100, # image_position
|
1325 |
"A building starting to explode, photorealistic, realisitc, 8k, insanely detailed",
|
1326 |
"image", # generation_mode
|
1327 |
+
"Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
|
1328 |
True, # randomize_seed
|
1329 |
42, # seed
|
1330 |
True, # auto_allocation
|
|
|
1356 |
[
|
1357 |
"./img_examples/Example1.mp4", # input_video
|
1358 |
"View of the sea as far as the eye can see, from the seaside, a piece of land is barely visible on the horizon at the middle, the sky is radiant, reflections of the sun in the water, photorealistic, realistic, intricate details, 8k, insanely detailed",
|
1359 |
+
"Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
|
1360 |
True, # randomize_seed
|
1361 |
42, # seed
|
1362 |
True, # auto_allocation
|