yhzhai committed
Commit 18eb2fe · Parent(s): 08f4103

add resolution

Files changed (1): app.py (+96 -25)
app.py CHANGED
@@ -35,9 +35,7 @@ def get_modelscope_pipeline(
     #     model_id, torch_dtype=torch.float16, variant="fp16"
     # )
     # else:
-    pipe = DiffusionPipeline.from_pretrained(
-        model_id
-    )
+    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, variant="fp16")
     scheduler = LCMScheduler.from_pretrained(
         model_id,
         subfolder="scheduler",
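For context, a minimal sketch of what the rewritten loader does: the commented-out conditional is replaced by one unconditional fp16 load. The model ID below is an assumption for illustration; the Space passes its own model_id into get_modelscope_pipeline.

    import torch
    from diffusers import DiffusionPipeline, LCMScheduler

    # Hypothetical checkpoint; the app supplies its own model_id.
    model_id = "ali-vilab/text-to-video-ms-1.7b"

    # Half-precision weights roughly halve GPU memory use.
    pipe = DiffusionPipeline.from_pretrained(
        model_id, torch_dtype=torch.float16, variant="fp16"
    )
    pipe.scheduler = LCMScheduler.from_pretrained(model_id, subfolder="scheduler")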
@@ -98,12 +96,10 @@ def get_animatediff_pipeline(
     #     torch_dtype=torch.float16,
     # )
     # else:
-    adapter = MotionAdapter.from_pretrained(
-        motion_module_path
-    )
+    adapter = MotionAdapter.from_pretrained(motion_module_path)
     pipe = AnimateDiffPipeline.from_pretrained(
         model_id,
-        motion_adapter=adapter,
+        motion_adapter=adapter, torch_dtype=torch.float16
     )
     scheduler = LCMScheduler.from_pretrained(
         model_id,
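The AnimateDiff loader gets the same treatment: the adapter load collapses to one line and the pipeline is loaded in fp16. A minimal sketch under assumed checkpoint names (the real motion_module_path and model_id are arguments of get_animatediff_pipeline); loading the adapter in fp16 as well keeps component dtypes consistent.

    import torch
    from diffusers import AnimateDiffPipeline, LCMScheduler, MotionAdapter

    # Hypothetical IDs; the app supplies motion_module_path and model_id.
    adapter = MotionAdapter.from_pretrained(
        "wangfuyun/AnimateLCM", torch_dtype=torch.float16
    )
    pipe = AnimateDiffPipeline.from_pretrained(
        "emilianJR/epiCRealism",
        motion_adapter=adapter,
        torch_dtype=torch.float16,
    )
    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)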
@@ -141,7 +137,13 @@ def get_animatediff_pipeline(


 pipe_dict = {
-    "ModelScope T2V": {"WebVid": None, "LAION-aes": None, "Anime": None, "Realistic": None, "3D Cartoon": None},
+    "ModelScope T2V": {
+        "WebVid": None,
+        "LAION-aes": None,
+        "Anime": None,
+        "Realistic": None,
+        "3D Cartoon": None,
+    },
     "AnimateDiff (SD1.5)": {"WebVid": None, "LAION-aes": None},
     "AnimateDiff (RealisticVision)": {"WebVid": None, "LAION-aes": None},
     "AnimateDiff (epiCRealism)": {"WebVid": None, "LAION-aes": None},
@@ -179,9 +181,17 @@ cache_pipeline = {
 # else:
 #     raise ValueError(f"Unknown base_model {base_model}")

-@spaces.GPU(duration=120)
+
+@spaces.GPU(duration=90)
 def infer(
-    base_model, variant, prompt, num_inference_steps=4, seed=0, randomize_seed=True,
+    base_model,
+    variant,
+    prompt,
+    num_inference_steps=4,
+    height=256,
+    width=256,
+    seed=0,
+    randomize_seed=True,
 ):
     # if pipe_dict[base_model][variant] is None:
     #     if base_model == "ModelScope T2V":
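Two smaller changes ride along in this hunk: the ZeroGPU allotment requested per call drops from 120 s to 90 s, and the new height and width parameters default to 256, the resolution the UI hint below recommends for ModelScope T2V.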
@@ -245,12 +255,14 @@ def infer(

     generator = torch.Generator("cpu").manual_seed(seed)

-    # progress=gr.Progress(track_tqdm=True)
+    progress = gr.Progress(track_tqdm=True)
     output = cache_pipeline["pipeline"](
         prompt=prompt,
         num_frames=16,
         guidance_scale=1.0,
         num_inference_steps=num_inference_steps,
+        height=height,
+        width=width,
         generator=generator,
     ).frames
     if not isinstance(output, list):
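The resolution now flows through to the diffusers call. One caveat: Gradio only tracks tqdm progress when gr.Progress(track_tqdm=True) is declared as a default value of an event-handler parameter, so the assignment uncommented above is likely a no-op as written. A minimal standalone sketch of the generation call itself, assuming pipe is one of the pipelines built earlier:

    import torch

    generator = torch.Generator("cpu").manual_seed(0)
    frames = pipe(
        prompt="Aerial view of a mountain river at sunrise.",
        num_frames=16,
        guidance_scale=1.0,  # <= 1.0 disables classifier-free guidance
        num_inference_steps=4,
        height=512,  # new: resolution is caller-controlled
        width=512,
        generator=generator,
    ).frames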
@@ -275,50 +287,69 @@ examples = [
         "ModelScope T2V",
         "LAION-aes",
         "Aerial uhd 4k view. mid-air flight over fresh and clean mountain river at sunny summer morning. Green trees and sun rays on horizon. Direct on sun.",
-        4
+        4,
+        256,
+        256,
     ],
-    ["ModelScope T2V", "Anime", "Timelapse misty mountain landscape", 4],
+    ["ModelScope T2V", "Anime", "Timelapse misty mountain landscape", 4,
+        256,
+        256,
+    ],
     [
         "ModelScope T2V",
         "WebVid",
         "Back of woman in shorts going near pure creek in beautiful mountains.",
-        4
+        4,
+        256,
+        256,
     ],
     [
         "ModelScope T2V",
         "3D Cartoon",
         "A rotating pandoro (a traditional italian sweet yeast bread, most popular around christmas and new year) being eaten in time-lapse.",
-        4
+        4,
+        256,
+        256,
     ],
     [
         "ModelScope T2V",
         "Realistic",
         "Slow motion avocado with a stone falls and breaks into 2 parts with splashes",
-        4
+        4,
+        256,
+        256,
     ],
     [
         "AnimateDiff (RealisticVision)",
         "LAION-aes",
         "Slow motion of delicious salmon sachimi set with green vegetables leaves served on wood plate. make homemade japanese food at home.-dan",
-        8
+        8,
+        512,
+        512,
     ],
     [
         "AnimateDiff (RealisticVision)",
         "WebVid",
         "Blooming meadow panorama zoom-out shot heavenly clouds and upcoming thunderstorm in mountain range harz, germany.",
-        8
+        8,
+        512,
+        512,
     ],
     [
         "AnimateDiff (RealisticVision)",
         "LAION-aes",
         "A young woman in a yellow sweater uses vr glasses, sitting on the shore of a pond on a background of dark waves. a strong wind develops her hair, the sun's rays are reflected from the water.",
-        8
+        8,
+        512,
+        512,
     ],
     [
         "AnimateDiff (RealisticVision)",
         "LAION-aes",
         "Female running at sunset. healthy fitness concept",
-        8
+        8,
+        512,
+        512,
     ],
 ]
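Each example row now carries six values instead of four, matching the order of the gr.Examples inputs list wired up further down: base model, variant, prompt, inference steps, height, width. ModelScope rows use 256x256 and AnimateDiff rows 512x512, mirroring the resolution hint added to the UI.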
@@ -339,6 +370,7 @@ variants = {
 def update_variant(rs):
     return gr.update(choices=variants[rs], value=None)

+
 # init_pipelines()

 with gr.Blocks(css=css) as demo:
@@ -362,9 +394,12 @@ with gr.Blocks(css=css) as demo:

     gr.Markdown(
         f"""
-        <p align="center"> Currently running on {device}.</p>
+        <p align="center">Currently running on {device}.</p>
+        <p align="center">Model loading takes extra time.</p>
         """
     )
+
+    # <p align="center">ModelScope T2V works the best for resolution 256x256, and AnimateDiff works the best for 512x512.</p>
     with gr.Row():
         base_model = gr.Dropdown(
             label="Base model",
@@ -420,16 +455,50 @@ with gr.Blocks(css=css) as demo:
                 step=1,
                 value=4,
             )
-
-        with gr.Column():
+
+        with gr.Group():
+            with gr.Row():
+                text_hint = gr.Textbox(
+                    "Hint: ModelScope T2V works the best for resolution 256x256, and AnimateDiff works the best for resolution 512x512.",
+                    interactive=False,
+                    label="Hint",
+                    container=False,
+                )
+            with gr.Row():
+                height = gr.Slider(
+                    label="Height",
+                    minimum=256,
+                    maximum=1024,
+                    step=64,
+                    value=512,
+                    interactive=True,
+                )
+                width = gr.Slider(
+                    label="Width",
+                    minimum=256,
+                    maximum=1024,
+                    step=64,
+                    value=512,
+                    interactive=True,
+                )
+
+        with gr.Column(show_progress=True):
             # result = gr.Video(label="Result", show_label=False, interactive=False, height=512, width=512, autoplay=True)
             result = gr.Video(
-                label="Result", show_label=False, interactive=False, autoplay=True, height=512, width=512,
+                label="Result",
+                show_label=False,
+                interactive=False,
+                autoplay=True,
+                # height=512,
+                # width=512,
             )

     gr.Examples(
         examples=examples,
-        inputs=[base_model, variant_dropdown, prompt, num_inference_steps],
+        inputs=[base_model, variant_dropdown, prompt, num_inference_steps, height, width],
         cache_examples=True,
         fn=infer,
         outputs=[result, seed],
@@ -442,6 +511,8 @@ with gr.Blocks(css=css) as demo:
             variant_dropdown,
             prompt,
             num_inference_steps,
+            height,
+            width,
             seed,
             randomize_seed,
         ],
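Putting the UI side together: a condensed, self-contained sketch of how the new sliders feed an event handler (fake_infer is a stand-in for the Space's infer; multiples of 64 keep sizes divisible by the UNet/VAE downsampling factors):

    import gradio as gr

    def fake_infer(steps, height, width):
        # Stand-in handler; just echoes the chosen settings.
        return f"{steps} steps at {width}x{height}"

    with gr.Blocks() as demo:
        steps = gr.Slider(label="Steps", minimum=1, maximum=8, step=1, value=4)
        with gr.Row():
            height = gr.Slider(label="Height", minimum=256, maximum=1024, step=64, value=512)
            width = gr.Slider(label="Width", minimum=256, maximum=1024, step=64, value=512)
        out = gr.Textbox(label="Result")
        gr.Button("Run").click(fn=fake_infer, inputs=[steps, height, width], outputs=out)

    demo.launch()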
 