charliebaby2023 committed on
Commit c8094e3 · verified · 1 Parent(s): 4ceba74

Update scripts/main.py

Files changed (1)
  1. scripts/main.py +2 -384
scripts/main.py CHANGED
@@ -17,8 +17,7 @@ import gradio as gr
  from PIL import Image, PngImagePlugin
  import torch
 
- scheduler = LCMScheduler.from_pretrained(
-     "charliebaby2023/cybrpny", subfolder="scheduler")
+ scheduler = LCMScheduler.from_pretrained( "charliebaby2023/cybrpny", subfolder="scheduler")
 
  pipe = LatentConsistencyModelPipeline.from_pretrained(
      "charliebaby2023/cybrpny", scheduler = scheduler, safety_checker = None)
@@ -129,223 +128,8 @@ def generate(
      return paths, seed
 
 
- def generate_i2i(
-     prompt: str,
-     image: PipelineImageInput = None,
-     strength: float = 0.8,
-     seed: int = 0,
-     guidance_scale: float = 8.0,
-     num_inference_steps: int = 4,
-     num_images: int = 4,
-     randomize_seed: bool = False,
-     use_fp16: bool = True,
-     use_torch_compile: bool = False,
-     use_cpu: bool = False,
-     progress=gr.Progress(track_tqdm=True),
-     width: Optional[int] = 512,
-     height: Optional[int] = 512,
- ) -> Image.Image:
-     seed = randomize_seed_fn(seed, randomize_seed)
-     torch.manual_seed(seed)
- 
-     selected_device = 'cuda'
-     if use_cpu:
-         selected_device = "cpu"
-         if use_fp16:
-             use_fp16 = False
-             print("LCM warning: running on CPU, overrode FP16 with FP32")
-     global pipe, scheduler
-     pipe = LatentConsistencyModelImg2ImgPipeline(
-         vae= pipe.vae,
-         text_encoder = pipe.text_encoder,
-         tokenizer = pipe.tokenizer,
-         unet = pipe.unet,
-         scheduler = None, #scheduler,
-         safety_checker=None, # Disable NSFW filter
-         feature_extractor = pipe.feature_extractor,
-         requires_safety_checker = False,
-     )
-     # pipe = LatentConsistencyModelImg2ImgPipeline.from_pretrained(
-     #     "SimianLuo/LCM_Dreamshaper_v7", safety_checker = None)
- 
-     if use_fp16:
-         pipe.to(torch_device=selected_device, torch_dtype=torch.float16)
-     else:
-         pipe.to(torch_device=selected_device, torch_dtype=torch.float32)
- 
-     # Windows does not support torch.compile for now
-     if os.name != 'nt' and use_torch_compile:
-         pipe.unet = torch.compile(pipe.unet, mode='max-autotune')
- 
-     width, height = image.size
- 
-     start_time = time.time()
-     result = pipe(
-         prompt=prompt,
-         image=image,
-         strength=strength,
-         width=width,
-         height=height,
-         guidance_scale=guidance_scale,
-         num_inference_steps=num_inference_steps,
-         num_images_per_prompt=num_images,
-         original_inference_steps=50,
-         output_type="pil",
-         device = selected_device
-     ).images
-     paths = save_images(result, metadata={"prompt": prompt, "seed": seed, "width": width,
-                                           "height": height, "guidance_scale": guidance_scale, "num_inference_steps": num_inference_steps})
- 
-     elapsed_time = time.time() - start_time
-     print("LCM inference time: ", elapsed_time, "seconds")
-     return paths, seed
- 
- import cv2
- 
- def video_to_frames(video_path):
-     # Open the video file
-     cap = cv2.VideoCapture(video_path)
- 
-     # Check if the video opened successfully
-     if not cap.isOpened():
-         print("Error: LCM Could not open video.")
-         return
- 
-     # Read frames from the video
-     pil_images = []
-     while True:
-         ret, frame = cap.read()
-         if not ret:
-             break
- 
-         # Convert BGR to RGB (OpenCV uses BGR by default)
-         rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
- 
-         # Convert numpy array to PIL Image
-         pil_image = Image.fromarray(rgb_frame)
- 
-         # Append the PIL Image to the list
-         pil_images.append(pil_image)
- 
-     # Release the video capture object
-     cap.release()
- 
-     return pil_images
- 
- def frames_to_video(pil_images, output_path, fps):
-     if not pil_images:
-         print("Error: No images to convert.")
-         return
- 
-     img_array = []
-     for pil_image in pil_images:
-         img_array.append(np.array(pil_image))
- 
-     height, width, layers = img_array[0].shape
-     size = (width, height)
- 
-     out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, size)
-     for i in range(len(img_array)):
-         out.write(cv2.cvtColor(img_array[i], cv2.COLOR_RGB2BGR))
-     out.release()
- 
- def generate_v2v(
-     prompt: str,
-     video: any = None,
-     strength: float = 0.8,
-     seed: int = 0,
-     guidance_scale: float = 8.0,
-     num_inference_steps: int = 4,
-     randomize_seed: bool = False,
-     use_fp16: bool = True,
-     use_torch_compile: bool = False,
-     use_cpu: bool = False,
-     fps: int = 10,
-     save_frames: bool = False,
-     # progress=gr.Progress(track_tqdm=True),
-     width: Optional[int] = 512,
-     height: Optional[int] = 512,
-     num_images: Optional[int] = 1,
- ) -> Image.Image:
-     seed = randomize_seed_fn(seed, randomize_seed)
-     torch.manual_seed(seed)
- 
-     selected_device = 'cuda'
-     if use_cpu:
-         selected_device = "cpu"
-         if use_fp16:
-             use_fp16 = False
-             print("LCM warning: running on CPU, overrode FP16 with FP32")
-     global pipe, scheduler
-     pipe = LatentConsistencyModelImg2ImgPipeline(
-         vae= pipe.vae,
-         text_encoder = pipe.text_encoder,
-         tokenizer = pipe.tokenizer,
-         unet = pipe.unet,
-         scheduler = None,
-         safety_checker=None, # Disable NSFW filter
-         feature_extractor = pipe.feature_extractor,
-         requires_safety_checker = False,
-     )
-     # pipe = LatentConsistencyModelImg2ImgPipeline.from_pretrained(
-     #     "SimianLuo/LCM_Dreamshaper_v7", safety_checker = None)
 
-     if use_fp16:
-         pipe.to(torch_device=selected_device, torch_dtype=torch.float16)
-     else:
-         pipe.to(torch_device=selected_device, torch_dtype=torch.float32)
- 
-     # Windows does not support torch.compile for now
-     if os.name != 'nt' and use_torch_compile:
-         pipe.unet = torch.compile(pipe.unet, mode='max-autotune')
- 
-     frames = video_to_frames(video)
-     if frames is None:
-         print("Error: LCM could not convert video.")
-         return
-     width, height = frames[0].size
- 
-     start_time = time.time()
- 
-     results = []
-     for frame in frames:
-         result = pipe(
-             prompt=prompt,
-             image=frame,
-             strength=strength,
-             width=width,
-             height=height,
-             guidance_scale=guidance_scale,
-             num_inference_steps=num_inference_steps,
-             num_images_per_prompt=1,
-             original_inference_steps=50,
-             output_type="pil",
-             device = selected_device
-         ).images
-         if save_frames:
-             paths = save_images(result, metadata={"prompt": prompt, "seed": seed, "width": width,
-                                                   "height": height, "guidance_scale": guidance_scale, "num_inference_steps": num_inference_steps})
-         results.extend(result)
- 
-     elapsed_time = time.time() - start_time
-     print("LCM vid2vid inference complete! Processing", len(frames), "frames took", elapsed_time, "seconds")
- 
-     save_dir = './outputs/LCM-vid2vid/'
-     Path(save_dir).mkdir(exist_ok=True, parents=True)
-     unique_id = uuid.uuid4()
-     _, input_ext = os.path.splitext(video)
-     output_path = save_dir + f"{unique_id}-{seed}" + f"{input_ext}"
-     frames_to_video(results, output_path, fps)
-     return output_path
- 
- 
- 
- examples = [
-     "portrait photo of a girl, photograph, highly detailed face, depth of field, moody light, golden hour, style by Dan Winters, Russell James, Steve McCurry, centered, extremely detailed, Nikon D850, award winning photography",
-     "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k",
-     "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-     "A photo of beautiful mountain with realistic sunset and blue lake, highly detailed, masterpiece",
- ]
+ examples = [ "" ]
 
  with gr.Blocks() as lcm:
      with gr.Tab("LCM txt2img"):
@@ -443,173 +227,7 @@ with gr.Blocks() as lcm:
              outputs=[result, seed],
          )
 
-     with gr.Tab("LCM img2img"):
-         with gr.Row():
-             prompt = gr.Textbox(label="Prompt",
-                                 show_label=False,
-                                 lines=3,
-                                 placeholder="Prompt",
-                                 elem_classes=["prompt"])
-             run_i2i_button = gr.Button("Run", scale=0)
-         with gr.Row():
-             image_input = gr.Image(label="Upload your Image", type="pil")
-             result = gr.Gallery(
-                 label="Generated images",
-                 show_label=False,
-                 elem_id="gallery",
-                 preview=True
-             )
- 
-         with gr.Accordion("Advanced options", open=False):
-             seed = gr.Slider(
-                 label="Seed",
-                 minimum=0,
-                 maximum=MAX_SEED,
-                 step=1,
-                 value=0,
-                 randomize=True
-             )
-             randomize_seed = gr.Checkbox(
-                 label="Randomize seed across runs", value=True)
-             use_fp16 = gr.Checkbox(
-                 label="Run LCM in fp16 (for lower VRAM)", value=False)
-             use_torch_compile = gr.Checkbox(
-                 label="Run LCM with torch.compile (currently not supported on Windows)", value=False)
-             use_cpu = gr.Checkbox(label="Run LCM on CPU", value=True)
-             with gr.Row():
-                 guidance_scale = gr.Slider(
-                     label="Guidance scale for base",
-                     minimum=2,
-                     maximum=14,
-                     step=0.1,
-                     value=8.0,
-                 )
-                 num_inference_steps = gr.Slider(
-                     label="Number of inference steps for base",
-                     minimum=1,
-                     maximum=8,
-                     step=1,
-                     value=4,
-                 )
-             with gr.Row():
-                 num_images = gr.Slider(
-                     label="Number of images (batch count)",
-                     minimum=1,
-                     maximum=int(os.getenv("MAX_NUM_IMAGES")),
-                     step=1,
-                     value=1,
-                 )
-                 strength = gr.Slider(
-                     label="Prompt Strength",
-                     minimum=0.1,
-                     maximum=1.0,
-                     step=0.1,
-                     value=0.5,
-                 )
- 
-         run_i2i_button.click(
-             fn=generate_i2i,
-             inputs=[
-                 prompt,
-                 image_input,
-                 strength,
-                 seed,
-                 guidance_scale,
-                 num_inference_steps,
-                 num_images,
-                 randomize_seed,
-                 use_fp16,
-                 use_torch_compile,
-                 use_cpu
-             ],
-             outputs=[result, seed],
-         )
 
- 
-     with gr.Tab("LCM vid2vid"):
- 
-         show_v2v = False if os.getenv("SHOW_VID2VID") == "NO" else True
-         gr.Markdown("Not recommended for use with CPU. Duplicate the space and modify SHOW_VID2VID to enable it. 🚫💻")
-         with gr.Tabs(visible=show_v2v) as tabs:
-             #with gr.Tab("", visible=show_v2v):
- 
-             with gr.Row():
-                 prompt = gr.Textbox(label="Prompt",
-                                     show_label=False,
-                                     lines=3,
-                                     placeholder="Prompt",
-                                     elem_classes=["prompt"])
-                 run_v2v_button = gr.Button("Run", scale=0)
-             with gr.Row():
-                 video_input = gr.Video(label="Source Video")
-                 video_output = gr.Video(label="Generated Video")
- 
-             with gr.Accordion("Advanced options", open=False):
-                 seed = gr.Slider(
-                     label="Seed",
-                     minimum=0,
-                     maximum=MAX_SEED,
-                     step=1,
-                     value=0,
-                     randomize=True
-                 )
-                 randomize_seed = gr.Checkbox(
-                     label="Randomize seed across runs", value=True)
-                 use_fp16 = gr.Checkbox(
-                     label="Run LCM in fp16 (for lower VRAM)", value=False)
-                 use_torch_compile = gr.Checkbox(
-                     label="Run LCM with torch.compile (currently not supported on Windows)", value=False)
-                 use_cpu = gr.Checkbox(label="Run LCM on CPU", value=True)
-                 save_frames = gr.Checkbox(label="Save intermediate frames", value=False)
-                 with gr.Row():
-                     guidance_scale = gr.Slider(
-                         label="Guidance scale for base",
-                         minimum=2,
-                         maximum=14,
-                         step=0.1,
-                         value=8.0,
-                     )
-                     num_inference_steps = gr.Slider(
-                         label="Number of inference steps for base",
-                         minimum=1,
-                         maximum=8,
-                         step=1,
-                         value=4,
-                     )
-                 with gr.Row():
-                     fps = gr.Slider(
-                         label="Output FPS",
-                         minimum=1,
-                         maximum=200,
-                         step=1,
-                         value=10,
-                     )
-                     strength = gr.Slider(
-                         label="Prompt Strength",
-                         minimum=0.1,
-                         maximum=1.0,
-                         step=0.05,
-                         value=0.5,
-                     )
- 
-             run_v2v_button.click(
-                 fn=generate_v2v,
-                 inputs=[
-                     prompt,
-                     video_input,
-                     strength,
-                     seed,
-                     guidance_scale,
-                     num_inference_steps,
-                     randomize_seed,
-                     use_fp16,
-                     use_torch_compile,
-                     use_cpu,
-                     fps,
-                     save_frames
-                 ],
-                 outputs=video_output,
-             )
 
  if __name__ == "__main__":
      lcm.queue().launch()
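For context on the code removed above: generate_i2i and generate_v2v rebuilt an image-to-image pipeline from the components of the already-loaded txt2img pipe. Below is a minimal sketch of that component-sharing pattern, not part of the commit; it assumes LatentConsistencyModelImg2ImgPipeline and the LCM scheduler come from a recent diffusers release, reuses the scheduler loaded at the top of scripts/main.py instead of the scheduler=None the removed code passed, and uses a placeholder input path plus one of the removed example prompts.

# Sketch only (assumptions noted above), not part of this commit.
from PIL import Image
from diffusers import LatentConsistencyModelImg2ImgPipeline

# `pipe` and `scheduler` are the objects created at the top of scripts/main.py
i2i_pipe = LatentConsistencyModelImg2ImgPipeline(
    vae=pipe.vae,
    text_encoder=pipe.text_encoder,
    tokenizer=pipe.tokenizer,
    unet=pipe.unet,
    scheduler=scheduler,            # the removed code passed None here; the loaded LCM scheduler is reused instead
    safety_checker=None,            # NSFW filter disabled, as in the removed code
    feature_extractor=pipe.feature_extractor,
    requires_safety_checker=False,
)

init_image = Image.open("input.png").convert("RGB")   # hypothetical input image path
images = i2i_pipe(
    prompt="Self-portrait oil painting, a beautiful cyborg with golden hair, 8k",
    image=init_image,
    strength=0.5,                   # the removed img2img tab's default
    guidance_scale=8.0,
    num_inference_steps=4,
    output_type="pil",
).images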
 
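After this commit only the txt2img path remains. A minimal sketch of that flow, again assuming the classes come from diffusers; the repo id, step count, and guidance scale mirror scripts/main.py and its slider defaults, the prompt is taken from the removed examples list, and the output filename is a placeholder.

# Sketch only (assumptions noted above), not part of this commit.
import torch
from diffusers import LCMScheduler, LatentConsistencyModelPipeline

scheduler = LCMScheduler.from_pretrained("charliebaby2023/cybrpny", subfolder="scheduler")
pipe = LatentConsistencyModelPipeline.from_pretrained(
    "charliebaby2023/cybrpny", scheduler=scheduler, safety_checker=None)
pipe.to("cpu", torch.float32)       # CPU + FP32, matching the checkbox defaults shown in the removed tabs

images = pipe(
    prompt="Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
    width=512,
    height=512,
    guidance_scale=8.0,             # slider default
    num_inference_steps=4,          # slider default
    num_images_per_prompt=1,
    output_type="pil",
).images
images[0].save("lcm_txt2img.png")   # placeholder output path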