Himanshu-AT committed on
Commit 8b821ae · 1 Parent(s): 6f47450

change to vae

Files changed (2)
  1. app.py +26 -9
  2. requirements.txt +2 -0
app.py CHANGED
@@ -2,19 +2,15 @@ import gradio as gr
 import numpy as np
 
 import spaces
-import torch
 import random
 from image_gen_aux import DepthPreprocessor
+from PIL import Image
+import torch
+from torchvision import transforms
 
-from diffusers import FluxFillPipeline
+from diffusers import FluxFillPipeline, AutoencoderKL
 from PIL import Image
 
-def remove_background(image):
-    # Placeholder function for background removal
-    # Use a library or model like Inspyrenet for actual implementation
-    mask = generate_mask(image)
-    subject = apply_mask(image, mask)
-    return subject, mask
 
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 2048
@@ -23,8 +19,29 @@ pipe = FluxFillPipeline.from_pretrained("black-forest-labs/FLUX.1-Fill-dev", tor
 pipe.load_lora_weights("alvdansen/flux-koda")
 pipe.enable_lora()
 
+vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae")
 processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
 
+preprocess = transforms.Compose(
+    [
+        transforms.Resize(
+            (vae.config.sample_size, vae.config.sample_size),
+            interpolation=transforms.InterpolationMode.BILINEAR,
+        ),
+        transforms.ToTensor(),
+        transforms.Normalize([0.5], [0.5]),
+    ]
+)
+#
+# image_np = image[0].cpu().numpy()  # Move to CPU and convert to NumPy
+
+# if image_np.shape[0] == 3:  # Check if channels are first
+#     image_np = image_np.transpose(1, 2, 0)
+
+# image_np = (image_np * 255).astype(np.uint8)
+
+image = Image.fromarray(image_np)
+
 def calculate_optimal_dimensions(image: Image.Image):
     # Extract the original dimensions
     original_width, original_height = image.size
@@ -74,7 +91,7 @@ def infer(edit_images, prompt, seed=42, randomize_seed=False, width=1024, height
 
     controlImage = processor(image)[0].convert("RGB")
     image = pipe(
-        control_image=controlImage,
+        mask_image_latent=vae.encode(controlImage),
         prompt=prompt,
         image=image,
         mask_image=mask,
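
Note on this change: as committed, the line image = Image.fromarray(image_np) runs at import time while every statement that defines image_np is commented out, and vae.encode(controlImage) is handed a PIL image even though AutoencoderKL.encode expects a normalized tensor batch. Below is a minimal sketch of the encode step this diff appears to be aiming for, reusing the preprocess transform defined above; encode_control_image is a hypothetical helper, and mask_image_latent as a FluxFillPipeline keyword is carried over from the commit rather than a verified API.

import torch

@torch.no_grad()
def encode_control_image(control_image, vae, preprocess):
    # Hypothetical helper (not part of the commit): PIL image -> VAE latents.
    # preprocess maps PIL -> normalized tensor in [-1, 1]; add a batch dim.
    pixel_values = preprocess(control_image).unsqueeze(0)
    pixel_values = pixel_values.to(device=vae.device, dtype=vae.dtype)
    # AutoencoderKL.encode returns a posterior distribution; sample it, then
    # apply the FLUX VAE's shift and scaling factors as diffusers does internally.
    latents = vae.encode(pixel_values).latent_dist.sample()
    latents = (latents - vae.config.shift_factor) * vae.config.scaling_factor
    return latents

With such a helper, the call inside infer would pass mask_image_latent=encode_control_image(controlImage, vae, preprocess) instead of the raw PIL image.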
requirements.txt CHANGED
@@ -6,3 +6,5 @@ safetensors
 sentencepiece
 peft
 xformers
+torchvision
+torch