TalHach61 committed
Commit d5ed8c2 · verified · 1 Parent(s): 710e08b

Update app.py

Files changed (1)
  1. app.py +90 -50
app.py CHANGED
@@ -38,6 +38,27 @@ model_configs = {
     'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
 }
 
+ratios_map = {
+    0.5:{"width":704,"height":1408},
+    0.57:{"width":768,"height":1344},
+    0.68:{"width":832,"height":1216},
+    0.72:{"width":832,"height":1152},
+    0.78:{"width":896,"height":1152},
+    0.82:{"width":896,"height":1088},
+    0.88:{"width":960,"height":1088},
+    0.94:{"width":960,"height":1024},
+    1.00:{"width":1024,"height":1024},
+    1.13:{"width":1088,"height":960},
+    1.21:{"width":1088,"height":896},
+    1.29:{"width":1152,"height":896},
+    1.38:{"width":1152,"height":832},
+    1.46:{"width":1216,"height":832},
+    1.67:{"width":1280,"height":768},
+    1.75:{"width":1344,"height":768},
+    2.00:{"width":1408,"height":704}
+}
+ratios = np.array(list(ratios_map.keys()))
+
 encoder = 'vitl'
 model = DepthAnythingV2(**model_configs[encoder])
 filepath = hf_hub_download(repo_id=f"depth-anything/Depth-Anything-V2-Large", filename=f"depth_anything_v2_vitl.pth", repo_type="model")
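The ratios_map added here backs the get_size()/resize_image() helpers introduced later in this commit: an input image's aspect ratio is snapped to the nearest supported bucket and the image is resized to that bucket's fixed width and height. A small standalone illustration of the lookup, using an abridged map with values copied from the table above:

import numpy as np

# Abridged version of ratios_map above (values taken from the committed table).
ratios_map = {0.5: (704, 1408), 1.00: (1024, 1024), 1.46: (1216, 832), 2.00: (1408, 704)}
ratios = np.array(list(ratios_map.keys()))

w, h = 1500, 1000                                    # e.g. a landscape photo, aspect ratio 1.5
nearest = ratios[np.argmin(np.abs(w / h - ratios))]  # snap to the nearest supported ratio
print(nearest, ratios_map[nearest])                  # 1.46 (1216, 832) -> target width, height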
@@ -45,27 +66,59 @@ state_dict = torch.load(filepath, map_location="cpu")
 model.load_state_dict(state_dict)
 model = model.to(DEVICE).eval()
 
+from huggingface_hub import hf_hub_download
+import os
+
+try:
+    local_dir = os.path.dirname(__file__)
+except:
+    local_dir = '.'
+
+hf_hub_download(repo_id="briaai/BRIA-4B-Adapt", filename='pipeline_bria.py', local_dir=local_dir)
+hf_hub_download(repo_id="briaai/BRIA-4B-Adapt", filename='transformer_bria.py', local_dir=local_dir)
+hf_hub_download(repo_id="briaai/BRIA-4B-Adapt", filename='bria_utils.py', local_dir=local_dir)
+hf_hub_download(repo_id="briaai/BRIA-3.0-ControlNet-Union", filename='pipeline_bria_controlnet.py', local_dir=local_dir)
+hf_hub_download(repo_id="briaai/BRIA-3.0-ControlNet-Union", filename='controlnet_bria.py', local_dir=local_dir)
+
+
 import torch
 from diffusers.utils import load_image
-from diffusers import FluxControlNetPipeline, FluxControlNetModel
-from diffusers.models import FluxMultiControlNetModel
-
-base_model = 'black-forest-labs/FLUX.1-dev'
-controlnet_model = 'Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro'
-controlnet = FluxControlNetModel.from_pretrained(controlnet_model, torch_dtype=torch.bfloat16)
-controlnet = FluxMultiControlNetModel([controlnet])
-pipe = FluxControlNetPipeline.from_pretrained(base_model, controlnet=controlnet, torch_dtype=torch.bfloat16)
+from controlnet_bria import BriaControlNetModel, BriaMultiControlNetModel
+from pipeline_bria_controlnet import BriaControlNetPipeline
+import PIL.Image as Image
+
+base_model = 'briaai/BRIA-4B-Adapt'
+controlnet_model = 'briaai/BRIA-3.0-ControlNet-Union'
+
+controlnet = BriaControlNetModel.from_pretrained(controlnet_model, torch_dtype=torch.bfloat16)
+controlnet = BriaMultiControlNetModel([controlnet])
+
+pipe = BriaControlNetPipeline.from_pretrained(base_model, controlnet=controlnet, torch_dtype=torch.bfloat16, trust_remote_code=True)
 pipe.to("cuda")
 
-mode_mapping = {"canny":0, "tile":1, "depth":2, "blur":3, "openpose":4, "gray":5, "low quality": 6}
-strength_mapping = {"canny":0.65, "tile":0.45, "depth":0.55, "blur":0.45, "openpose":0.55, "gray":0.45, "low quality": 0.4}
+mode_mapping = {
+    "depth": 0,
+    "canny": 1,
+    "colorgrid": 2,
+    "recolor": 3,
+    "tile": 4,
+    "pose": 5,
+}
+strength_mapping = {
+    "depth": 1.0,
+    "canny": 1.0,
+    "colorgrid": 1.0,
+    "recolor": 1.0,
+    "tile": 1.0,
+    "pose": 1.0,
+}
 
 canny = CannyDetector()
 open_pose = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
 
 torch.backends.cuda.matmul.allow_tf32 = True
-pipe.vae.enable_tiling()
-pipe.vae.enable_slicing()
+# pipe.vae.enable_tiling()
+# pipe.vae.enable_slicing()
 pipe.enable_model_cpu_offload() # for saving memory
 
 def convert_from_image_to_cv2(img: Image) -> np.ndarray:
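For orientation, this is roughly how the new pipeline would be invoked at generation time. It is a hedged sketch only: the keyword arguments (control_image, control_mode, controlnet_conditioning_scale, width, height) assume BriaControlNetPipeline keeps the Flux-style multi-ControlNet interface this app used before the switch and are not taken from this commit; resize_image, mode_mapping and strength_mapping are the helpers added in this diff.

# Hypothetical usage sketch -- kwargs are assumed, not confirmed by this commit.
control_image = resize_image(load_image("input.jpg"))          # snap input to a supported resolution bucket
result = pipe(
    prompt="a photo of a cat",
    control_image=[control_image],                              # one entry per ControlNet in the multi-ControlNet wrapper
    control_mode=[mode_mapping["depth"]],                       # union mode index (assumed kwarg)
    controlnet_conditioning_scale=[strength_mapping["depth"]],  # per-ControlNet strength (assumed kwarg)
    num_inference_steps=30,
    guidance_scale=5.0,
    width=control_image.size[0],
    height=control_image.size[1],
    generator=torch.Generator(device="cuda").manual_seed(0),
).images[0]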
@@ -100,13 +153,8 @@ def convert_to_grayscale(image):
     gray_image = convert_from_cv2_to_image(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
     return gray_image
 
-def add_gaussian_noise(image, mean=0, sigma=10):
-    image = convert_from_image_to_cv2(image)
-    noise = np.random.normal(mean, sigma, image.shape)
-    noisy_image = convert_from_cv2_to_image(np.clip(image.astype(np.float32) + noise, 0, 255).astype(np.uint8))
-    return noisy_image
 
-def tile(input_image, resolution=768):
+def tile_old(input_image, resolution=768):
     input_image = convert_from_image_to_cv2(input_image)
     H, W, C = input_image.shape
     H = float(H)
@@ -114,38 +162,32 @@ def tile(input_image, resolution=768):
     k = float(resolution) / min(H, W)
     H *= k
     W *= k
-    H = int(np.round(H / 64.0)) * 64
-    W = int(np.round(W / 64.0)) * 64
+    H = int(np.round(H / 16.0)) * 16
+    W = int(np.round(W / 16.0)) * 16
     img = cv2.resize(input_image, (W, H), interpolation=cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA)
     img = convert_from_cv2_to_image(img)
     return img
 
-def resize_img(input_image, max_side=768, min_side=512, size=None,
-               pad_to_max_side=False, mode=Image.BILINEAR, base_pixel_number=64):
+def tile(downscale_factor, input_image):
+    control_image = input_image.resize((input_image.size[0] // downscale_factor, input_image.size[1] // downscale_factor)).resize(input_image.size, Image.NEAREST)
+
+def get_size(init_image):
+    w,h=init_image.size
+    curr_ratio = w/h
+    ind = np.argmin(np.abs(curr_ratio-ratios))
+    ratio = ratios[ind]
+    chosen_ratio = ratios_map[ratio]
+    w,h = chosen_ratio['width'], chosen_ratio['height']
+    return w,h
 
-    w, h = input_image.size
-    if size is not None:
-        w_resize_new, h_resize_new = size
-    else:
-        ratio = min_side / min(h, w)
-        w, h = round(ratio*w), round(ratio*h)
-        ratio = max_side / max(h, w)
-        input_image = input_image.resize([round(ratio*w), round(ratio*h)], mode)
-        w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
-        h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
-        input_image = input_image.resize([w_resize_new, h_resize_new], mode)
-
-    if pad_to_max_side:
-        res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
-        offset_x = (max_side - w_resize_new) // 2
-        offset_y = (max_side - h_resize_new) // 2
-        res[offset_y:offset_y+h_resize_new, offset_x:offset_x+w_resize_new] = np.array(input_image)
-        input_image = Image.fromarray(res)
-    return input_image
+def resize_image(image):
+    image = image.convert('RGB')
+    w,h = get_size(image)
+    resized_image = image.resize((w, h))
+    return resized_image
 
 @spaces.GPU(duration=180)
 def infer(cond_in, image_in, prompt, inference_steps, guidance_scale, control_mode, control_strength, seed, progress=gr.Progress(track_tqdm=True)):
-
     control_mode_num = mode_mapping[control_mode]
 
     if cond_in is None:
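One bug worth flagging in the hunk above: the new tile() builds control_image but never returns it, while infer() assigns its result (control_image = tile(64, image_in) / tile(16, image_in)), so those branches receive None as committed. A minimal corrected sketch of the apparent intent (Image is PIL.Image, as imported in this diff):

def tile(downscale_factor, input_image):
    # Downscale, then upscale back with nearest-neighbour to produce the blocky
    # conditioning image used by the "tile" and "colorgrid" modes.
    control_image = input_image.resize(
        (input_image.size[0] // downscale_factor, input_image.size[1] // downscale_factor)
    ).resize(input_image.size, Image.NEAREST)
    return control_image  # missing in the committed version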
@@ -157,14 +199,12 @@ def infer(cond_in, image_in, prompt, inference_steps, guidance_scale, control_mo
         control_image = extract_depth(image_in)
     elif control_mode == "openpose":
         control_image = extract_openpose(image_in)
-    elif control_mode == "blur":
-        control_image = apply_gaussian_blur(image_in)
-    elif control_mode == "low quality":
-        control_image = add_gaussian_noise(image_in)
-    elif control_mode == "gray":
+    elif control_mode == "colorgrid":
+        control_image = tile(64, image_in)
+    elif control_mode == "recolor":
         control_image = convert_to_grayscale(image_in)
     elif control_mode == "tile":
-        control_image = tile(image_in)
+        control_image = tile(16, image_in)
     else:
         control_image = resize_img(load_image(cond_in))
 
@@ -214,7 +254,7 @@ with gr.Blocks(css=css) as demo:
 
         with gr.Accordion("Controlnet"):
             control_mode = gr.Radio(
-                ["canny", "depth", "openpose", "gray", "blur", "tile", "low quality"], label="Mode", value="gray",
+                ["depth", "canny", "colorgrid", "recolor", "tile", "pose"], label="Mode", value="gray",
                 info="select the control mode, one for all"
             )
 
@@ -223,7 +263,7 @@ with gr.Blocks(css=css) as demo:
             minimum=0,
             maximum=1.0,
             step=0.05,
-            value=0.50,
+            value=0.9,
         )
 
         seed = gr.Slider(
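A few wiring gaps remain in the committed UI code: the Radio above keeps value="gray" although "gray" is no longer one of its choices (the grayscale mode is now "recolor"); infer() still tests for "openpose" while the new label is "pose", so that branch can never match and presumably should dispatch to extract_openpose(); and the unchanged else branch still calls resize_img(), which this commit deletes (resize_image() appears to be the intended replacement). A hedged sketch of the Radio fix only, with the default drawn from the new choice list:

import gradio as gr

# Assumed fix: the default must be one of the listed choices; "recolor" replaces the old "gray".
control_mode = gr.Radio(
    ["depth", "canny", "colorgrid", "recolor", "tile", "pose"],
    label="Mode",
    value="recolor",
    info="select the control mode, one for all",
)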
 