Spaces:
Runtime error
Runtime error
Commit
·
65acb9b
1
Parent(s):
fd28d2d
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,7 +14,7 @@ import tempfile
|
|
| 14 |
from mesh import get_mesh
|
| 15 |
|
| 16 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 17 |
-
|
| 18 |
# Inpainting pipeline
|
| 19 |
|
| 20 |
|
|
@@ -26,39 +26,57 @@ model_type = "DPT_Large" # MiDaS v3 - Large (highest accuracy, slowest i
|
|
| 26 |
#model_type = "DPT_Hybrid" # MiDaS v3 - Hybrid (medium accuracy, medium inference speed)
|
| 27 |
#model_type = "MiDaS_small" # MiDaS v2.1 - Small (lowest accuracy, highest inference speed)
|
| 28 |
|
| 29 |
-
|
|
|
|
| 30 |
|
| 31 |
-
midas.to(device)
|
| 32 |
-
midas.eval()
|
| 33 |
|
| 34 |
-
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
|
| 35 |
|
| 36 |
-
if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
|
| 37 |
-
|
| 38 |
-
else:
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
| 40 |
|
|
|
|
|
|
|
| 41 |
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
|
|
|
| 48 |
|
| 49 |
-
prediction = torch.nn.functional.interpolate(
|
| 50 |
-
prediction.unsqueeze(1),
|
| 51 |
-
size=image.shape[:2],
|
| 52 |
-
mode="bicubic",
|
| 53 |
-
align_corners=False,
|
| 54 |
-
).squeeze()
|
| 55 |
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
return Image.fromarray(output.astype("int32"))
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
def read_content(file_path: str) -> str:
|
| 64 |
"""read the content of target file
|
|
@@ -69,17 +87,16 @@ def read_content(file_path: str) -> str:
|
|
| 69 |
return content
|
| 70 |
|
| 71 |
def predict_images(dict, depth, prompt="", negative_prompt="", guidance_scale=7.5, steps=20, strength=1.0, scheduler="EulerDiscreteScheduler"):
|
|
|
|
| 72 |
if negative_prompt == "":
|
| 73 |
negative_prompt = None
|
| 74 |
-
scheduler_class_name = scheduler.split("-")[0]
|
| 75 |
|
| 76 |
init_image = cv2.resize(dict["image"], (512, 512))
|
| 77 |
|
| 78 |
mask = Image.fromarray(cv2.resize(dict["mask"], (512, 512))[:,:,0])
|
| 79 |
-
mask.save("temp_mask.jpg")
|
| 80 |
|
| 81 |
if (depth is None):
|
| 82 |
-
depth_image = estimate_depth(init_image)
|
| 83 |
|
| 84 |
else:
|
| 85 |
d_i = depth[:,:,0]
|
|
@@ -177,6 +194,7 @@ def create_vis_demo():
|
|
| 177 |
|
| 178 |
|
| 179 |
|
|
|
|
| 180 |
def predict_images_3d(dict, depth, prompt="", negative_prompt="", guidance_scale=7.5, steps=20, strength=1.0, scheduler="EulerDiscreteScheduler", keep_edges=False):
|
| 181 |
if negative_prompt == "":
|
| 182 |
negative_prompt = None
|
|
@@ -187,7 +205,7 @@ def predict_images_3d(dict, depth, prompt="", negative_prompt="", guidance_scale
|
|
| 187 |
mask.save("temp_mask.jpg")
|
| 188 |
|
| 189 |
if (depth is None):
|
| 190 |
-
depth_image = estimate_depth(init_image)
|
| 191 |
|
| 192 |
else:
|
| 193 |
d_i = depth[:,:,0]
|
|
@@ -201,24 +219,13 @@ def predict_images_3d(dict, depth, prompt="", negative_prompt="", guidance_scale
|
|
| 201 |
|
| 202 |
output = pipe(prompt = prompt, negative_prompt=negative_prompt, image=init_image, mask_image=mask, depth_image=depth_image, guidance_scale=guidance_scale, num_inference_steps=int(steps), strength=strength)
|
| 203 |
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
output_depth_vis = output_depth_vis.astype("uint8")
|
| 208 |
-
|
| 209 |
-
input_depth = np.array(depth_image)
|
| 210 |
-
input_depth_vis = (input_depth - np.min(input_depth)) / (np.max(input_depth) - np.min(input_depth)) * 255
|
| 211 |
-
input_depth_vis = input_depth_vis.astype("uint8")
|
| 212 |
-
|
| 213 |
-
#init_image
|
| 214 |
-
#depth_image
|
| 215 |
output_image = output.rgb[0]
|
| 216 |
|
| 217 |
-
|
| 218 |
-
output_mesh = get_mesh(
|
| 219 |
-
|
| 220 |
-
depth_image_mesh = input_depth_vis.max() - input_depth_vis
|
| 221 |
-
input_mesh = get_mesh(depth_image_mesh,init_image, keep_edges=keep_edges, skew=1)
|
| 222 |
|
| 223 |
return input_mesh, output_mesh, gr.update(visible=True)
|
| 224 |
|
|
|
|
| 14 |
from mesh import get_mesh
|
| 15 |
|
| 16 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 17 |
+
model_arch = "zoe"
|
| 18 |
# Inpainting pipeline
|
| 19 |
|
| 20 |
|
|
|
|
| 26 |
#model_type = "DPT_Hybrid" # MiDaS v3 - Hybrid (medium accuracy, medium inference speed)
|
| 27 |
#model_type = "MiDaS_small" # MiDaS v2.1 - Small (lowest accuracy, highest inference speed)
|
| 28 |
|
| 29 |
+
# Select the depth backend once at import time. Each branch loads its model
# through torch.hub and defines an ``estimate_depth(image)`` with the same
# return contract: (16-bit-range PIL image, raw minimum, raw maximum).
if model_arch == "midas":
    midas = torch.hub.load("intel-isl/MiDaS", model_type)

    midas.to(device)
    midas.eval()

    midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

    # The DPT variants use a different preprocessing transform than the
    # small model.
    if model_type in ("DPT_Large", "DPT_Hybrid"):
        transform = midas_transforms.dpt_transform
    else:
        transform = midas_transforms.small_transform

    def estimate_depth(image):
        """Run MiDaS on ``image`` and return a normalised depth image.

        Args:
            image: Array-like exposing ``.shape``; its first two dimensions
                are used as the output size (assumed to be an H x W x C
                numpy image -- TODO confirm against callers).

        Returns:
            A 3-tuple ``(depth_image, raw_min, raw_max)`` where
            ``depth_image`` is a PIL image built from the prediction
            linearly rescaled to 0..65535 and cast to int32, and
            ``raw_min`` / ``raw_max`` are the extremes of the prediction
            *before* rescaling.
        """
        input_batch = transform(image).to(device)

        with torch.no_grad():
            prediction = midas(input_batch)

            # Resample the prediction back to the input resolution.
            prediction = torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=image.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze()

        raw = prediction.cpu().numpy()

        # Capture the range before rescaling; taking min/max afterwards
        # would always yield 0 and 65535 and lose the original scale.
        lo, hi = raw.min(), raw.max()
        span = hi - lo
        if span == 0:
            # Constant prediction: avoid 0/0 (NaN) and emit a flat image.
            scaled = np.zeros_like(raw)
        else:
            scaled = 65535 * (raw - lo) / span

        return Image.fromarray(scaled.astype("int32")), lo, hi

elif model_arch == "zoe":
    # Zoe_N
    repo = "isl-org/ZoeDepth"
    model_zoe_n = torch.hub.load(repo, "ZoeD_N", pretrained=True)
    zoe = model_zoe_n.to(device)

    def estimate_depth(image):
        """Run ZoeDepth on ``image`` and return a normalised depth image.

        Args:
            image: Input forwarded unchanged to ``zoe.infer_pil``
                (accepted input types are defined by ZoeDepth -- TODO
                confirm that the numpy images passed by callers are valid).

        Returns:
            A 3-tuple ``(depth_image, raw_min, raw_max)`` where
            ``depth_image`` is a PIL image built from the prediction
            rescaled to 0..65535 with the scale inverted (the largest raw
            value maps to 0, the smallest to 65535) and cast to int32, and
            ``raw_min`` / ``raw_max`` are the extremes of the raw
            prediction.
        """
        depth_tensor = zoe.infer_pil(image, output_type="tensor")
        raw = depth_tensor.cpu().numpy()

        lo, hi = raw.min(), raw.max()
        span = hi - lo
        if span == 0:
            # Constant prediction: avoid 0/0 (NaN). The inverted scale maps
            # a normalised value of 0 to 65535, so emit that everywhere.
            inverted = np.full_like(raw, 65535)
        else:
            inverted = 65535 * (1 - (raw - lo) / span)

        return Image.fromarray(inverted.astype("int32")), lo, hi
|
| 76 |
+
|
| 77 |
+
def denormalize(image, max, min):
    """Map a 0..65535-scaled value (or array) back onto a value range.

    Computes ``(image / 65535 - 1) * (min - max) + min``, so an input of
    65535 yields ``min`` and an input of 0 yields ``max``.

    Args:
        image: Scalar or array supporting arithmetic with Python numbers.
        max: Value returned for an input of 0.
        min: Value returned for an input of 65535.

    Returns:
        The rescaled value, with the same shape as ``image``.

    Note:
        The parameter names shadow the ``max`` / ``min`` builtins; they are
        kept as-is so existing callers keep working.
        NOTE(review): the caller visible in ``predict_images_3d`` passes
        ``(image, min, max)`` positionally, i.e. the smaller bound arrives
        in ``max`` -- confirm that ordering is intended.
    """
    return (image / 65535 - 1) * (min - max) + min
|
| 80 |
|
| 81 |
def read_content(file_path: str) -> str:
|
| 82 |
"""read the content of target file
|
|
|
|
| 87 |
return content
|
| 88 |
|
| 89 |
def predict_images(dict, depth, prompt="", negative_prompt="", guidance_scale=7.5, steps=20, strength=1.0, scheduler="EulerDiscreteScheduler"):
|
| 90 |
+
|
| 91 |
if negative_prompt == "":
|
| 92 |
negative_prompt = None
|
|
|
|
| 93 |
|
| 94 |
init_image = cv2.resize(dict["image"], (512, 512))
|
| 95 |
|
| 96 |
mask = Image.fromarray(cv2.resize(dict["mask"], (512, 512))[:,:,0])
|
|
|
|
| 97 |
|
| 98 |
if (depth is None):
|
| 99 |
+
depth_image, _, _ = estimate_depth(init_image)
|
| 100 |
|
| 101 |
else:
|
| 102 |
d_i = depth[:,:,0]
|
|
|
|
| 194 |
|
| 195 |
|
| 196 |
|
| 197 |
+
|
| 198 |
def predict_images_3d(dict, depth, prompt="", negative_prompt="", guidance_scale=7.5, steps=20, strength=1.0, scheduler="EulerDiscreteScheduler", keep_edges=False):
|
| 199 |
if negative_prompt == "":
|
| 200 |
negative_prompt = None
|
|
|
|
| 205 |
mask.save("temp_mask.jpg")
|
| 206 |
|
| 207 |
if (depth is None):
|
| 208 |
+
depth_image, min, max = estimate_depth(init_image)
|
| 209 |
|
| 210 |
else:
|
| 211 |
d_i = depth[:,:,0]
|
|
|
|
| 219 |
|
| 220 |
output = pipe(prompt = prompt, negative_prompt=negative_prompt, image=init_image, mask_image=mask, depth_image=depth_image, guidance_scale=guidance_scale, num_inference_steps=int(steps), strength=strength)
|
| 221 |
|
| 222 |
+
depth_in = denormalize(np.array(depth_image), min, max)
|
| 223 |
+
depth_out = denormalize(np.array(output.depth[0]), min, max)
|
| 224 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
output_image = output.rgb[0]
|
| 226 |
|
| 227 |
+
input_mesh = get_mesh(depth_in,init_image, keep_edges=keep_edges)
|
| 228 |
+
output_mesh = get_mesh(depth_out, output_image, keep_edges=keep_edges)
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
return input_mesh, output_mesh, gr.update(visible=True)
|
| 231 |
|