bartduis committed
Commit 3fc8eb5 · verified · 1 parent: 5887b54

Update app.py

Files changed (1)
  1. app.py +180 -180
app.py CHANGED
@@ -14,205 +14,205 @@ import torch
 # from eval_wrapper.eval import EvalWrapper, eval_scene
 
 
-device = 'cuda' if torch.cuda.is_available() else 'cpu'
-
-@GPU(duration = 180)
-def dummy_warmup():
-    import torch
-    if torch.cuda.is_available():
-        print("Warmup: GPU is available!")
-    _ = torch.tensor([0.0]).to(device)
-dummy_warmup()
-
-
-outdir = "/tmp/rayst3r"
-
-# loading all necessary models
-
-print("Loading MoGe model")
-# Load the model from huggingface hub (or load from local).
-
-
-def depth2uint16(depth):
-    return depth * torch.iinfo(torch.uint16).max / 10.0 # threshold is in m, convert to uint16 value
-
-def save_tensor_as_png(tensor: torch.Tensor, path: str, dtype: torch.dtype | None = None):
-    if dtype is None:
-        dtype = tensor.dtype
-    Image.fromarray(tensor.to(dtype).cpu().numpy()).save(path)
-
-def colorize_points_with_turbo_all_dims(points, method='norm',cmap='turbo'):
-    """
-    Assigns colors to 3D points using the 'turbo' colormap based on a scalar computed from all 3 dimensions.
-
-    Args:
-        points (np.ndarray): (N, 3) array of 3D points.
-        method (str): Method for reducing 3D point to scalar. Options: 'norm', 'pca'.
-
-    Returns:
-        np.ndarray: (N, 3) RGB colors in [0, 1].
-    """
-    assert points.shape[1] == 3, "Input must be of shape (N, 3)"
-
-    if method == 'norm':
-        scalar = np.linalg.norm(points, axis=1)
-    elif method == 'pca':
-        # Project onto first principal component
-        mean = points.mean(axis=0)
-        centered = points - mean
-        u, s, vh = np.linalg.svd(centered, full_matrices=False)
-        scalar = centered @ vh[0] # Project onto first principal axis
-    else:
-        raise ValueError(f"Unknown method '{method}'")
-
-    # Normalize scalar to [0, 1]
-    scalar_min, scalar_max = scalar.min(), scalar.max()
-    normalized = (scalar - scalar_min) / (scalar_max - scalar_min + 1e-8)
-
-    # Apply turbo colormap
-    cmap = plt.colormaps.get_cmap(cmap)
-    colors = cmap(normalized)[:, :3] # Drop alpha
-
-    return colors
-
-def prep_for_rayst3r(img,depth_dict,mask):
-    H, W = img.shape[:2]
-    intrinsics = depth_dict["intrinsics"].detach().cpu()
-    intrinsics[0] *= W
-    intrinsics[1] *= H
-
-    input_dir = os.path.join(outdir, "input")
-    if os.path.exists(input_dir):
-        shutil.rmtree(input_dir)
-    os.makedirs(input_dir, exist_ok=True)
-    # save intrinsics
-    torch.save(intrinsics, os.path.join(input_dir, "intrinsics.pt"))
-
-    # save depth
-    depth = depth_dict["depth"].cpu()
-    depth = depth2uint16(depth)
-    save_tensor_as_png(depth, os.path.join(input_dir, "depth.png"),dtype=torch.uint16)
-
-    # save mask as bool
-    save_tensor_as_png(torch.from_numpy(mask).bool(), os.path.join(input_dir, "mask.png"),dtype=torch.bool)
-    # save image
-    save_tensor_as_png(torch.from_numpy(img), os.path.join(input_dir, "rgb.png"))
-
-@GPU(duration = 180)
-def rayst3r_to_glb(img,depth_dict,mask,max_total_points=10e6,rotated=False):
-    prep_for_rayst3r(img,depth_dict,mask)
-
-    dino_model = torch.hub.load('facebookresearch/dinov2', "dinov2_vitl14_reg")
-    dino_model.eval()
-    dino_model.to(device)
-
-    print("Loading RaySt3R model")
-    rayst3r_checkpoint = hf_hub_download("bartduis/rayst3r", "rayst3r.pth")
-    rayst3r_model = EvalWrapper(rayst3r_checkpoint,device='cpu')
-    rayst3r_model = rayst3r_model.to(device)
-
-    rayst3r_points = eval_scene(rayst3r_model,os.path.join(outdir, "input"),do_filter_all_masks=True,dino_model=dino_model, device = device).cpu()
-
-    # subsample points
-    n_points = min(max_total_points,rayst3r_points.shape[0])
-    rayst3r_points = rayst3r_points[torch.randperm(rayst3r_points.shape[0])[:n_points]].numpy()
-
-    rayst3r_points[:,1] = -rayst3r_points[:,1]
-    rayst3r_points[:,2] = -rayst3r_points[:,2]
-
-    # make all points red
-    colors = colorize_points_with_turbo_all_dims(rayst3r_points)
-
-    # load the input glb
-    scene = trimesh.Scene()
-    pct = trimesh.PointCloud(rayst3r_points, colors=colors, radius=0.01)
-    scene.add_geometry(pct)
-
-    outfile = os.path.join(outdir, "rayst3r.glb")
-    scene.export(outfile)
-    return outfile
-
-
-def input_to_glb(outdir,img,depth_dict,mask,rotated=False):
-    H, W = img.shape[:2]
-    intrinsics = depth_dict["intrinsics"].cpu().numpy()
-    intrinsics[0] *= W
-    intrinsics[1] *= H
-
-    depth = depth_dict["depth"].cpu().numpy()
-    cam2world = np.eye(4)
-    points_world = compute_pointmap(depth, cam2world, intrinsics)
-
-    scene = trimesh.Scene()
-    pts = np.concatenate([p[m] for p,m in zip(points_world,mask)])
-    col = np.concatenate([c[m] for c,m in zip(img,mask)])
-
-    pts = pts.reshape(-1,3)
-    pts[:,1] = -pts[:,1]
-    pts[:,2] = -pts[:,2]
-
-
-    pct = trimesh.PointCloud(pts, colors=col.reshape(-1,3))
-    scene.add_geometry(pct)
-
-    outfile = os.path.join(outdir, "input.glb")
-    scene.export(outfile)
-    return outfile
-
-@GPU(duration = 180)
-def depth_moge(input_img):
-    moge_model = MoGeModel.from_pretrained("Ruicheng/moge-vitl")
-    moge_model.to(device)
-    input_img_torch = torch.tensor(input_img / 255, dtype=torch.float32, device=device).permute(2, 0, 1)
-    output = moge_model.infer(input_img_torch).cpu()
-    return output
-
-@GPU(duration = 180)
-def mask_rembg(input_img):
-    #masked_img = rembg.remove(input_img,)
-    output_img = rembg.remove(input_img, alpha_matting=False, post_process_mask=True)
-
-    # Convert to NumPy array
-    output_np = np.array(output_img)
-    alpha = output_np[..., 3]
-
-    # Step 2: Erode the alpha mask to shrink object slightly
-    kernel = np.ones((3, 3), np.uint8) # Adjust size for aggressiveness
-    eroded_alpha = cv2.erode(alpha, kernel, iterations=1)
-    # Step 3: Replace alpha channel
-    output_np[..., 3] = eroded_alpha
-
-    mask = output_np[:,:,-1] >= 128
-    rgb = output_np[:,:,:3]
-    return mask, rgb
-
-@GPU(duration = 180)
-def process_image(input_img):
-    # resize the input image
-    rotated = False
-    #if input_img.shape[0] > input_img.shape[1]:
-    #    input_img = cv2.rotate(input_img, cv2.ROTATE_90_COUNTERCLOCKWISE)
-    #    rotated = True
-    input_img = cv2.resize(input_img, (640, 480))
-    # mask, rgb = mask_rembg(input_img)
-    # depth_dict = depth_moge(input_img)
-
-    # if os.path.exists(outdir):
-    #     shutil.rmtree(outdir)
-    # os.makedirs(outdir)
-
-    # input_glb = input_to_glb(outdir,input_img,depth_dict,mask,rotated=rotated)
-
-    # # visualize the input points in 3D in gradio
-    # inference_glb = rayst3r_to_glb(input_img,depth_dict,mask,rotated=rotated)
-
-    return input_img, input_img
-
-demo = gr.Interface(
-    process_image,
-    gr.Image(),
-    [gr.Model3D(label="Input"), gr.Model3D(label="RaySt3R",)]
-)
-
-if __name__ == "__main__":
-    demo.launch()
+# device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+# @GPU(duration = 180)
+# def dummy_warmup():
+#     import torch
+#     if torch.cuda.is_available():
+#         print("Warmup: GPU is available!")
+#     _ = torch.tensor([0.0]).to(device)
+# dummy_warmup()
+
+
+# outdir = "/tmp/rayst3r"
+
+# # loading all necessary models
+
+# print("Loading MoGe model")
+# # Load the model from huggingface hub (or load from local).
+
+
+# def depth2uint16(depth):
+#     return depth * torch.iinfo(torch.uint16).max / 10.0 # threshold is in m, convert to uint16 value
+
+# def save_tensor_as_png(tensor: torch.Tensor, path: str, dtype: torch.dtype | None = None):
+#     if dtype is None:
+#         dtype = tensor.dtype
+#     Image.fromarray(tensor.to(dtype).cpu().numpy()).save(path)
+
+# def colorize_points_with_turbo_all_dims(points, method='norm',cmap='turbo'):
+#     """
+#     Assigns colors to 3D points using the 'turbo' colormap based on a scalar computed from all 3 dimensions.
+
+#     Args:
+#         points (np.ndarray): (N, 3) array of 3D points.
+#         method (str): Method for reducing 3D point to scalar. Options: 'norm', 'pca'.
+
+#     Returns:
+#         np.ndarray: (N, 3) RGB colors in [0, 1].
+#     """
+#     assert points.shape[1] == 3, "Input must be of shape (N, 3)"
+
+#     if method == 'norm':
+#         scalar = np.linalg.norm(points, axis=1)
+#     elif method == 'pca':
+#         # Project onto first principal component
+#         mean = points.mean(axis=0)
+#         centered = points - mean
+#         u, s, vh = np.linalg.svd(centered, full_matrices=False)
+#         scalar = centered @ vh[0] # Project onto first principal axis
+#     else:
+#         raise ValueError(f"Unknown method '{method}'")
+
+#     # Normalize scalar to [0, 1]
+#     scalar_min, scalar_max = scalar.min(), scalar.max()
+#     normalized = (scalar - scalar_min) / (scalar_max - scalar_min + 1e-8)
+
+#     # Apply turbo colormap
+#     cmap = plt.colormaps.get_cmap(cmap)
+#     colors = cmap(normalized)[:, :3] # Drop alpha
+
+#     return colors
+
+# def prep_for_rayst3r(img,depth_dict,mask):
+#     H, W = img.shape[:2]
+#     intrinsics = depth_dict["intrinsics"].detach().cpu()
+#     intrinsics[0] *= W
+#     intrinsics[1] *= H
+
+#     input_dir = os.path.join(outdir, "input")
+#     if os.path.exists(input_dir):
+#         shutil.rmtree(input_dir)
+#     os.makedirs(input_dir, exist_ok=True)
+#     # save intrinsics
+#     torch.save(intrinsics, os.path.join(input_dir, "intrinsics.pt"))
+
+#     # save depth
+#     depth = depth_dict["depth"].cpu()
+#     depth = depth2uint16(depth)
+#     save_tensor_as_png(depth, os.path.join(input_dir, "depth.png"),dtype=torch.uint16)
+
+#     # save mask as bool
+#     save_tensor_as_png(torch.from_numpy(mask).bool(), os.path.join(input_dir, "mask.png"),dtype=torch.bool)
+#     # save image
+#     save_tensor_as_png(torch.from_numpy(img), os.path.join(input_dir, "rgb.png"))
+
+# @GPU(duration = 180)
+# def rayst3r_to_glb(img,depth_dict,mask,max_total_points=10e6,rotated=False):
+#     prep_for_rayst3r(img,depth_dict,mask)
+
+#     dino_model = torch.hub.load('facebookresearch/dinov2', "dinov2_vitl14_reg")
+#     dino_model.eval()
+#     dino_model.to(device)
+
+#     print("Loading RaySt3R model")
+#     rayst3r_checkpoint = hf_hub_download("bartduis/rayst3r", "rayst3r.pth")
+#     rayst3r_model = EvalWrapper(rayst3r_checkpoint,device='cpu')
+#     rayst3r_model = rayst3r_model.to(device)
+
+#     rayst3r_points = eval_scene(rayst3r_model,os.path.join(outdir, "input"),do_filter_all_masks=True,dino_model=dino_model, device = device).cpu()
+
+#     # subsample points
+#     n_points = min(max_total_points,rayst3r_points.shape[0])
+#     rayst3r_points = rayst3r_points[torch.randperm(rayst3r_points.shape[0])[:n_points]].numpy()
+
+#     rayst3r_points[:,1] = -rayst3r_points[:,1]
+#     rayst3r_points[:,2] = -rayst3r_points[:,2]
+
+#     # make all points red
+#     colors = colorize_points_with_turbo_all_dims(rayst3r_points)
+
+#     # load the input glb
+#     scene = trimesh.Scene()
+#     pct = trimesh.PointCloud(rayst3r_points, colors=colors, radius=0.01)
+#     scene.add_geometry(pct)
+
+#     outfile = os.path.join(outdir, "rayst3r.glb")
+#     scene.export(outfile)
+#     return outfile
+
+
+# def input_to_glb(outdir,img,depth_dict,mask,rotated=False):
+#     H, W = img.shape[:2]
+#     intrinsics = depth_dict["intrinsics"].cpu().numpy()
+#     intrinsics[0] *= W
+#     intrinsics[1] *= H
+
+#     depth = depth_dict["depth"].cpu().numpy()
+#     cam2world = np.eye(4)
+#     points_world = compute_pointmap(depth, cam2world, intrinsics)
+
+#     scene = trimesh.Scene()
+#     pts = np.concatenate([p[m] for p,m in zip(points_world,mask)])
+#     col = np.concatenate([c[m] for c,m in zip(img,mask)])
+
+#     pts = pts.reshape(-1,3)
+#     pts[:,1] = -pts[:,1]
+#     pts[:,2] = -pts[:,2]
+
+
+#     pct = trimesh.PointCloud(pts, colors=col.reshape(-1,3))
+#     scene.add_geometry(pct)
+
+#     outfile = os.path.join(outdir, "input.glb")
+#     scene.export(outfile)
+#     return outfile
+
+# @GPU(duration = 180)
+# def depth_moge(input_img):
+#     moge_model = MoGeModel.from_pretrained("Ruicheng/moge-vitl")
+#     moge_model.to(device)
+#     input_img_torch = torch.tensor(input_img / 255, dtype=torch.float32, device=device).permute(2, 0, 1)
+#     output = moge_model.infer(input_img_torch).cpu()
+#     return output

+# @GPU(duration = 180)
+# def mask_rembg(input_img):
+#     #masked_img = rembg.remove(input_img,)
+#     output_img = rembg.remove(input_img, alpha_matting=False, post_process_mask=True)
+
+#     # Convert to NumPy array
+#     output_np = np.array(output_img)
+#     alpha = output_np[..., 3]
+
+#     # Step 2: Erode the alpha mask to shrink object slightly
+#     kernel = np.ones((3, 3), np.uint8) # Adjust size for aggressiveness
+#     eroded_alpha = cv2.erode(alpha, kernel, iterations=1)
+#     # Step 3: Replace alpha channel
+#     output_np[..., 3] = eroded_alpha
+
+#     mask = output_np[:,:,-1] >= 128
+#     rgb = output_np[:,:,:3]
+#     return mask, rgb
+
+# @GPU(duration = 180)
+# def process_image(input_img):
+#     # resize the input image
+#     rotated = False
+#     #if input_img.shape[0] > input_img.shape[1]:
+#     #    input_img = cv2.rotate(input_img, cv2.ROTATE_90_COUNTERCLOCKWISE)
+#     #    rotated = True
+#     input_img = cv2.resize(input_img, (640, 480))
+#     # mask, rgb = mask_rembg(input_img)
+#     # depth_dict = depth_moge(input_img)
+
+#     # if os.path.exists(outdir):
+#     #     shutil.rmtree(outdir)
+#     # os.makedirs(outdir)
+
+#     # input_glb = input_to_glb(outdir,input_img,depth_dict,mask,rotated=rotated)
+
+#     # # visualize the input points in 3D in gradio
+#     # inference_glb = rayst3r_to_glb(input_img,depth_dict,mask,rotated=rotated)
+
+#     return input_img, input_img
+
+# demo = gr.Interface(
+#     process_image,
+#     gr.Image(),
+#     [gr.Model3D(label="Input"), gr.Model3D(label="RaySt3R",)]
+# )
+
+# if __name__ == "__main__":
+#     demo.launch()