Text-to-3D
image-to-3d
Chao Xu commited on
Commit
216282e
1 Parent(s): 3680bde

code pruning

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. SparseNeuS_demo_v1/confs/blender_general_lod1_val_new.conf +0 -137
  2. SparseNeuS_demo_v1/confs/one2345_lod0_val_demo.conf +0 -2
  3. SparseNeuS_demo_v1/data/__init__.py +0 -0
  4. SparseNeuS_demo_v1/data/blender.py +0 -340
  5. SparseNeuS_demo_v1/data/blender_general.py +0 -432
  6. SparseNeuS_demo_v1/data/blender_general_12_narrow.py +0 -427
  7. SparseNeuS_demo_v1/data/blender_general_12_narrow_8.py +0 -427
  8. SparseNeuS_demo_v1/data/blender_general_360.py +0 -412
  9. SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_3.py +0 -406
  10. SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_4.py +0 -411
  11. SparseNeuS_demo_v1/data/blender_general_4_narrow_and_4_2_stage_mix.py +0 -480
  12. SparseNeuS_demo_v1/data/blender_general_4_narrow_and_6_2_stage_mix.py +0 -476
  13. SparseNeuS_demo_v1/data/blender_general_6_narrow_and_6_2_stage_blend_mix.py +0 -449
  14. SparseNeuS_demo_v1/data/blender_general_8_2_stage.py +0 -396
  15. SparseNeuS_demo_v1/data/blender_general_8_4_gt.py +0 -396
  16. SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_3_views.py +0 -446
  17. SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_mix.py +0 -439
  18. SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_mix.py +0 -470
  19. SparseNeuS_demo_v1/data/blender_general_8_wide_from_2_stage.py +0 -395
  20. SparseNeuS_demo_v1/data/blender_general_narrow_4_1_eval_new_data.py +0 -418
  21. SparseNeuS_demo_v1/data/blender_general_narrow_6.py +0 -399
  22. SparseNeuS_demo_v1/data/blender_general_narrow_8_3_fixed.py +0 -393
  23. SparseNeuS_demo_v1/data/blender_general_narrow_8_3_random.py +0 -395
  24. SparseNeuS_demo_v1/data/blender_general_narrow_8_4_random_shading.py +0 -432
  25. SparseNeuS_demo_v1/data/blender_general_narrow_all.py +0 -386
  26. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage.py +0 -410
  27. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage_temp.py +0 -411
  28. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data.py +3 -27
  29. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data3_1.py +0 -414
  30. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_32_wide.py +0 -465
  31. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_4_4.py +0 -419
  32. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_6_4.py +0 -420
  33. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_3.py +0 -428
  34. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_wide.py +0 -420
  35. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_temp.py +0 -417
  36. SparseNeuS_demo_v1/data/blender_general_narrow_all_no_depth.py +0 -388
  37. SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4.py +0 -389
  38. SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4_and_4.py +0 -395
  39. SparseNeuS_demo_v1/data/blender_gt_32.py +0 -419
  40. SparseNeuS_demo_v1/data/dtu/dtu_pairs.txt +0 -93
  41. SparseNeuS_demo_v1/data/dtu/lists/test.txt +0 -15
  42. SparseNeuS_demo_v1/data/dtu/lists/train.txt +0 -75
  43. SparseNeuS_demo_v1/data/dtu_fit.py +0 -278
  44. SparseNeuS_demo_v1/data/dtu_general.py +0 -376
  45. SparseNeuS_demo_v1/data/scene.py +0 -1
  46. SparseNeuS_demo_v1/evaluation/__init__.py +0 -0
  47. SparseNeuS_demo_v1/evaluation/clean_mesh.py +0 -283
  48. SparseNeuS_demo_v1/evaluation/eval_dtu_python.py +0 -369
  49. SparseNeuS_demo_v1/exp_runner_generic_blender_val.py +19 -49
  50. SparseNeuS_demo_v1/models/patch_projector.py +1 -1
SparseNeuS_demo_v1/confs/blender_general_lod1_val_new.conf DELETED
@@ -1,137 +0,0 @@
1
- # - for the lod1 geometry network, using adaptive cost for sparse cost regularization network
2
- #- for lod1 rendering network, using depth-adaptive render
3
-
4
- general {
5
- base_exp_dir = ./exp/val/1_4_only_narrow_lod1
6
-
7
- recording = [
8
- ./,
9
- ./data
10
- ./ops
11
- ./models
12
- ./loss
13
- ]
14
- }
15
-
16
- dataset {
17
- # local path
18
- trainpath = /objaverse-processed/zero12345_img/eval_selected
19
- valpath = /objaverse-processed/zero12345_img/eval_selected
20
- testpath = /objaverse-processed/zero12345_img/eval_selected
21
- # trainpath = /objaverse-processed/zero12345_img/zero12345_2stage_5pred_sample/
22
- # valpath = /objaverse-processed/zero12345_img/zero12345_2stage_5pred_sample/
23
- # testpath = /objaverse-processed/zero12345_img/zero12345_2stage_5pred_sample/
24
- imgScale_train = 1.0
25
- imgScale_test = 1.0
26
- nviews = 5
27
- clean_image = True
28
- importance_sample = True
29
- test_ref_views = [23]
30
-
31
- # test dataset
32
- test_n_views = 2
33
- test_img_wh = [256, 256]
34
- test_clip_wh = [0, 0]
35
- test_scan_id = scan110
36
- train_img_idx = [49, 50, 52, 53, 54, 56, 58] #[21, 22, 23, 24, 25] #
37
- test_img_idx = [51, 55, 57] #[32, 33, 34] #
38
-
39
- test_dir_comment = train
40
- }
41
-
42
- train {
43
- learning_rate = 2e-4
44
- learning_rate_milestone = [100000, 150000, 200000]
45
- learning_rate_factor = 0.5
46
- end_iter = 200000
47
- save_freq = 5000
48
- val_freq = 1
49
- val_mesh_freq =1
50
- report_freq = 100
51
-
52
- N_rays = 512
53
-
54
- validate_resolution_level = 4
55
- anneal_start = 0
56
- anneal_end = 25000
57
- anneal_start_lod1 = 0
58
- anneal_end_lod1 = 15000
59
-
60
- use_white_bkgd = True
61
-
62
- # Loss
63
- # ! for training the lod1 network, don't use this regularization in first 10k steps; then use the regularization
64
- sdf_igr_weight = 0.1
65
- sdf_sparse_weight = 0.02 # 0.002 for lod1 network; 0.02 for lod0 network
66
- sdf_decay_param = 100 # cannot be too large, which decide the tsdf range
67
- fg_bg_weight = 0.01 # first 0.01
68
- bg_ratio = 0.3
69
-
70
- if_fix_lod0_networks = True
71
- }
72
-
73
- model {
74
- num_lods = 2
75
-
76
- sdf_network_lod0 {
77
- lod = 0,
78
- ch_in = 56, # the channel num of fused pyramid features
79
- voxel_size = 0.02105263, # 0.02083333, should be 2/95
80
- vol_dims = [96, 96, 96],
81
- hidden_dim = 128,
82
- cost_type = variance_mean
83
- d_pyramid_feature_compress = 16,
84
- regnet_d_out = 16,
85
- num_sdf_layers = 4,
86
- # position embedding
87
- multires = 6
88
- }
89
-
90
-
91
- sdf_network_lod1 {
92
- lod = 1,
93
- ch_in = 56, # the channel num of fused pyramid features
94
- voxel_size = 0.0104712, #0.01041667, should be 2/191
95
- vol_dims = [192, 192, 192],
96
- hidden_dim = 128,
97
- cost_type = variance_mean
98
- d_pyramid_feature_compress = 8,
99
- regnet_d_out = 8,
100
- num_sdf_layers = 4,
101
- # position embedding
102
- multires = 6
103
- }
104
-
105
-
106
- variance_network {
107
- init_val = 0.2
108
- }
109
-
110
- variance_network_lod1 {
111
- init_val = 0.2
112
- }
113
-
114
- rendering_network {
115
- in_geometry_feat_ch = 16
116
- in_rendering_feat_ch = 56
117
- anti_alias_pooling = True
118
- }
119
-
120
- rendering_network_lod1 {
121
- in_geometry_feat_ch = 8
122
- in_rendering_feat_ch = 56
123
- anti_alias_pooling = True
124
-
125
- }
126
-
127
-
128
- trainer {
129
- n_samples_lod0 = 64
130
- n_importance_lod0 = 64
131
- n_samples_lod1 = 64
132
- n_importance_lod1 = 64
133
- n_outside = 0 # 128 if render_outside_uniform_sampling
134
- perturb = 1.0
135
- alpha_type = div
136
- }
137
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
SparseNeuS_demo_v1/confs/one2345_lod0_val_demo.conf CHANGED
@@ -18,8 +18,6 @@ dataset {
18
  valpath = ../ # !!! where you store the validation data
19
  testpath = ../
20
 
21
-
22
-
23
  imgScale_train = 1.0
24
  imgScale_test = 1.0
25
  nviews = 5
 
18
  valpath = ../ # !!! where you store the validation data
19
  testpath = ../
20
 
 
 
21
  imgScale_train = 1.0
22
  imgScale_test = 1.0
23
  nviews = 5
SparseNeuS_demo_v1/data/__init__.py DELETED
File without changes
SparseNeuS_demo_v1/data/blender.py DELETED
@@ -1,340 +0,0 @@
1
- import torch
2
- from torch.utils.data import Dataset
3
- import json
4
- import numpy as np
5
- import os
6
- from PIL import Image
7
- from torchvision import transforms as T
8
- from kornia import create_meshgrid
9
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
10
- import cv2 as cv
11
- from data.scene import get_boundingbox
12
-
13
-
14
- def get_ray_directions(H, W, focal, center=None):
15
- """
16
- Get ray directions for all pixels in camera coordinate.
17
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
18
- ray-tracing-generating-camera-rays/standard-coordinate-systems
19
- Inputs:
20
- H, W, focal: image height, width and focal length
21
- Outputs:
22
- directions: (H, W, 3), the direction of the rays in camera coordinate
23
- """
24
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0]
25
- i, j = grid.unbind(-1)
26
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
27
- # see https://github.com/bmild/nerf/issues/24
28
- cent = center if center is not None else [W / 2, H / 2]
29
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
30
-
31
- return directions
32
-
33
- def get_rays(directions, c2w):
34
- """
35
- Get ray origin and normalized directions in world coordinate for all pixels in one image.
36
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
37
- ray-tracing-generating-camera-rays/standard-coordinate-systems
38
- Inputs:
39
- directions: (H, W, 3) precomputed ray directions in camera coordinate
40
- c2w: (3, 4) transformation matrix from camera coordinate to world coordinate
41
- Outputs:
42
- rays_o: (H*W, 3), the origin of the rays in world coordinate
43
- rays_d: (H*W, 3), the normalized direction of the rays in world coordinate
44
- """
45
- # Rotate ray directions from camera coordinate to the world coordinate
46
- rays_d = directions @ c2w[:3, :3].T # (H, W, 3)
47
- # rays_d = rays_d / torch.norm(rays_d, dim=-1, keepdim=True)
48
- # The origin of all rays is the camera origin in world coordinate
49
- rays_o = c2w[:3, 3].expand(rays_d.shape) # (H, W, 3)
50
-
51
- rays_d = rays_d.view(-1, 3)
52
- rays_o = rays_o.view(-1, 3)
53
-
54
- return rays_o, rays_d
55
-
56
-
57
- def load_K_Rt_from_P(filename, P=None):
58
- if P is None:
59
- lines = open(filename).read().splitlines()
60
- if len(lines) == 4:
61
- lines = lines[1:]
62
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
63
- P = np.asarray(lines).astype(np.float32).squeeze()
64
-
65
- out = cv.decomposeProjectionMatrix(P)
66
- K = out[0]
67
- R = out[1]
68
- t = out[2]
69
-
70
- K = K / K[2, 2]
71
- intrinsics = np.eye(4)
72
- intrinsics[:3, :3] = K
73
-
74
- pose = np.eye(4, dtype=np.float32)
75
- pose[:3, :3] = R.transpose() # ? why need transpose here
76
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
77
-
78
- return intrinsics, pose # ! return cam2world matrix here
79
-
80
-
81
- class BlenderDataset(Dataset):
82
- def __init__(self, root_dir, split, scan_id, n_views, train_img_idx=[], test_img_idx=[],
83
- img_wh=[800, 800], clip_wh=[0, 0], original_img_wh=[800, 800],
84
- N_rays=512, h_patch_size=5, near=2.0, far=6.0):
85
- self.root_dir = root_dir
86
- self.split = split
87
- self.img_wh = img_wh
88
- self.clip_wh = clip_wh
89
- self.define_transforms()
90
- self.train_img_idx = train_img_idx
91
- self.test_img_idx = test_img_idx
92
- self.N_rays = N_rays
93
- self.h_patch_size = h_patch_size # used to extract patch for supervision
94
- self.n_views = n_views
95
- self.near, self.far = near, far
96
- self.blender2opencv = np.array([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
97
-
98
- with open(os.path.join(self.root_dir, f"transforms_{self.split}.json"), 'r') as f:
99
- self.meta = json.load(f)
100
-
101
-
102
- self.read_meta(near, far)
103
- # import ipdb; ipdb.set_trace()
104
- self.raw_near_fars = np.stack([np.array([self.near, self.far]) for i in range(len(self.meta['frames']))])
105
-
106
-
107
- # ! estimate scale_mat
108
- self.scale_mat, self.scale_factor = self.cal_scale_mat(
109
- img_hw=[self.img_wh[1], self.img_wh[0]],
110
- intrinsics=self.all_intrinsics[self.train_img_idx],
111
- extrinsics=self.all_w2cs[self.train_img_idx],
112
- near_fars=self.raw_near_fars[self.train_img_idx],
113
- factor=1.1)
114
- # self.scale_mat = np.eye(4)
115
- # self.scale_factor = 1.0
116
- # import ipdb; ipdb.set_trace()
117
- # * after scaling and translation, unit bounding box
118
- self.scaled_intrinsics, self.scaled_w2cs, self.scaled_c2ws, \
119
- self.scaled_affine_mats, self.scaled_near_fars = self.scale_cam_info()
120
-
121
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
122
- self.bbox_max = np.array([1.0, 1.0, 1.0])
123
- self.partial_vol_origin = torch.Tensor([-1., -1., -1.])
124
- self.white_back = True
125
-
126
- def read_meta(self, near=2.0, far=6.0):
127
-
128
-
129
- self.ref_img_idx = self.train_img_idx[0]
130
- ref_c2w = np.array(self.meta['frames'][self.ref_img_idx]['transform_matrix']) @ self.blender2opencv
131
- # ref_c2w = torch.FloatTensor(ref_c2w)
132
- self.ref_c2w = ref_c2w
133
- self.ref_w2c = np.linalg.inv(ref_c2w)
134
-
135
-
136
- w, h = self.img_wh
137
- self.focal = 0.5 * 800 / np.tan(0.5 * self.meta['camera_angle_x']) # original focal length
138
- self.focal *= self.img_wh[0] / 800 # modify focal length to match size self.img_wh
139
-
140
- # bounds, common for all scenes
141
- self.near = near
142
- self.far = far
143
- self.bounds = np.array([self.near, self.far])
144
-
145
- # ray directions for all pixels, same for all images (same H, W, focal)
146
- self.directions = get_ray_directions(h, w, [self.focal,self.focal]) # (h, w, 3)
147
- intrinsics = np.eye(4)
148
- intrinsics[:3, :3] = np.array([[self.focal,0,w/2],[0,self.focal,h/2],[0,0,1]]).astype(np.float32)
149
- self.intrinsics = intrinsics
150
-
151
- self.image_paths = []
152
- self.poses = []
153
- self.all_rays = []
154
- self.all_images = []
155
- self.all_masks = []
156
- self.all_w2cs = []
157
- self.all_intrinsics = []
158
- for frame in self.meta['frames']:
159
- pose = np.array(frame['transform_matrix']) @ self.blender2opencv
160
- self.poses += [pose]
161
- c2w = torch.FloatTensor(pose)
162
- w2c = np.linalg.inv(c2w)
163
- image_path = os.path.join(self.root_dir, f"{frame['file_path']}.png")
164
- self.image_paths += [image_path]
165
- img = Image.open(image_path)
166
- img = img.resize(self.img_wh, Image.LANCZOS)
167
- img = self.transform(img) # (4, h, w)
168
-
169
- self.all_masks += [img[-1:,:]>0]
170
- # img = img[:3, :] * img[ -1:,:] + (1 - img[-1:, :]) # blend A to RGB
171
- img = img[:3, :] * img[ -1:,:]
172
- img = img.numpy() # (3, h, w)
173
- self.all_images += [img]
174
-
175
-
176
- self.all_masks += []
177
- self.all_intrinsics.append(self.intrinsics)
178
- # - transform from world system to ref-camera system
179
- self.all_w2cs.append(w2c @ np.linalg.inv(self.ref_w2c))
180
-
181
- self.all_images = torch.from_numpy(np.stack(self.all_images)).to(torch.float32)
182
- self.all_intrinsics = torch.from_numpy(np.stack(self.all_intrinsics)).to(torch.float32)
183
- self.all_w2cs = torch.from_numpy(np.stack(self.all_w2cs)).to(torch.float32)
184
- # self.img_wh = [self.img_wh[0] - self.clip_wh[0] - self.clip_wh[2],
185
- # self.img_wh[1] - self.clip_wh[1] - self.clip_wh[3]]
186
-
187
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
188
- center, radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
189
- radius = radius * factor
190
- scale_mat = np.diag([radius, radius, radius, 1.0])
191
- scale_mat[:3, 3] = center.cpu().numpy()
192
- scale_mat = scale_mat.astype(np.float32)
193
-
194
- return scale_mat, 1. / radius.cpu().numpy()
195
-
196
- def scale_cam_info(self):
197
- new_intrinsics = []
198
- new_near_fars = []
199
- new_w2cs = []
200
- new_c2ws = []
201
- new_affine_mats = []
202
- for idx in range(len(self.all_images)):
203
-
204
- intrinsics = self.all_intrinsics[idx]
205
- # import ipdb; ipdb.set_trace()
206
- P = intrinsics @ self.all_w2cs[idx] @ self.scale_mat
207
- P = P.cpu().numpy()[:3, :4]
208
-
209
- # - should use load_K_Rt_from_P() to obtain c2w
210
- c2w = load_K_Rt_from_P(None, P)[1]
211
- w2c = np.linalg.inv(c2w)
212
- new_w2cs.append(w2c)
213
- new_c2ws.append(c2w)
214
- new_intrinsics.append(intrinsics)
215
- affine_mat = np.eye(4)
216
- affine_mat[:3, :4] = intrinsics[:3, :3] @ w2c[:3, :4]
217
- new_affine_mats.append(affine_mat)
218
-
219
- camera_o = c2w[:3, 3]
220
- dist = np.sqrt(np.sum(camera_o ** 2))
221
- near = dist - 1
222
- far = dist + 1
223
-
224
- new_near_fars.append([0.95 * near, 1.05 * far])
225
-
226
- new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars = \
227
- np.stack(new_intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), \
228
- np.stack(new_affine_mats), np.stack(new_near_fars)
229
-
230
- new_intrinsics = torch.from_numpy(np.float32(new_intrinsics))
231
- new_w2cs = torch.from_numpy(np.float32(new_w2cs))
232
- new_c2ws = torch.from_numpy(np.float32(new_c2ws))
233
- new_affine_mats = torch.from_numpy(np.float32(new_affine_mats))
234
- new_near_fars = torch.from_numpy(np.float32(new_near_fars))
235
-
236
- return new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars
237
-
238
- def load_poses_all(self, file=f"transforms_train.json"):
239
- with open(os.path.join(self.root_dir, file), 'r') as f:
240
- meta = json.load(f)
241
-
242
- c2ws = []
243
- for i,frame in enumerate(meta['frames']):
244
- c2ws.append(np.array(frame['transform_matrix']) @ self.blender2opencv)
245
- return np.stack(c2ws)
246
-
247
- def define_transforms(self):
248
- self.transform = T.ToTensor()
249
-
250
-
251
-
252
- def get_conditional_sample(self):
253
- sample = {}
254
- support_idxs = self.train_img_idx
255
-
256
- sample['images'] = self.all_images[support_idxs] # (V, 3, H, W)
257
- sample['w2cs'] = self.scaled_w2cs[self.train_img_idx] # (V, 4, 4)
258
- sample['c2ws'] = self.scaled_c2ws[self.train_img_idx] # (V, 4, 4)
259
- sample['near_fars'] = self.scaled_near_fars[self.train_img_idx] # (V, 2)
260
- sample['intrinsics'] = self.scaled_intrinsics[self.train_img_idx][:, :3, :3] # (V, 3, 3)
261
- sample['affine_mats'] = self.scaled_affine_mats[self.train_img_idx] # ! in world space
262
-
263
- # sample['scan'] = self.scan_id
264
- sample['scale_factor'] = torch.tensor(self.scale_factor)
265
- sample['scale_mat'] = torch.from_numpy(self.scale_mat)
266
- sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c))
267
- sample['img_wh'] = torch.from_numpy(np.array(self.img_wh))
268
- sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32)
269
-
270
- return sample
271
-
272
-
273
-
274
- def __len__(self):
275
- if self.split == 'train':
276
- return self.n_views * 1000
277
- else:
278
- return len(self.test_img_idx) * 1000
279
-
280
-
281
- def __getitem__(self, idx):
282
- sample = {}
283
-
284
- if self.split == 'train':
285
- render_idx = self.train_img_idx[idx % self.n_views]
286
- support_idxs = [idx for idx in self.train_img_idx if idx != render_idx]
287
- else:
288
- # render_idx = idx % self.n_test_images + self.n_train_images
289
- render_idx = self.test_img_idx[idx % len(self.test_img_idx)]
290
- support_idxs = [render_idx]
291
-
292
- sample['images'] = self.all_images[support_idxs] # (V, 3, H, W)
293
- sample['w2cs'] = self.scaled_w2cs[support_idxs] # (V, 4, 4)
294
- sample['c2ws'] = self.scaled_c2ws[support_idxs] # (V, 4, 4)
295
- sample['intrinsics'] = self.scaled_intrinsics[support_idxs][:, :3, :3] # (V, 3, 3)
296
- sample['affine_mats'] = self.scaled_affine_mats[support_idxs] # ! in world space
297
- # sample['scan'] = self.scan_id
298
- sample['scale_factor'] = torch.tensor(self.scale_factor)
299
- sample['img_wh'] = torch.from_numpy(np.array(self.img_wh))
300
- sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32)
301
- sample['img_index'] = torch.tensor(render_idx)
302
-
303
- # - query image
304
- sample['query_image'] = self.all_images[render_idx]
305
- sample['query_c2w'] = self.scaled_c2ws[render_idx]
306
- sample['query_w2c'] = self.scaled_w2cs[render_idx]
307
- sample['query_intrinsic'] = self.scaled_intrinsics[render_idx]
308
- sample['query_near_far'] = self.scaled_near_fars[render_idx]
309
- # sample['meta'] = str(self.scan_id) + "_" + os.path.basename(self.images_list[render_idx])
310
- sample['scale_mat'] = torch.from_numpy(self.scale_mat)
311
- sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c))
312
- sample['rendering_c2ws'] = self.scaled_c2ws[self.test_img_idx]
313
- sample['rendering_imgs_idx'] = torch.Tensor(np.array(self.test_img_idx).astype(np.int32))
314
-
315
- # - generate rays
316
- if self.split == 'val' or self.split == 'test':
317
- sample_rays = gen_rays_from_single_image(
318
- self.img_wh[1], self.img_wh[0],
319
- sample['query_image'],
320
- sample['query_intrinsic'],
321
- sample['query_c2w'],
322
- depth=None,
323
- mask=None)
324
- else:
325
- sample_rays = gen_random_rays_from_single_image(
326
- self.img_wh[1], self.img_wh[0],
327
- self.N_rays,
328
- sample['query_image'],
329
- sample['query_intrinsic'],
330
- sample['query_c2w'],
331
- depth=None,
332
- mask=None,
333
- dilated_mask=None,
334
- importance_sample=False,
335
- h_patch_size=self.h_patch_size
336
- )
337
-
338
- sample['rays'] = sample_rays
339
-
340
- return sample
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
SparseNeuS_demo_v1/data/blender_general.py DELETED
@@ -1,432 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- # print("root_dir: ", root_dir)
70
- self.root_dir = root_dir
71
- self.split = split
72
-
73
- self.n_views = n_views
74
- self.N_rays = N_rays
75
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
76
-
77
- self.clean_image = clean_image
78
- self.importance_sample = importance_sample
79
- self.test_ref_views = test_ref_views # used for testing
80
- self.scale_factor = 1.0
81
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
-
83
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
- with open(lvis_json_path, 'r') as f:
85
- lvis_paths = json.load(f)
86
- if self.split == 'train':
87
- self.lvis_paths = lvis_paths['train']
88
- else:
89
- self.lvis_paths = lvis_paths['val']
90
- if img_wh is not None:
91
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
- 'img_wh must both be multiples of 32!'
93
-
94
-
95
- pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
- with open(pose_json_path, 'r') as f:
97
- meta = json.load(f)
98
-
99
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
- self.img_wh = (256, 256)
101
- self.input_poses = np.array(list(meta["c2ws"].values()))
102
- intrinsic = np.eye(4)
103
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
- self.intrinsic = intrinsic
105
- self.near_far = np.array(meta["near_far"])
106
-
107
- self.define_transforms()
108
- self.blender2opencv = np.array(
109
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
- )
111
-
112
-
113
- self.c2ws = []
114
- self.w2cs = []
115
- self.near_fars = []
116
- # self.root_dir = root_dir
117
- for idx, img_id in enumerate(self.img_ids):
118
- pose = self.input_poses[idx]
119
- c2w = pose @ self.blender2opencv
120
- self.c2ws.append(c2w)
121
- self.w2cs.append(np.linalg.inv(c2w))
122
- self.near_fars.append(self.near_far)
123
- self.c2ws = np.stack(self.c2ws, axis=0)
124
- self.w2cs = np.stack(self.w2cs, axis=0)
125
-
126
-
127
- self.all_intrinsics = [] # the cam info of the whole scene
128
- self.all_extrinsics = []
129
- self.all_near_fars = []
130
- self.load_cam_info()
131
-
132
- # * bounding box for rendering
133
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
- self.bbox_max = np.array([1.0, 1.0, 1.0])
135
-
136
- # - used for cost volume regularization
137
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
-
140
-
141
- def define_transforms(self):
142
- self.transform = T.Compose([T.ToTensor()])
143
-
144
-
145
-
146
- def load_cam_info(self):
147
- for vid, img_id in enumerate(self.img_ids):
148
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
- self.all_intrinsics.append(intrinsic)
150
- self.all_extrinsics.append(extrinsic)
151
- self.all_near_fars.append(near_far)
152
-
153
- def read_depth(self, filename):
154
- depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600)
155
- depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5,
156
- interpolation=cv2.INTER_NEAREST) # (600, 800)
157
- depth_h = depth_h[44:556, 80:720] # (512, 640)
158
- depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample,
159
- interpolation=cv2.INTER_NEAREST)
160
- depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4,
161
- interpolation=cv2.INTER_NEAREST)
162
-
163
- return depth, depth_h
164
-
165
- def read_mask(self, filename):
166
- mask_h = cv2.imread(filename, 0)
167
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
168
- interpolation=cv2.INTER_NEAREST)
169
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
170
- interpolation=cv2.INTER_NEAREST)
171
-
172
- mask[mask > 0] = 1 # the masks stored in png are not binary
173
- mask_h[mask_h > 0] = 1
174
-
175
- return mask, mask_h
176
-
177
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
178
-
179
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
180
- # print("center", center)
181
- # print("radius", radius)
182
- # print("bounds", bounds)
183
- # import ipdb; ipdb.set_trace()
184
- radius = radius * factor
185
- scale_mat = np.diag([radius, radius, radius, 1.0])
186
- scale_mat[:3, 3] = center.cpu().numpy()
187
- scale_mat = scale_mat.astype(np.float32)
188
-
189
- return scale_mat, 1. / radius.cpu().numpy()
190
-
191
- def __len__(self):
192
- return 8*len(self.lvis_paths)
193
-
194
-
195
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
196
- depth_h = cv2.imread(filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 65535 * 1.4 + 0.5
197
-
198
- depth_h[depth_h < near_bound+1e-3] = 0.0
199
-
200
- depth = {}
201
- for l in range(3):
202
- depth[f"level_{l}"] = cv2.resize(
203
- depth_h,
204
- None,
205
- fx=1.0 / (2**l),
206
- fy=1.0 / (2**l),
207
- interpolation=cv2.INTER_NEAREST,
208
- )
209
-
210
- if self.split == "train":
211
- cutout = np.ones_like(depth[f"level_2"])
212
- h0 = int(np.random.randint(0, high=cutout.shape[0] // 5, size=1))
213
- h1 = int(
214
- np.random.randint(
215
- 4 * cutout.shape[0] // 5, high=cutout.shape[0], size=1
216
- )
217
- )
218
- w0 = int(np.random.randint(0, high=cutout.shape[1] // 5, size=1))
219
- w1 = int(
220
- np.random.randint(
221
- 4 * cutout.shape[1] // 5, high=cutout.shape[1], size=1
222
- )
223
- )
224
- cutout[h0:h1, w0:w1] = 0
225
- depth_aug = depth[f"level_2"] * cutout
226
- else:
227
- depth_aug = depth[f"level_2"].copy()
228
-
229
- return depth, depth_h, depth_aug
230
-
231
-
232
- def __getitem__(self, idx):
233
- sample = {}
234
- origin_idx = idx
235
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
236
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
237
-
238
-
239
- folder_uid_dict = self.lvis_paths[idx//8]
240
- idx = idx % 8 # [0, 7]
241
- folder_id = folder_uid_dict['folder_id']
242
- uid = folder_uid_dict['uid']
243
-
244
- # idx = idx % 8
245
- # uid = 'c40d63d5d740405e91c7f5fce855076e'
246
- # folder_id = '000-123'
247
-
248
- # target view
249
- c2w = self.c2ws[idx]
250
- w2c = np.linalg.inv(c2w)
251
- w2c_ref = w2c
252
- w2c_ref_inv = np.linalg.inv(w2c_ref)
253
-
254
- w2cs.append(w2c @ w2c_ref_inv)
255
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
256
-
257
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
258
-
259
- depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
260
-
261
-
262
- img = Image.open(img_filename)
263
-
264
- img = self.transform(img) # (4, h, w)
265
-
266
-
267
- if img.shape[0] == 4:
268
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
269
- imgs += [img]
270
-
271
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
272
- mask_h = depth_h > 0
273
- # print("valid pixels", np.sum(mask_h))
274
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
275
- surface_points = directions * depth_h[..., None] # [H, W, 3]
276
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
277
- depth_h = distance
278
-
279
-
280
- depths_h.append(depth_h)
281
- masks_h.append(mask_h)
282
-
283
- intrinsic = self.intrinsic
284
- intrinsics.append(intrinsic)
285
-
286
-
287
- near_fars.append(self.near_fars[idx])
288
- image_perm = 0 # only supervised on reference view
289
-
290
- mask_dilated = None
291
-
292
- src_views = range(8+idx*4, 8+(idx+1)*4)
293
-
294
-
295
- for vid in src_views:
296
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_{vid%4}_10.png')
297
-
298
- img = Image.open(img_filename)
299
- img_wh = self.img_wh
300
-
301
- img = self.transform(img)
302
- if img.shape[0] == 4:
303
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
304
-
305
- imgs += [img]
306
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
307
- depths_h.append(depth_h)
308
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
309
-
310
- near_fars.append(self.all_near_fars[vid])
311
- intrinsics.append(self.all_intrinsics[vid])
312
-
313
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
314
-
315
-
316
- # ! estimate scale_mat
317
- scale_mat, scale_factor = self.cal_scale_mat(
318
- img_hw=[img_wh[1], img_wh[0]],
319
- intrinsics=intrinsics, extrinsics=w2cs,
320
- near_fars=near_fars, factor=1.1
321
- )
322
- # print(scale_mat)
323
- # print(scale_factor)
324
- # ! calculate the new w2cs after scaling
325
- new_near_fars = []
326
- new_w2cs = []
327
- new_c2ws = []
328
- new_affine_mats = []
329
- new_depths_h = []
330
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
331
-
332
- P = intrinsic @ extrinsic @ scale_mat
333
- P = P[:3, :4]
334
- # - should use load_K_Rt_from_P() to obtain c2w
335
- c2w = load_K_Rt_from_P(None, P)[1]
336
- w2c = np.linalg.inv(c2w)
337
- new_w2cs.append(w2c)
338
- new_c2ws.append(c2w)
339
- affine_mat = np.eye(4)
340
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
341
- new_affine_mats.append(affine_mat)
342
-
343
- camera_o = c2w[:3, 3]
344
- dist = np.sqrt(np.sum(camera_o ** 2))
345
- near = dist - 1
346
- far = dist + 1
347
-
348
- new_near_fars.append([0.95 * near, 1.05 * far])
349
- new_depths_h.append(depth * scale_factor)
350
-
351
- # print(new_near_fars)
352
- imgs = torch.stack(imgs).float()
353
- depths_h = np.stack(new_depths_h)
354
- masks_h = np.stack(masks_h)
355
-
356
- affine_mats = np.stack(new_affine_mats)
357
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
358
- new_near_fars)
359
-
360
- if self.split == 'train':
361
- start_idx = 0
362
- else:
363
- start_idx = 1
364
-
365
- view_ids = [idx] + list(src_views)
366
- sample['origin_idx'] = origin_idx
367
- sample['images'] = imgs # (V, 3, H, W)
368
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
369
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
370
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
371
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
372
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
373
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
374
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
375
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
376
-
377
- # sample['light_idx'] = torch.tensor(light_idx)
378
- sample['scan'] = folder_id
379
-
380
- sample['scale_factor'] = torch.tensor(scale_factor)
381
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
382
- sample['render_img_idx'] = torch.tensor(image_perm)
383
- sample['partial_vol_origin'] = self.partial_vol_origin
384
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
385
-
386
-
387
- # - image to render
388
- sample['query_image'] = sample['images'][0]
389
- sample['query_c2w'] = sample['c2ws'][0]
390
- sample['query_w2c'] = sample['w2cs'][0]
391
- sample['query_intrinsic'] = sample['intrinsics'][0]
392
- sample['query_depth'] = sample['depths_h'][0]
393
- sample['query_mask'] = sample['masks_h'][0]
394
- sample['query_near_far'] = sample['near_fars'][0]
395
-
396
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
397
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
398
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
399
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
400
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
401
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
402
- sample['view_ids'] = sample['view_ids'][start_idx:]
403
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
404
-
405
- sample['scale_mat'] = torch.from_numpy(scale_mat)
406
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
407
-
408
- # - generate rays
409
- if ('val' in self.split) or ('test' in self.split):
410
- sample_rays = gen_rays_from_single_image(
411
- img_wh[1], img_wh[0],
412
- sample['query_image'],
413
- sample['query_intrinsic'],
414
- sample['query_c2w'],
415
- depth=sample['query_depth'],
416
- mask=sample['query_mask'] if self.clean_image else None)
417
- else:
418
- sample_rays = gen_random_rays_from_single_image(
419
- img_wh[1], img_wh[0],
420
- self.N_rays,
421
- sample['query_image'],
422
- sample['query_intrinsic'],
423
- sample['query_c2w'],
424
- depth=sample['query_depth'],
425
- mask=sample['query_mask'] if self.clean_image else None,
426
- dilated_mask=mask_dilated,
427
- importance_sample=self.importance_sample)
428
-
429
-
430
- sample['rays'] = sample_rays
431
-
432
- return sample
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
SparseNeuS_demo_v1/data/blender_general_12_narrow.py DELETED
@@ -1,427 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- self.root_dir = root_dir
70
- self.split = split
71
- self.imgs_per_instance = 12
72
- self.n_views = n_views
73
- self.N_rays = N_rays
74
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
75
-
76
- self.clean_image = clean_image
77
- self.importance_sample = importance_sample
78
- self.test_ref_views = test_ref_views # used for testing
79
- self.scale_factor = 1.0
80
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
81
-
82
- lvis_json_path = '/objaverse-processed/zero12345_img/narrow_12_split_upd.json' # folder_id and uid
83
- with open(lvis_json_path, 'r') as f:
84
- lvis_paths = json.load(f)
85
- if self.split == 'train':
86
- self.lvis_paths = lvis_paths['train']
87
- else:
88
- self.lvis_paths = lvis_paths['val']
89
- if img_wh is not None:
90
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
91
- 'img_wh must both be multiples of 32!'
92
-
93
-
94
- pose_json_path_narrow_8 = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
95
- with open(pose_json_path_narrow_8, 'r') as f:
96
- narrow_8_meta = json.load(f)
97
-
98
- pose_json_path_narrow_4 = "/objaverse-processed/zero12345_img/zero12345_2stage_12_pose.json"
99
- with open(pose_json_path_narrow_4, 'r') as f:
100
- narrow_4_meta = json.load(f)
101
-
102
-
103
- self.img_ids = list(narrow_8_meta["c2ws"].keys()) + list(narrow_4_meta["c2ws"].keys()) # (8 + 8*4) + (4 + 4*4)
104
- self.img_wh = (256, 256)
105
- self.input_poses = np.array(list(narrow_8_meta["c2ws"].values()) + list(narrow_4_meta["c2ws"].values()))
106
- intrinsic = np.eye(4)
107
- assert narrow_8_meta["intrinsics"] == narrow_4_meta["intrinsics"], "intrinsics not equal"
108
- intrinsic[:3, :3] = np.array(narrow_8_meta["intrinsics"])
109
- self.intrinsic = intrinsic
110
- assert narrow_8_meta["near_far"] == narrow_4_meta["near_far"], "near_far not equal"
111
- self.near_far = np.array(narrow_8_meta["near_far"])
112
- self.near_far[1] = 1.8
113
- self.define_transforms()
114
- self.blender2opencv = np.array(
115
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
116
- )
117
-
118
-
119
- self.c2ws = []
120
- self.w2cs = []
121
- self.near_fars = []
122
- for idx, img_id in enumerate(self.img_ids):
123
- pose = self.input_poses[idx]
124
- c2w = pose @ self.blender2opencv
125
- self.c2ws.append(c2w)
126
- self.w2cs.append(np.linalg.inv(c2w))
127
- self.near_fars.append(self.near_far)
128
-
129
-
130
-
131
- self.c2ws = np.stack(self.c2ws, axis=0)
132
- self.w2cs = np.stack(self.w2cs, axis=0)
133
-
134
-
135
- self.all_intrinsics = [] # the cam info of the whole scene
136
- self.all_extrinsics = []
137
- self.all_near_fars = []
138
- self.load_cam_info()
139
-
140
- # * bounding box for rendering
141
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
142
- self.bbox_max = np.array([1.0, 1.0, 1.0])
143
-
144
- # - used for cost volume regularization
145
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
146
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
147
-
148
-
149
- def define_transforms(self):
150
- self.transform = T.Compose([T.ToTensor()])
151
-
152
-
153
-
154
- def load_cam_info(self):
155
- for vid, img_id in enumerate(self.img_ids):
156
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
157
- self.all_intrinsics.append(intrinsic)
158
- self.all_extrinsics.append(extrinsic)
159
- self.all_near_fars.append(near_far)
160
-
161
- def read_depth(self, filename):
162
- pass
163
-
164
- def read_mask(self, filename):
165
- mask_h = cv2.imread(filename, 0)
166
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
167
- interpolation=cv2.INTER_NEAREST)
168
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
169
- interpolation=cv2.INTER_NEAREST)
170
-
171
- mask[mask > 0] = 1 # the masks stored in png are not binary
172
- mask_h[mask_h > 0] = 1
173
-
174
- return mask, mask_h
175
-
176
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
177
-
178
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
179
-
180
- radius = radius * factor
181
- scale_mat = np.diag([radius, radius, radius, 1.0])
182
- scale_mat[:3, 3] = center.cpu().numpy()
183
- scale_mat = scale_mat.astype(np.float32)
184
-
185
- return scale_mat, 1. / radius.cpu().numpy()
186
-
187
- def __len__(self):
188
- return self.imgs_per_instance*len(self.lvis_paths)
189
-
190
-
191
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
192
- pass
193
-
194
-
195
- def __getitem__(self, idx):
196
- sample = {}
197
- origin_idx = idx
198
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
199
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
200
- idx_original=idx
201
-
202
- folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
203
-
204
- folder_id = folder_uid_dict['folder_id']
205
- uid = folder_uid_dict['uid']
206
-
207
- idx = idx % self.imgs_per_instance # [0, 11]
208
- if idx < 8:
209
- # target view
210
- c2w = self.c2ws[idx]
211
- w2c = np.linalg.inv(c2w)
212
- w2c_ref = w2c
213
- w2c_ref_inv = np.linalg.inv(w2c_ref)
214
-
215
- w2cs.append(w2c @ w2c_ref_inv)
216
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
217
-
218
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
219
-
220
- depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
221
-
222
- img = Image.open(img_filename)
223
-
224
- img = self.transform(img) # (4, h, w)
225
- else:
226
- # target view
227
- c2w = self.c2ws[idx-8+40]
228
- w2c = np.linalg.inv(c2w)
229
- w2c_ref = w2c
230
- w2c_ref_inv = np.linalg.inv(w2c_ref)
231
-
232
- w2cs.append(w2c @ w2c_ref_inv)
233
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
234
-
235
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12/", folder_id, uid, f'view_{idx}.png')
236
-
237
- depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12/", folder_id, uid, f'view_{idx}_depth_mm.png'))
238
-
239
- img = Image.open(img_filename)
240
-
241
- img = self.transform(img) # (4, h, w)
242
-
243
- if img.shape[0] == 4:
244
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
245
- imgs += [img]
246
-
247
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
248
- mask_h = depth_h > 0
249
- # print("valid pixels", np.sum(mask_h))
250
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
251
- surface_points = directions * depth_h[..., None] # [H, W, 3]
252
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
253
- depth_h = distance
254
-
255
-
256
-
257
- depths_h.append(depth_h)
258
- masks_h.append(mask_h)
259
-
260
- intrinsic = self.intrinsic
261
- intrinsics.append(intrinsic)
262
-
263
-
264
- near_fars.append(self.near_fars[idx])
265
- image_perm = 0 # only supervised on reference view
266
-
267
- mask_dilated = None
268
-
269
-
270
- src_views = range(8, 8 + 8 * 4 + 4 + 4*4)
271
- src_views_used = []
272
- skipped_idx = [40, 41, 42, 43]
273
- for vid in src_views:
274
- if vid in skipped_idx:
275
- continue
276
-
277
- src_views_used.append(vid)
278
- cur_view_id = (vid - 8) // 4 # [0, 7]
279
-
280
- # choose narrow
281
- if cur_view_id < 8:
282
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png')
283
- else: # choose 2-stage
284
- cur_view_id = cur_view_id - 1
285
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12", folder_id, uid, f'view_{cur_view_id}_{vid%4}.png')
286
-
287
- img = Image.open(img_filename)
288
- img_wh = self.img_wh
289
-
290
- img = self.transform(img)
291
- if img.shape[0] == 4:
292
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
293
-
294
- imgs += [img]
295
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
296
- depths_h.append(depth_h)
297
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
298
-
299
- near_fars.append(self.all_near_fars[vid])
300
- intrinsics.append(self.all_intrinsics[vid])
301
-
302
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
303
-
304
-
305
-
306
-
307
- scale_mat, scale_factor = self.cal_scale_mat(
308
- img_hw=[img_wh[1], img_wh[0]],
309
- intrinsics=intrinsics, extrinsics=w2cs,
310
- near_fars=near_fars, factor=1.1
311
- )
312
-
313
-
314
- new_near_fars = []
315
- new_w2cs = []
316
- new_c2ws = []
317
- new_affine_mats = []
318
- new_depths_h = []
319
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
320
-
321
- P = intrinsic @ extrinsic @ scale_mat
322
- P = P[:3, :4]
323
- # - should use load_K_Rt_from_P() to obtain c2w
324
- c2w = load_K_Rt_from_P(None, P)[1]
325
- w2c = np.linalg.inv(c2w)
326
- new_w2cs.append(w2c)
327
- new_c2ws.append(c2w)
328
- affine_mat = np.eye(4)
329
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
330
- new_affine_mats.append(affine_mat)
331
-
332
- camera_o = c2w[:3, 3]
333
- dist = np.sqrt(np.sum(camera_o ** 2))
334
- near = dist - 1
335
- far = dist + 1
336
-
337
- new_near_fars.append([0.95 * near, 1.05 * far])
338
-
339
- new_depths_h.append(depth * scale_factor)
340
-
341
- # print(new_near_fars)
342
- # print("img numeber: ", len(imgs))
343
- imgs = torch.stack(imgs).float()
344
- depths_h = np.stack(new_depths_h)
345
- masks_h = np.stack(masks_h)
346
-
347
- affine_mats = np.stack(new_affine_mats)
348
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
349
- new_near_fars)
350
-
351
- if self.split == 'train':
352
- start_idx = 0
353
- else:
354
- start_idx = 1
355
-
356
- view_ids = [idx_original % self.imgs_per_instance] + src_views_used
357
- sample['origin_idx'] = origin_idx
358
- sample['images'] = imgs # (V, 3, H, W)
359
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
360
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
361
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
362
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
363
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
364
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
365
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
366
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
367
-
368
- # sample['light_idx'] = torch.tensor(light_idx)
369
- sample['scan'] = folder_id
370
-
371
- sample['scale_factor'] = torch.tensor(scale_factor)
372
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
373
- sample['render_img_idx'] = torch.tensor(image_perm)
374
- sample['partial_vol_origin'] = self.partial_vol_origin
375
- if view_ids[0] < 8:
376
- meta_end = "_narrow"+ "_refview" + str(view_ids[0])
377
- else:
378
- meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
379
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end
380
-
381
-
382
- # - image to render
383
- sample['query_image'] = sample['images'][0]
384
- sample['query_c2w'] = sample['c2ws'][0]
385
- sample['query_w2c'] = sample['w2cs'][0]
386
- sample['query_intrinsic'] = sample['intrinsics'][0]
387
- sample['query_depth'] = sample['depths_h'][0]
388
- sample['query_mask'] = sample['masks_h'][0]
389
- sample['query_near_far'] = sample['near_fars'][0]
390
-
391
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
392
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
393
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
394
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
395
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
396
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
397
- sample['view_ids'] = sample['view_ids'][start_idx:]
398
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
399
-
400
- sample['scale_mat'] = torch.from_numpy(scale_mat)
401
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
402
-
403
- # - generate rays
404
- if ('val' in self.split) or ('test' in self.split):
405
- sample_rays = gen_rays_from_single_image(
406
- img_wh[1], img_wh[0],
407
- sample['query_image'],
408
- sample['query_intrinsic'],
409
- sample['query_c2w'],
410
- depth=sample['query_depth'],
411
- mask=sample['query_mask'] if self.clean_image else None)
412
- else:
413
- sample_rays = gen_random_rays_from_single_image(
414
- img_wh[1], img_wh[0],
415
- self.N_rays,
416
- sample['query_image'],
417
- sample['query_intrinsic'],
418
- sample['query_c2w'],
419
- depth=sample['query_depth'],
420
- mask=sample['query_mask'] if self.clean_image else None,
421
- dilated_mask=mask_dilated,
422
- importance_sample=self.importance_sample)
423
-
424
-
425
- sample['rays'] = sample_rays
426
-
427
- return sample
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
SparseNeuS_demo_v1/data/blender_general_12_narrow_8.py DELETED
@@ -1,427 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- self.root_dir = root_dir
70
- self.split = split
71
- self.imgs_per_instance = 8
72
- self.n_views = n_views
73
- self.N_rays = N_rays
74
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
75
-
76
- self.clean_image = clean_image
77
- self.importance_sample = importance_sample
78
- self.test_ref_views = test_ref_views # used for testing
79
- self.scale_factor = 1.0
80
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
81
-
82
- lvis_json_path = '/objaverse-processed/zero12345_img/narrow_12_split_upd.json' # folder_id and uid
83
- with open(lvis_json_path, 'r') as f:
84
- lvis_paths = json.load(f)
85
- if self.split == 'train':
86
- self.lvis_paths = lvis_paths['train']
87
- else:
88
- self.lvis_paths = lvis_paths['val']
89
- if img_wh is not None:
90
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
91
- 'img_wh must both be multiples of 32!'
92
-
93
-
94
- pose_json_path_narrow_8 = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
95
- with open(pose_json_path_narrow_8, 'r') as f:
96
- narrow_8_meta = json.load(f)
97
-
98
- pose_json_path_narrow_4 = "/objaverse-processed/zero12345_img/zero12345_2stage_12_pose.json"
99
- with open(pose_json_path_narrow_4, 'r') as f:
100
- narrow_4_meta = json.load(f)
101
-
102
-
103
- self.img_ids = list(narrow_8_meta["c2ws"].keys()) + list(narrow_4_meta["c2ws"].keys()) # (8 + 8*4) + (4 + 4*4)
104
- self.img_wh = (256, 256)
105
- self.input_poses = np.array(list(narrow_8_meta["c2ws"].values()) + list(narrow_4_meta["c2ws"].values()))
106
- intrinsic = np.eye(4)
107
- assert narrow_8_meta["intrinsics"] == narrow_4_meta["intrinsics"], "intrinsics not equal"
108
- intrinsic[:3, :3] = np.array(narrow_8_meta["intrinsics"])
109
- self.intrinsic = intrinsic
110
- assert narrow_8_meta["near_far"] == narrow_4_meta["near_far"], "near_far not equal"
111
- self.near_far = np.array(narrow_8_meta["near_far"])
112
- self.near_far[1] = 1.8
113
- self.define_transforms()
114
- self.blender2opencv = np.array(
115
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
116
- )
117
-
118
-
119
- self.c2ws = []
120
- self.w2cs = []
121
- self.near_fars = []
122
- for idx, img_id in enumerate(self.img_ids):
123
- pose = self.input_poses[idx]
124
- c2w = pose @ self.blender2opencv
125
- self.c2ws.append(c2w)
126
- self.w2cs.append(np.linalg.inv(c2w))
127
- self.near_fars.append(self.near_far)
128
-
129
-
130
-
131
- self.c2ws = np.stack(self.c2ws, axis=0)
132
- self.w2cs = np.stack(self.w2cs, axis=0)
133
-
134
-
135
- self.all_intrinsics = [] # the cam info of the whole scene
136
- self.all_extrinsics = []
137
- self.all_near_fars = []
138
- self.load_cam_info()
139
-
140
- # * bounding box for rendering
141
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
142
- self.bbox_max = np.array([1.0, 1.0, 1.0])
143
-
144
- # - used for cost volume regularization
145
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
146
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
147
-
148
-
149
- def define_transforms(self):
150
- self.transform = T.Compose([T.ToTensor()])
151
-
152
-
153
-
154
- def load_cam_info(self):
155
- for vid, img_id in enumerate(self.img_ids):
156
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
157
- self.all_intrinsics.append(intrinsic)
158
- self.all_extrinsics.append(extrinsic)
159
- self.all_near_fars.append(near_far)
160
-
161
- def read_depth(self, filename):
162
- pass
163
-
164
- def read_mask(self, filename):
165
- mask_h = cv2.imread(filename, 0)
166
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
167
- interpolation=cv2.INTER_NEAREST)
168
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
169
- interpolation=cv2.INTER_NEAREST)
170
-
171
- mask[mask > 0] = 1 # the masks stored in png are not binary
172
- mask_h[mask_h > 0] = 1
173
-
174
- return mask, mask_h
175
-
176
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
177
-
178
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
179
-
180
- radius = radius * factor
181
- scale_mat = np.diag([radius, radius, radius, 1.0])
182
- scale_mat[:3, 3] = center.cpu().numpy()
183
- scale_mat = scale_mat.astype(np.float32)
184
-
185
- return scale_mat, 1. / radius.cpu().numpy()
186
-
187
- def __len__(self):
188
- return self.imgs_per_instance*len(self.lvis_paths)
189
-
190
-
191
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
192
- pass
193
-
194
-
195
- def __getitem__(self, idx):
196
- sample = {}
197
- origin_idx = idx
198
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
199
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
200
- idx_original=idx
201
-
202
- folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
203
-
204
- folder_id = folder_uid_dict['folder_id']
205
- uid = folder_uid_dict['uid']
206
-
207
- idx = idx % self.imgs_per_instance # [0, 11]
208
- if idx < 8:
209
- # target view
210
- c2w = self.c2ws[idx]
211
- w2c = np.linalg.inv(c2w)
212
- w2c_ref = w2c
213
- w2c_ref_inv = np.linalg.inv(w2c_ref)
214
-
215
- w2cs.append(w2c @ w2c_ref_inv)
216
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
217
-
218
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
219
-
220
- depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
221
-
222
- img = Image.open(img_filename)
223
-
224
- img = self.transform(img) # (4, h, w)
225
- else:
226
- # target view
227
- c2w = self.c2ws[idx-8+40]
228
- w2c = np.linalg.inv(c2w)
229
- w2c_ref = w2c
230
- w2c_ref_inv = np.linalg.inv(w2c_ref)
231
-
232
- w2cs.append(w2c @ w2c_ref_inv)
233
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
234
-
235
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12/", folder_id, uid, f'view_{idx}.png')
236
-
237
- depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12/", folder_id, uid, f'view_{idx}_depth_mm.png'))
238
-
239
- img = Image.open(img_filename)
240
-
241
- img = self.transform(img) # (4, h, w)
242
-
243
- if img.shape[0] == 4:
244
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
245
- imgs += [img]
246
-
247
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
248
- mask_h = depth_h > 0
249
- # print("valid pixels", np.sum(mask_h))
250
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
251
- surface_points = directions * depth_h[..., None] # [H, W, 3]
252
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
253
- depth_h = distance
254
-
255
-
256
-
257
- depths_h.append(depth_h)
258
- masks_h.append(mask_h)
259
-
260
- intrinsic = self.intrinsic
261
- intrinsics.append(intrinsic)
262
-
263
-
264
- near_fars.append(self.near_fars[idx])
265
- image_perm = 0 # only supervised on reference view
266
-
267
- mask_dilated = None
268
-
269
-
270
- src_views = range(8, 8 + 8 * 4 + 4 + 4*4)
271
- src_views_used = []
272
- skipped_idx = [40, 41, 42, 43]
273
- for vid in src_views:
274
- if vid in skipped_idx:
275
- continue
276
-
277
- src_views_used.append(vid)
278
- cur_view_id = (vid - 8) // 4 # [0, 7]
279
-
280
- # choose narrow
281
- if cur_view_id < 8:
282
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png')
283
- else: # choose 2-stage
284
- cur_view_id = cur_view_id - 1
285
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12", folder_id, uid, f'view_{cur_view_id}_{vid%4}.png')
286
-
287
- img = Image.open(img_filename)
288
- img_wh = self.img_wh
289
-
290
- img = self.transform(img)
291
- if img.shape[0] == 4:
292
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
293
-
294
- imgs += [img]
295
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
296
- depths_h.append(depth_h)
297
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
298
-
299
- near_fars.append(self.all_near_fars[vid])
300
- intrinsics.append(self.all_intrinsics[vid])
301
-
302
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
303
-
304
-
305
-
306
-
307
- scale_mat, scale_factor = self.cal_scale_mat(
308
- img_hw=[img_wh[1], img_wh[0]],
309
- intrinsics=intrinsics, extrinsics=w2cs,
310
- near_fars=near_fars, factor=1.1
311
- )
312
-
313
-
314
- new_near_fars = []
315
- new_w2cs = []
316
- new_c2ws = []
317
- new_affine_mats = []
318
- new_depths_h = []
319
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
320
-
321
- P = intrinsic @ extrinsic @ scale_mat
322
- P = P[:3, :4]
323
- # - should use load_K_Rt_from_P() to obtain c2w
324
- c2w = load_K_Rt_from_P(None, P)[1]
325
- w2c = np.linalg.inv(c2w)
326
- new_w2cs.append(w2c)
327
- new_c2ws.append(c2w)
328
- affine_mat = np.eye(4)
329
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
330
- new_affine_mats.append(affine_mat)
331
-
332
- camera_o = c2w[:3, 3]
333
- dist = np.sqrt(np.sum(camera_o ** 2))
334
- near = dist - 1
335
- far = dist + 1
336
-
337
- new_near_fars.append([0.95 * near, 1.05 * far])
338
-
339
- new_depths_h.append(depth * scale_factor)
340
-
341
- # print(new_near_fars)
342
- # print("img numeber: ", len(imgs))
343
- imgs = torch.stack(imgs).float()
344
- depths_h = np.stack(new_depths_h)
345
- masks_h = np.stack(masks_h)
346
-
347
- affine_mats = np.stack(new_affine_mats)
348
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
349
- new_near_fars)
350
-
351
- if self.split == 'train':
352
- start_idx = 0
353
- else:
354
- start_idx = 1
355
-
356
- view_ids = [idx_original % self.imgs_per_instance] + src_views_used
357
- sample['origin_idx'] = origin_idx
358
- sample['images'] = imgs # (V, 3, H, W)
359
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
360
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
361
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
362
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
363
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
364
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
365
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
366
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
367
-
368
- # sample['light_idx'] = torch.tensor(light_idx)
369
- sample['scan'] = folder_id
370
-
371
- sample['scale_factor'] = torch.tensor(scale_factor)
372
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
373
- sample['render_img_idx'] = torch.tensor(image_perm)
374
- sample['partial_vol_origin'] = self.partial_vol_origin
375
- if view_ids[0] < 8:
376
- meta_end = "_narrow"+ "_refview" + str(view_ids[0])
377
- else:
378
- meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
379
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end
380
-
381
-
382
- # - image to render
383
- sample['query_image'] = sample['images'][0]
384
- sample['query_c2w'] = sample['c2ws'][0]
385
- sample['query_w2c'] = sample['w2cs'][0]
386
- sample['query_intrinsic'] = sample['intrinsics'][0]
387
- sample['query_depth'] = sample['depths_h'][0]
388
- sample['query_mask'] = sample['masks_h'][0]
389
- sample['query_near_far'] = sample['near_fars'][0]
390
-
391
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
392
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
393
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
394
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
395
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
396
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
397
- sample['view_ids'] = sample['view_ids'][start_idx:]
398
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
399
-
400
- sample['scale_mat'] = torch.from_numpy(scale_mat)
401
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
402
-
403
- # - generate rays
404
- if ('val' in self.split) or ('test' in self.split):
405
- sample_rays = gen_rays_from_single_image(
406
- img_wh[1], img_wh[0],
407
- sample['query_image'],
408
- sample['query_intrinsic'],
409
- sample['query_c2w'],
410
- depth=sample['query_depth'],
411
- mask=sample['query_mask'] if self.clean_image else None)
412
- else:
413
- sample_rays = gen_random_rays_from_single_image(
414
- img_wh[1], img_wh[0],
415
- self.N_rays,
416
- sample['query_image'],
417
- sample['query_intrinsic'],
418
- sample['query_c2w'],
419
- depth=sample['query_depth'],
420
- mask=sample['query_mask'] if self.clean_image else None,
421
- dilated_mask=mask_dilated,
422
- importance_sample=self.importance_sample)
423
-
424
-
425
- sample['rays'] = sample_rays
426
-
427
- return sample
 
SparseNeuS_demo_v1/data/blender_general_360.py DELETED
@@ -1,412 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
-
18
- def get_ray_directions(H, W, focal, center=None):
19
- """
20
- Get ray directions for all pixels in camera coordinate.
21
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
22
- ray-tracing-generating-camera-rays/standard-coordinate-systems
23
- Inputs:
24
- H, W, focal: image height, width and focal length
25
- Outputs:
26
- directions: (H, W, 3), the direction of the rays in camera coordinate
27
- """
28
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
29
-
30
- i, j = grid.unbind(-1)
31
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
32
- # see https://github.com/bmild/nerf/issues/24
33
- cent = center if center is not None else [W / 2, H / 2]
34
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
35
-
36
- return directions
37
-
38
- def load_K_Rt_from_P(filename, P=None):
39
- if P is None:
40
- lines = open(filename).read().splitlines()
41
- if len(lines) == 4:
42
- lines = lines[1:]
43
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
44
- P = np.asarray(lines).astype(np.float32).squeeze()
45
-
46
- out = cv2.decomposeProjectionMatrix(P)
47
- K = out[0]
48
- R = out[1]
49
- t = out[2]
50
-
51
- K = K / K[2, 2]
52
- intrinsics = np.eye(4)
53
- intrinsics[:3, :3] = K
54
-
55
- pose = np.eye(4, dtype=np.float32)
56
- pose[:3, :3] = R.transpose() # ? why need transpose here
57
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
58
-
59
- return intrinsics, pose # ! return cam2world matrix here
60
-
61
-
62
- # ! load one ref-image with multiple src-images in camera coordinate system
63
- class BlenderPerView(Dataset):
64
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
65
- split_filepath=None, pair_filepath=None,
66
- N_rays=512,
67
- vol_dims=[128, 128, 128], batch_size=1,
68
- clean_image=False, importance_sample=False, test_ref_views=[]):
69
-
70
- # print("root_dir: ", root_dir)
71
- self.root_dir = root_dir
72
- self.split = split
73
-
74
- self.n_views = n_views
75
- self.N_rays = N_rays
76
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
77
-
78
- self.clean_image = clean_image
79
- self.importance_sample = importance_sample
80
- self.test_ref_views = test_ref_views # used for testing
81
- self.scale_factor = 1.0
82
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
83
-
84
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
85
- with open(lvis_json_path, 'r') as f:
86
- lvis_paths = json.load(f)
87
- if self.split == 'train':
88
- self.lvis_paths = lvis_paths['train']
89
- else:
90
- self.lvis_paths = lvis_paths['val']
91
- if img_wh is not None:
92
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
93
- 'img_wh must both be multiples of 32!'
94
-
95
-
96
- pose_json_path = "/objaverse-processed/zero12345_img/zero12345_wide_pose.json"
97
- with open(pose_json_path, 'r') as f:
98
- meta = json.load(f)
99
-
100
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0_0", "view_0_5", "view_1_7"
101
- self.img_wh = (256, 256)
102
- self.input_poses = np.array(list(meta["c2ws"].values()))
103
- intrinsic = np.eye(4)
104
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
105
- self.intrinsic = intrinsic
106
- self.near_far = np.array(meta["near_far"])
107
-
108
-
109
- self.define_transforms()
110
- self.blender2opencv = np.array(
111
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
112
- )
113
-
114
-
115
- self.c2ws = []
116
- self.w2cs = []
117
- self.near_fars = []
118
- # self.root_dir = root_dir
119
- for idx, img_id in enumerate(self.img_ids):
120
- pose = self.input_poses[idx]
121
- c2w = pose @ self.blender2opencv
122
- self.c2ws.append(c2w)
123
- self.w2cs.append(np.linalg.inv(c2w))
124
- self.near_fars.append(self.near_far)
125
- self.c2ws = np.stack(self.c2ws, axis=0)
126
- self.w2cs = np.stack(self.w2cs, axis=0)
127
-
128
-
129
- self.all_intrinsics = [] # the cam info of the whole scene
130
- self.all_extrinsics = []
131
- self.all_near_fars = []
132
- self.load_cam_info()
133
-
134
- # * bounding box for rendering
135
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
136
- self.bbox_max = np.array([1.0, 1.0, 1.0])
137
-
138
- # - used for cost volume regularization
139
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
140
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
141
-
142
-
143
- def define_transforms(self):
144
- self.transform = T.Compose([T.ToTensor()])
145
-
146
-
147
-
148
- def load_cam_info(self):
149
- for vid, img_id in enumerate(self.img_ids):
150
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
151
- self.all_intrinsics.append(intrinsic)
152
- self.all_extrinsics.append(extrinsic)
153
- self.all_near_fars.append(near_far)
154
-
155
- def read_depth(self, filename):
156
- depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600)
157
- depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5,
158
- interpolation=cv2.INTER_NEAREST) # (600, 800)
159
- depth_h = depth_h[44:556, 80:720] # (512, 640)
160
- depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample,
161
- interpolation=cv2.INTER_NEAREST)
162
- depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4,
163
- interpolation=cv2.INTER_NEAREST)
164
-
165
- return depth, depth_h
166
-
167
- def read_mask(self, filename):
168
- mask_h = cv2.imread(filename, 0)
169
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
170
- interpolation=cv2.INTER_NEAREST)
171
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
172
- interpolation=cv2.INTER_NEAREST)
173
-
174
- mask[mask > 0] = 1 # the masks stored in png are not binary
175
- mask_h[mask_h > 0] = 1
176
-
177
- return mask, mask_h
178
-
179
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
180
-
181
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
182
- # print("center", center)
183
- # print("radius", radius)
184
- # print("bounds", bounds)
185
- # import ipdb; ipdb.set_trace()
186
- radius = radius * factor
187
- scale_mat = np.diag([radius, radius, radius, 1.0])
188
- scale_mat[:3, 3] = center.cpu().numpy()
189
- scale_mat = scale_mat.astype(np.float32)
190
-
191
- return scale_mat, 1. / radius.cpu().numpy()
192
-
193
- def __len__(self):
194
- return 36*len(self.lvis_paths)
195
-
196
-
197
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
198
- pass
199
-
200
-
201
- def __getitem__(self, idx):
202
- sample = {}
203
-
204
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
205
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
206
-
207
-
208
- folder_uid_dict = self.lvis_paths[idx//36]
209
-
210
-
211
- folder_id = folder_uid_dict['folder_id']
212
- uid = folder_uid_dict['uid']
213
-
214
- idx = idx % 36 # [0, 35]
215
- gt_view_idx = idx // 12 # [0, 2]
216
- target_view_idx = idx % 12 # [0, 11]
217
-
218
-
219
-
220
- # target view
221
- c2w = self.c2ws[idx]
222
- w2c = np.linalg.inv(c2w)
223
- w2c_ref = w2c
224
- w2c_ref_inv = np.linalg.inv(w2c_ref)
225
-
226
- w2cs.append(w2c @ w2c_ref_inv)
227
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
228
-
229
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{gt_view_idx}_{target_view_idx}_gt.png')
230
-
231
- depth_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{gt_view_idx}_{target_view_idx}_gt_depth_mm.png')
232
-
233
-
234
- img = Image.open(img_filename)
235
-
236
- img = self.transform(img) # (4, h, w)
237
-
238
-
239
- if img.shape[0] == 4:
240
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
241
- imgs += [img]
242
-
243
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
244
- mask_h = depth_h > 0
245
- # print("valid pixels", np.sum(mask_h))
246
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
247
- surface_points = directions * depth_h[..., None] # [H, W, 3]
248
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
249
- depth_h = distance
250
-
251
-
252
- depths_h.append(depth_h)
253
- masks_h.append(mask_h)
254
-
255
- intrinsic = self.intrinsic
256
- intrinsics.append(intrinsic)
257
-
258
-
259
-
260
- near_fars.append(self.near_fars[idx])
261
- image_perm = 0 # only supervised on reference view
262
-
263
- mask_dilated = None
264
-
265
- # src_views = range(gt_view_idx * 12, (gt_view_idx + 1) * 12)
266
-
267
- idx_of_12 = idx - 12 * gt_view_idx # idx % 12
268
-
269
- src_views = list(i % 12 + 12 * gt_view_idx for i in range(idx_of_12 - 1-1, idx_of_12 + 2+1))
270
-
271
-
272
- for vid in src_views:
273
- # if vid == idx:
274
- # continue
275
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{gt_view_idx}_{target_view_idx}.png')
276
-
277
- img = Image.open(img_filename)
278
- img_wh = self.img_wh
279
-
280
- img = self.transform(img)
281
- if img.shape[0] == 4:
282
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
283
-
284
- imgs += [img]
285
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
286
- depths_h.append(depth_h)
287
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
288
-
289
- near_fars.append(self.all_near_fars[vid])
290
- intrinsics.append(self.all_intrinsics[vid])
291
-
292
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
293
-
294
-
295
- # ! estimate scale_mat
296
- scale_mat, scale_factor = self.cal_scale_mat(
297
- img_hw=[img_wh[1], img_wh[0]],
298
- intrinsics=intrinsics, extrinsics=w2cs,
299
- near_fars=near_fars, factor=1.1
300
- )
301
- # print(scale_mat)
302
- # print(scale_factor)
303
- # ! calculate the new w2cs after scaling
304
- new_near_fars = []
305
- new_w2cs = []
306
- new_c2ws = []
307
- new_affine_mats = []
308
- new_depths_h = []
309
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
310
-
311
- P = intrinsic @ extrinsic @ scale_mat
312
- P = P[:3, :4]
313
- # - should use load_K_Rt_from_P() to obtain c2w
314
- c2w = load_K_Rt_from_P(None, P)[1]
315
- w2c = np.linalg.inv(c2w)
316
- new_w2cs.append(w2c)
317
- new_c2ws.append(c2w)
318
- affine_mat = np.eye(4)
319
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
320
- new_affine_mats.append(affine_mat)
321
-
322
- camera_o = c2w[:3, 3]
323
- dist = np.sqrt(np.sum(camera_o ** 2))
324
- near = dist - 1
325
- far = dist + 1
326
-
327
- new_near_fars.append([0.95 * near, 1.05 * far])
328
- new_depths_h.append(depth * scale_factor)
329
-
330
- # print(new_near_fars)
331
- imgs = torch.stack(imgs).float()
332
- depths_h = np.stack(new_depths_h)
333
- masks_h = np.stack(masks_h)
334
-
335
- affine_mats = np.stack(new_affine_mats)
336
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
337
- new_near_fars)
338
-
339
- if self.split == 'train':
340
- start_idx = 0
341
- else:
342
- start_idx = 1
343
-
344
- view_ids = [idx] + list(src_views)
345
-
346
- sample['images'] = imgs # (V, 3, H, W)
347
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
348
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
349
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
350
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
351
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
352
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
353
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
354
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
355
-
356
- # sample['light_idx'] = torch.tensor(light_idx)
357
- sample['scan'] = folder_id
358
-
359
- sample['scale_factor'] = torch.tensor(scale_factor)
360
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
361
- sample['render_img_idx'] = torch.tensor(image_perm)
362
- sample['partial_vol_origin'] = self.partial_vol_origin
363
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
364
-
365
-
366
- # - image to render
367
- sample['query_image'] = sample['images'][0]
368
- sample['query_c2w'] = sample['c2ws'][0]
369
- sample['query_w2c'] = sample['w2cs'][0]
370
- sample['query_intrinsic'] = sample['intrinsics'][0]
371
- sample['query_depth'] = sample['depths_h'][0]
372
- sample['query_mask'] = sample['masks_h'][0]
373
- sample['query_near_far'] = sample['near_fars'][0]
374
-
375
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
376
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
377
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
378
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
379
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
380
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
381
- sample['view_ids'] = sample['view_ids'][start_idx:]
382
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
383
-
384
- sample['scale_mat'] = torch.from_numpy(scale_mat)
385
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
386
-
387
- # - generate rays
388
- if ('val' in self.split) or ('test' in self.split):
389
- sample_rays = gen_rays_from_single_image(
390
- img_wh[1], img_wh[0],
391
- sample['query_image'],
392
- sample['query_intrinsic'],
393
- sample['query_c2w'],
394
- depth=sample['query_depth'],
395
- mask=sample['query_mask'] if self.clean_image else None)
396
-
397
- else:
398
- sample_rays = gen_random_rays_from_single_image(
399
- img_wh[1], img_wh[0],
400
- self.N_rays,
401
- sample['query_image'],
402
- sample['query_intrinsic'],
403
- sample['query_c2w'],
404
- depth=sample['query_depth'],
405
- mask=sample['query_mask'] if self.clean_image else None,
406
- dilated_mask=mask_dilated,
407
- importance_sample=self.importance_sample)
408
-
409
-
410
- sample['rays'] = sample_rays
411
-
412
- return sample
 
SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_3.py DELETED
@@ -1,406 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- # print("root_dir: ", root_dir)
70
- self.root_dir = root_dir
71
- self.split = split
72
-
73
- self.n_views = n_views
74
- self.N_rays = N_rays
75
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
76
-
77
- self.clean_image = clean_image
78
- self.importance_sample = importance_sample
79
- self.test_ref_views = test_ref_views # used for testing
80
- self.scale_factor = 1.0
81
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
-
83
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
- with open(lvis_json_path, 'r') as f:
85
- lvis_paths = json.load(f)
86
- if self.split == 'train':
87
- self.lvis_paths = lvis_paths['train']
88
- else:
89
- self.lvis_paths = lvis_paths['val']
90
- if img_wh is not None:
91
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
- 'img_wh must both be multiples of 32!'
93
-
94
-
95
- pose_json_path = "/objaverse-processed/zero12345_img/zero12345_2stage_pose.json"
96
- with open(pose_json_path, 'r') as f:
97
- meta = json.load(f)
98
-
99
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0_0", "view_0_5", "view_1_7"
100
- self.img_wh = (256, 256)
101
- self.input_poses = np.array(list(meta["c2ws"].values()))
102
- intrinsic = np.eye(4)
103
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
- self.intrinsic = intrinsic
105
- self.near_far = np.array(meta["near_far"])
106
-
107
- self.define_transforms()
108
- self.blender2opencv = np.array(
109
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
- )
111
-
112
-
113
- self.c2ws = []
114
- self.w2cs = []
115
- self.near_fars = []
116
- # self.root_dir = root_dir
117
- for idx, img_id in enumerate(self.img_ids):
118
- pose = self.input_poses[idx]
119
- c2w = pose @ self.blender2opencv
120
- self.c2ws.append(c2w)
121
- self.w2cs.append(np.linalg.inv(c2w))
122
- self.near_fars.append(self.near_far)
123
- self.c2ws = np.stack(self.c2ws, axis=0)
124
- self.w2cs = np.stack(self.w2cs, axis=0)
125
-
126
-
127
- self.all_intrinsics = [] # the cam info of the whole scene
128
- self.all_extrinsics = []
129
- self.all_near_fars = []
130
- self.load_cam_info()
131
-
132
- # * bounding box for rendering
133
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
- self.bbox_max = np.array([1.0, 1.0, 1.0])
135
-
136
- # - used for cost volume regularization
137
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
-
140
-
141
- def define_transforms(self):
142
- self.transform = T.Compose([T.ToTensor()])
143
-
144
-
145
-
146
- def load_cam_info(self):
147
- for vid, img_id in enumerate(self.img_ids):
148
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
- self.all_intrinsics.append(intrinsic)
150
- self.all_extrinsics.append(extrinsic)
151
- self.all_near_fars.append(near_far)
152
-
153
- def read_depth(self, filename):
154
- depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600)
155
- depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5,
156
- interpolation=cv2.INTER_NEAREST) # (600, 800)
157
- depth_h = depth_h[44:556, 80:720] # (512, 640)
158
- depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample,
159
- interpolation=cv2.INTER_NEAREST)
160
- depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4,
161
- interpolation=cv2.INTER_NEAREST)
162
-
163
- return depth, depth_h
164
-
165
- def read_mask(self, filename):
166
- mask_h = cv2.imread(filename, 0)
167
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
168
- interpolation=cv2.INTER_NEAREST)
169
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
170
- interpolation=cv2.INTER_NEAREST)
171
-
172
- mask[mask > 0] = 1 # the masks stored in png are not binary
173
- mask_h[mask_h > 0] = 1
174
-
175
- return mask, mask_h
176
-
177
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
178
-
179
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
180
- # print("center", center)
181
- # print("radius", radius)
182
- # print("bounds", bounds)
183
- # import ipdb; ipdb.set_trace()
184
- radius = radius * factor
185
- scale_mat = np.diag([radius, radius, radius, 1.0])
186
- scale_mat[:3, 3] = center.cpu().numpy()
187
- scale_mat = scale_mat.astype(np.float32)
188
-
189
- return scale_mat, 1. / radius.cpu().numpy()
190
-
191
- def __len__(self):
192
- return 6*len(self.lvis_paths)
193
-
194
-
195
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
196
- pass
197
-
198
-
199
- def __getitem__(self, idx):
200
- sample = {}
201
-
202
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
203
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
204
-
205
-
206
- folder_uid_dict = self.lvis_paths[idx//6]
207
- idx = idx % 6
208
-
209
- folder_id = folder_uid_dict['folder_id']
210
- uid = folder_uid_dict['uid']
211
-
212
- # idx = idx % 24 # [0, 23]
213
-
214
-
215
-
216
- # target view
217
- c2w = self.c2ws[idx]
218
- w2c = np.linalg.inv(c2w)
219
- w2c_ref = w2c
220
- w2c_ref_inv = np.linalg.inv(w2c_ref)
221
-
222
- w2cs.append(w2c @ w2c_ref_inv)
223
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
224
-
225
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{idx}_gt.png')
226
-
227
- depth_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{idx}_gt_depth_mm.png')
228
-
229
-
230
- img = Image.open(img_filename)
231
-
232
- img = self.transform(img) # (4, h, w)
233
-
234
-
235
- if img.shape[0] == 4:
236
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
237
- imgs += [img]
238
-
239
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
240
- mask_h = depth_h > 0
241
- # print("valid pixels", np.sum(mask_h))
242
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
243
- surface_points = directions * depth_h[..., None] # [H, W, 3]
244
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
245
- depth_h = distance
246
-
247
-
248
- depths_h.append(depth_h)
249
- masks_h.append(mask_h)
250
-
251
- intrinsic = self.intrinsic
252
- intrinsics.append(intrinsic)
253
-
254
-
255
-
256
- near_fars.append(self.near_fars[idx])
257
- image_perm = 0 # only supervised on reference view
258
-
259
- mask_dilated = None
260
-
261
- # src_views = range(gt_view_idx * 12, (gt_view_idx + 1) * 12)
262
-
263
-
264
- src_views = range(6+idx*4, 6+(idx+1)*4)
265
-
266
- for vid in src_views:
267
- # if vid == idx:
268
- # continue
269
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{idx}_{vid % 4}.png')
270
-
271
- img = Image.open(img_filename)
272
- img_wh = self.img_wh
273
-
274
- img = self.transform(img)
275
- if img.shape[0] == 4:
276
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
277
-
278
- imgs += [img]
279
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
280
- depths_h.append(depth_h)
281
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
282
-
283
- near_fars.append(self.all_near_fars[vid])
284
- intrinsics.append(self.all_intrinsics[vid])
285
-
286
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
287
-
288
-
289
- # ! estimate scale_mat
290
- scale_mat, scale_factor = self.cal_scale_mat(
291
- img_hw=[img_wh[1], img_wh[0]],
292
- intrinsics=intrinsics, extrinsics=w2cs,
293
- near_fars=near_fars, factor=1.1
294
- )
295
- # print(scale_mat)
296
- # print(scale_factor)
297
- # ! calculate the new w2cs after scaling
298
- new_near_fars = []
299
- new_w2cs = []
300
- new_c2ws = []
301
- new_affine_mats = []
302
- new_depths_h = []
303
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
304
-
305
- P = intrinsic @ extrinsic @ scale_mat
306
- P = P[:3, :4]
307
- # - should use load_K_Rt_from_P() to obtain c2w
308
- c2w = load_K_Rt_from_P(None, P)[1]
309
- w2c = np.linalg.inv(c2w)
310
- new_w2cs.append(w2c)
311
- new_c2ws.append(c2w)
312
- affine_mat = np.eye(4)
313
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
314
- new_affine_mats.append(affine_mat)
315
-
316
- camera_o = c2w[:3, 3]
317
- dist = np.sqrt(np.sum(camera_o ** 2))
318
- near = dist - 1
319
- far = dist + 1
320
-
321
- new_near_fars.append([0.95 * near, 1.05 * far])
322
- new_depths_h.append(depth * scale_factor)
323
-
324
- # print(new_near_fars)
325
- imgs = torch.stack(imgs).float()
326
- depths_h = np.stack(new_depths_h)
327
- masks_h = np.stack(masks_h)
328
-
329
- affine_mats = np.stack(new_affine_mats)
330
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
331
- new_near_fars)
332
-
333
- if self.split == 'train':
334
- start_idx = 0
335
- else:
336
- start_idx = 1
337
-
338
- view_ids = [idx] + list(src_views)
339
-
340
- sample['images'] = imgs # (V, 3, H, W)
341
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
342
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
343
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
344
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
345
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
346
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
347
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
348
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
349
-
350
- # sample['light_idx'] = torch.tensor(light_idx)
351
- sample['scan'] = folder_id
352
-
353
- sample['scale_factor'] = torch.tensor(scale_factor)
354
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
355
- sample['render_img_idx'] = torch.tensor(image_perm)
356
- sample['partial_vol_origin'] = self.partial_vol_origin
357
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
358
-
359
-
360
- # - image to render
361
- sample['query_image'] = sample['images'][0]
362
- sample['query_c2w'] = sample['c2ws'][0]
363
- sample['query_w2c'] = sample['w2cs'][0]
364
- sample['query_intrinsic'] = sample['intrinsics'][0]
365
- sample['query_depth'] = sample['depths_h'][0]
366
- sample['query_mask'] = sample['masks_h'][0]
367
- sample['query_near_far'] = sample['near_fars'][0]
368
-
369
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
370
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
371
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
372
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
373
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
374
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
375
- sample['view_ids'] = sample['view_ids'][start_idx:]
376
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
377
-
378
- sample['scale_mat'] = torch.from_numpy(scale_mat)
379
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
380
-
381
- # - generate rays
382
- if ('val' in self.split) or ('test' in self.split):
383
- sample_rays = gen_rays_from_single_image(
384
- img_wh[1], img_wh[0],
385
- sample['query_image'],
386
- sample['query_intrinsic'],
387
- sample['query_c2w'],
388
- depth=sample['query_depth'],
389
- mask=sample['query_mask'] if self.clean_image else None)
390
-
391
- else:
392
- sample_rays = gen_random_rays_from_single_image(
393
- img_wh[1], img_wh[0],
394
- self.N_rays,
395
- sample['query_image'],
396
- sample['query_intrinsic'],
397
- sample['query_c2w'],
398
- depth=sample['query_depth'],
399
- mask=sample['query_mask'] if self.clean_image else None,
400
- dilated_mask=mask_dilated,
401
- importance_sample=self.importance_sample)
402
-
403
-
404
- sample['rays'] = sample_rays
405
-
406
- return sample
 
SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_4.py DELETED
@@ -1,411 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- # print("root_dir: ", root_dir)
70
- self.root_dir = root_dir
71
- self.split = split
72
-
73
- self.n_views = n_views
74
- self.N_rays = N_rays
75
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
76
-
77
- self.clean_image = clean_image
78
- self.importance_sample = importance_sample
79
- self.test_ref_views = test_ref_views # used for testing
80
- self.scale_factor = 1.0
81
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
-
83
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
- with open(lvis_json_path, 'r') as f:
85
- lvis_paths = json.load(f)
86
- if self.split == 'train':
87
- self.lvis_paths = lvis_paths['train']
88
- else:
89
- self.lvis_paths = lvis_paths['val']
90
- if img_wh is not None:
91
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
- 'img_wh must both be multiples of 32!'
93
-
94
-
95
- pose_json_path = "/objaverse-processed/zero12345_img/zero12345_2stage_5pred_pose.json"
96
- with open(pose_json_path, 'r') as f:
97
- meta = json.load(f)
98
-
99
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0_0", "view_0_5", "view_1_7"
100
- self.img_wh = (256, 256)
101
- self.input_poses = np.array(list(meta["c2ws"].values()))
102
- intrinsic = np.eye(4)
103
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
- self.intrinsic = intrinsic
105
- self.near_far = np.array(meta["near_far"])
106
-
107
- self.define_transforms()
108
- self.blender2opencv = np.array(
109
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
- )
111
-
112
-
113
- self.c2ws = []
114
- self.w2cs = []
115
- self.near_fars = []
116
- # self.root_dir = root_dir
117
- for idx, img_id in enumerate(self.img_ids):
118
- pose = self.input_poses[idx]
119
- c2w = pose @ self.blender2opencv
120
- self.c2ws.append(c2w)
121
- self.w2cs.append(np.linalg.inv(c2w))
122
- self.near_fars.append(self.near_far)
123
- self.c2ws = np.stack(self.c2ws, axis=0)
124
- self.w2cs = np.stack(self.w2cs, axis=0)
125
-
126
-
127
- self.all_intrinsics = [] # the cam info of the whole scene
128
- self.all_extrinsics = []
129
- self.all_near_fars = []
130
- self.load_cam_info()
131
-
132
- # * bounding box for rendering
133
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
- self.bbox_max = np.array([1.0, 1.0, 1.0])
135
-
136
- # - used for cost volume regularization
137
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
-
140
-
141
- def define_transforms(self):
142
- self.transform = T.Compose([T.ToTensor()])
143
-
144
-
145
-
146
- def load_cam_info(self):
147
- for vid, img_id in enumerate(self.img_ids):
148
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
- self.all_intrinsics.append(intrinsic)
150
- self.all_extrinsics.append(extrinsic)
151
- self.all_near_fars.append(near_far)
152
-
153
- def read_depth(self, filename):
154
- depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600)
155
- depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5,
156
- interpolation=cv2.INTER_NEAREST) # (600, 800)
157
- depth_h = depth_h[44:556, 80:720] # (512, 640)
158
- depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample,
159
- interpolation=cv2.INTER_NEAREST)
160
- depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4,
161
- interpolation=cv2.INTER_NEAREST)
162
-
163
- return depth, depth_h
164
-
165
- def read_mask(self, filename):
166
- mask_h = cv2.imread(filename, 0)
167
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
168
- interpolation=cv2.INTER_NEAREST)
169
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
170
- interpolation=cv2.INTER_NEAREST)
171
-
172
- mask[mask > 0] = 1 # the masks stored in png are not binary
173
- mask_h[mask_h > 0] = 1
174
-
175
- return mask, mask_h
176
-
177
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
178
-
179
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
180
- # print("center", center)
181
- # print("radius", radius)
182
- # print("bounds", bounds)
183
- # import ipdb; ipdb.set_trace()
184
- radius = radius * factor
185
- scale_mat = np.diag([radius, radius, radius, 1.0])
186
- scale_mat[:3, 3] = center.cpu().numpy()
187
- scale_mat = scale_mat.astype(np.float32)
188
-
189
- return scale_mat, 1. / radius.cpu().numpy()
190
-
191
- def __len__(self):
192
- return 6*len(self.lvis_paths)
193
-
194
-
195
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
196
- pass
197
-
198
-
199
- def __getitem__(self, idx):
200
- sample = {}
201
-
202
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
203
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
204
-
205
-
206
- folder_uid_dict = self.lvis_paths[idx//6]
207
- idx = idx % 6
208
-
209
- folder_id = folder_uid_dict['folder_id']
210
- uid = folder_uid_dict['uid']
211
-
212
- # idx = idx % 24 # [0, 23]
213
-
214
-
215
-
216
- # target view
217
- c2w = self.c2ws[idx]
218
- w2c = np.linalg.inv(c2w)
219
- w2c_ref = w2c
220
- w2c_ref_inv = np.linalg.inv(w2c_ref)
221
-
222
- w2cs.append(w2c @ w2c_ref_inv)
223
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
224
-
225
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage", folder_id, uid, f'view_0_{idx}_gt.png')
226
-
227
- depth_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage", folder_id, uid, f'view_0_{idx}_gt_depth_mm.png')
228
-
229
-
230
- img = Image.open(img_filename)
231
-
232
- img = self.transform(img) # (4, h, w)
233
-
234
- # print("img_pre", img.shape)
235
- if img.shape[0] == 4:
236
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
237
- # print("img", img.shape)
238
- imgs += [img]
239
-
240
-
241
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
242
- mask_h = depth_h > 0
243
- # print("valid pixels", np.sum(mask_h))
244
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
245
- surface_points = directions * depth_h[..., None] # [H, W, 3]
246
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
247
- depth_h = distance
248
- # print("depth_h", depth_h.shape)
249
-
250
- depths_h.append(depth_h)
251
- masks_h.append(mask_h)
252
-
253
- intrinsic = self.intrinsic
254
- intrinsics.append(intrinsic)
255
-
256
-
257
- near_fars.append(self.near_fars[idx])
258
- image_perm = 0 # only supervised on reference view
259
-
260
- mask_dilated = None
261
-
262
- # src_views = range(gt_view_idx * 12, (gt_view_idx + 1) * 12)
263
-
264
-
265
- src_views = range(6+idx*4, 6+(idx+1)*4)
266
-
267
- for vid in src_views:
268
- # if vid == idx:
269
- # continue
270
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{idx}_{vid % 4 + 1}.png')
271
-
272
- img = Image.open(img_filename)
273
- img_wh = self.img_wh
274
-
275
- img = self.transform(img)
276
- # print("img shape1: ", img.shape)
277
- if img.shape[0] == 4:
278
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
279
- # print("img shape2: ", img.shape)
280
- imgs += [img]
281
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
282
- depths_h.append(depth_h)
283
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
284
-
285
- near_fars.append(self.all_near_fars[vid])
286
- intrinsics.append(self.all_intrinsics[vid])
287
-
288
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
289
-
290
-
291
- # ! estimate scale_mat
292
- scale_mat, scale_factor = self.cal_scale_mat(
293
- img_hw=[img_wh[1], img_wh[0]],
294
- intrinsics=intrinsics, extrinsics=w2cs,
295
- near_fars=near_fars, factor=1.1
296
- )
297
- # print(scale_mat)
298
- # print(scale_factor)
299
- # ! calculate the new w2cs after scaling
300
- new_near_fars = []
301
- new_w2cs = []
302
- new_c2ws = []
303
- new_affine_mats = []
304
- new_depths_h = []
305
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
306
-
307
- P = intrinsic @ extrinsic @ scale_mat
308
- P = P[:3, :4]
309
- # - should use load_K_Rt_from_P() to obtain c2w
310
- c2w = load_K_Rt_from_P(None, P)[1]
311
- w2c = np.linalg.inv(c2w)
312
- new_w2cs.append(w2c)
313
- new_c2ws.append(c2w)
314
- affine_mat = np.eye(4)
315
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
316
- new_affine_mats.append(affine_mat)
317
-
318
- camera_o = c2w[:3, 3]
319
- dist = np.sqrt(np.sum(camera_o ** 2))
320
- near = dist - 1
321
- far = dist + 1
322
-
323
- new_near_fars.append([0.95 * near, 1.05 * far])
324
- new_depths_h.append(depth * scale_factor)
325
-
326
- # print(new_near_fars)
327
- # print("imgs: ", len(imgs))
328
- # print("img1 shape:", imgs[0].shape)
329
- # print("img2 shape:", imgs[1].shape)
330
- imgs = torch.stack(imgs).float()
331
- depths_h = np.stack(new_depths_h)
332
- masks_h = np.stack(masks_h)
333
-
334
- affine_mats = np.stack(new_affine_mats)
335
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
336
- new_near_fars)
337
-
338
- if self.split == 'train':
339
- start_idx = 0
340
- else:
341
- start_idx = 1
342
-
343
- view_ids = [idx] + list(src_views)
344
-
345
- sample['images'] = imgs # (V, 3, H, W)
346
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
347
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
348
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
349
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
350
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
351
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
352
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
353
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
354
-
355
- # sample['light_idx'] = torch.tensor(light_idx)
356
- sample['scan'] = folder_id
357
-
358
- sample['scale_factor'] = torch.tensor(scale_factor)
359
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
360
- sample['render_img_idx'] = torch.tensor(image_perm)
361
- sample['partial_vol_origin'] = self.partial_vol_origin
362
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
363
-
364
-
365
- # - image to render
366
- sample['query_image'] = sample['images'][0]
367
- sample['query_c2w'] = sample['c2ws'][0]
368
- sample['query_w2c'] = sample['w2cs'][0]
369
- sample['query_intrinsic'] = sample['intrinsics'][0]
370
- sample['query_depth'] = sample['depths_h'][0]
371
- sample['query_mask'] = sample['masks_h'][0]
372
- sample['query_near_far'] = sample['near_fars'][0]
373
-
374
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
375
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
376
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
377
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
378
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
379
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
380
- sample['view_ids'] = sample['view_ids'][start_idx:]
381
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
382
-
383
- sample['scale_mat'] = torch.from_numpy(scale_mat)
384
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
385
-
386
- # - generate rays
387
- if ('val' in self.split) or ('test' in self.split):
388
- sample_rays = gen_rays_from_single_image(
389
- img_wh[1], img_wh[0],
390
- sample['query_image'],
391
- sample['query_intrinsic'],
392
- sample['query_c2w'],
393
- depth=sample['query_depth'],
394
- mask=sample['query_mask'] if self.clean_image else None)
395
-
396
- else:
397
- sample_rays = gen_random_rays_from_single_image(
398
- img_wh[1], img_wh[0],
399
- self.N_rays,
400
- sample['query_image'],
401
- sample['query_intrinsic'],
402
- sample['query_c2w'],
403
- depth=sample['query_depth'],
404
- mask=sample['query_mask'] if self.clean_image else None,
405
- dilated_mask=mask_dilated,
406
- importance_sample=self.importance_sample)
407
-
408
-
409
- sample['rays'] = sample_rays
410
-
411
- return sample
SparseNeuS_demo_v1/data/blender_general_4_narrow_and_4_2_stage_mix.py DELETED
@@ -1,480 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- self.root_dir = root_dir
70
- self.split = split
71
- self.imgs_per_instance = 16
72
- self.n_views = n_views
73
- self.N_rays = N_rays
74
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
75
-
76
- self.clean_image = clean_image
77
- self.importance_sample = importance_sample
78
- self.test_ref_views = test_ref_views # used for testing
79
- self.scale_factor = 1.0
80
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
81
-
82
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
83
- with open(lvis_json_path, 'r') as f:
84
- lvis_paths = json.load(f)
85
- if self.split == 'train':
86
- self.lvis_paths = lvis_paths['train']
87
- else:
88
- self.lvis_paths = lvis_paths['val']
89
- if img_wh is not None:
90
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
91
- 'img_wh must both be multiples of 32!'
92
-
93
-
94
- pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
95
- with open(pose_json_path_narrow, 'r') as f:
96
- narrow_meta = json.load(f)
97
-
98
- pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
99
- with open(pose_json_path_two_stage, 'r') as f:
100
- two_stage_meta = json.load(f)
101
-
102
-
103
- self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 4*4)
104
- self.img_wh = (256, 256)
105
- self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
106
- intrinsic = np.eye(4)
107
- assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
108
- intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
109
- self.intrinsic = intrinsic
110
- assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
111
- self.near_far = np.array(narrow_meta["near_far"])
112
- self.near_far[1] = 1.8
113
- self.define_transforms()
114
- self.blender2opencv = np.array(
115
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
116
- )
117
-
118
-
119
- self.c2ws = []
120
- self.w2cs = []
121
- self.near_fars = []
122
- for idx, img_id in enumerate(self.img_ids):
123
- pose = self.input_poses[idx]
124
- c2w = pose @ self.blender2opencv
125
- self.c2ws.append(c2w)
126
- self.w2cs.append(np.linalg.inv(c2w))
127
- self.near_fars.append(self.near_far)
128
-
129
-
130
-
131
- self.c2ws = np.stack(self.c2ws, axis=0)
132
- self.w2cs = np.stack(self.w2cs, axis=0)
133
-
134
-
135
- self.all_intrinsics = [] # the cam info of the whole scene
136
- self.all_extrinsics = []
137
- self.all_near_fars = []
138
- self.load_cam_info()
139
-
140
- # * bounding box for rendering
141
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
142
- self.bbox_max = np.array([1.0, 1.0, 1.0])
143
-
144
- # - used for cost volume regularization
145
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
146
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
147
-
148
-
149
- def define_transforms(self):
150
- self.transform = T.Compose([T.ToTensor()])
151
-
152
-
153
-
154
- def load_cam_info(self):
155
- for vid, img_id in enumerate(self.img_ids):
156
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
157
- self.all_intrinsics.append(intrinsic)
158
- self.all_extrinsics.append(extrinsic)
159
- self.all_near_fars.append(near_far)
160
-
161
- def read_depth(self, filename):
162
- pass
163
-
164
- def read_mask(self, filename):
165
- mask_h = cv2.imread(filename, 0)
166
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
167
- interpolation=cv2.INTER_NEAREST)
168
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
169
- interpolation=cv2.INTER_NEAREST)
170
-
171
- mask[mask > 0] = 1 # the masks stored in png are not binary
172
- mask_h[mask_h > 0] = 1
173
-
174
- return mask, mask_h
175
-
176
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
177
-
178
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
179
-
180
- radius = radius * factor
181
- scale_mat = np.diag([radius, radius, radius, 1.0])
182
- scale_mat[:3, 3] = center.cpu().numpy()
183
- scale_mat = scale_mat.astype(np.float32)
184
-
185
- return scale_mat, 1. / radius.cpu().numpy()
186
-
187
- def __len__(self):
188
- return self.imgs_per_instance * len(self.lvis_paths)
189
-
190
-
191
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
192
- pass
193
-
194
-
195
- def __getitem__(self, idx):
196
- sample = {}
197
- origin_idx = idx
198
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
199
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
200
- idx_original=idx
201
-
202
- folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
203
-
204
- folder_id = folder_uid_dict['folder_id']
205
- uid = folder_uid_dict['uid']
206
-
207
- if idx % 2 == 0:
208
- valid_list = [0, 2, 4, 6]
209
- else:
210
- valid_list = [1, 3, 5, 7]
211
-
212
- if idx % 16 < 8:
213
- idx = idx % 16 # [0, 7]
214
- # target view
215
- c2w = self.c2ws[idx]
216
- w2c = np.linalg.inv(c2w)
217
- w2c_ref = w2c
218
- w2c_ref_inv = np.linalg.inv(w2c_ref)
219
-
220
- w2cs.append(w2c @ w2c_ref_inv)
221
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
222
-
223
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
224
-
225
- depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
226
-
227
-
228
- img = Image.open(img_filename)
229
-
230
- img = self.transform(img) # (4, h, w)
231
-
232
-
233
- if img.shape[0] == 4:
234
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
235
- imgs += [img]
236
-
237
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
238
- mask_h = depth_h > 0
239
- # print("valid pixels", np.sum(mask_h))
240
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
241
- surface_points = directions * depth_h[..., None] # [H, W, 3]
242
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
243
- depth_h = distance
244
-
245
-
246
- depths_h.append(depth_h)
247
- masks_h.append(mask_h)
248
-
249
- intrinsic = self.intrinsic
250
- intrinsics.append(intrinsic)
251
-
252
-
253
- near_fars.append(self.near_fars[idx])
254
- image_perm = 0 # only supervised on reference view
255
-
256
- mask_dilated = None
257
-
258
- # src_views = range(8+idx*4, 8+(idx+1)*4)
259
-
260
- src_views = range(8, 8 + 8 * 4)
261
- src_views_used = []
262
- for vid in src_views:
263
- view_dix_to_use = (vid - 8) // 4
264
- if view_dix_to_use not in valid_list:
265
- continue
266
- src_views_used.append(vid)
267
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
268
-
269
- img = Image.open(img_filename)
270
- img_wh = self.img_wh
271
-
272
- img = self.transform(img)
273
- if img.shape[0] == 4:
274
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
275
-
276
- imgs += [img]
277
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
278
- depths_h.append(depth_h)
279
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
280
-
281
- near_fars.append(self.all_near_fars[vid])
282
- intrinsics.append(self.all_intrinsics[vid])
283
-
284
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
285
-
286
- else:
287
- idx = idx % 16 - 8 # [0, 7]
288
-
289
- c2w = self.c2ws[idx + 40]
290
- w2c = np.linalg.inv(c2w)
291
- w2c_ref = w2c
292
- w2c_ref_inv = np.linalg.inv(w2c_ref)
293
-
294
- w2cs.append(w2c @ w2c_ref_inv)
295
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
296
-
297
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png')
298
-
299
-
300
-
301
- img = Image.open(img_filename)
302
-
303
- img = self.transform(img) # (4, h, w)
304
-
305
- # print("img_pre", img.shape)
306
- if img.shape[0] == 4:
307
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
308
- # print("img", img.shape)
309
- imgs += [img]
310
-
311
-
312
- depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
313
- depth_h = depth_h.fill_(-1.0)
314
- # depth_h = torch.fill((img.shape[1], img.shape[2]), -1.0)
315
- # print("depth_h", depth_h.shape)
316
- mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
317
- depths_h.append(depth_h)
318
- masks_h.append(mask_h)
319
-
320
- intrinsic = self.intrinsic
321
- intrinsics.append(intrinsic)
322
-
323
-
324
- near_fars.append(self.near_fars[idx])
325
- image_perm = 0 # only supervised on reference view
326
-
327
- mask_dilated = None
328
-
329
-
330
-
331
- src_views = range(40+8, 40+8+32)
332
- src_views_used = []
333
- for vid in src_views:
334
- view_dix_to_use = (vid - 40 - 8) // 4
335
- if view_dix_to_use not in valid_list:
336
- continue
337
- src_views_used.append(vid)
338
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_{(vid-48) % 4 + 1}.png')
339
-
340
- img = Image.open(img_filename)
341
- img_wh = self.img_wh
342
-
343
- img = self.transform(img)
344
- # print("img shape1: ", img.shape)
345
- if img.shape[0] == 4:
346
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
347
- # print("img shape2: ", img.shape)
348
- imgs += [img]
349
- depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
350
- depth_h = depth_h.fill_(-1.0)
351
- depths_h.append(depth_h)
352
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
353
-
354
- near_fars.append(self.all_near_fars[vid])
355
- intrinsics.append(self.all_intrinsics[vid])
356
-
357
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
358
-
359
-
360
- scale_mat, scale_factor = self.cal_scale_mat(
361
- img_hw=[img_wh[1], img_wh[0]],
362
- intrinsics=intrinsics, extrinsics=w2cs,
363
- near_fars=near_fars, factor=1.1
364
- )
365
-
366
-
367
- new_near_fars = []
368
- new_w2cs = []
369
- new_c2ws = []
370
- new_affine_mats = []
371
- new_depths_h = []
372
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
373
-
374
- P = intrinsic @ extrinsic @ scale_mat
375
- P = P[:3, :4]
376
- # - should use load_K_Rt_from_P() to obtain c2w
377
- c2w = load_K_Rt_from_P(None, P)[1]
378
- w2c = np.linalg.inv(c2w)
379
- new_w2cs.append(w2c)
380
- new_c2ws.append(c2w)
381
- affine_mat = np.eye(4)
382
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
383
- new_affine_mats.append(affine_mat)
384
-
385
- camera_o = c2w[:3, 3]
386
- dist = np.sqrt(np.sum(camera_o ** 2))
387
- near = dist - 1
388
- far = dist + 1
389
-
390
- new_near_fars.append([0.95 * near, 1.05 * far])
391
-
392
- new_depths_h.append(depth * scale_factor)
393
-
394
- # print(new_near_fars)
395
- # print("img numeber: ", len(imgs))
396
- imgs = torch.stack(imgs).float()
397
- depths_h = np.stack(new_depths_h)
398
- masks_h = np.stack(masks_h)
399
-
400
- affine_mats = np.stack(new_affine_mats)
401
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
402
- new_near_fars)
403
-
404
- if self.split == 'train':
405
- start_idx = 0
406
- else:
407
- start_idx = 1
408
-
409
- view_ids = [idx_original % self.imgs_per_instance] + src_views_used
410
- sample['origin_idx'] = origin_idx
411
- sample['images'] = imgs # (V, 3, H, W)
412
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
413
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
414
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
415
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
416
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
417
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
418
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
419
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
420
-
421
- # sample['light_idx'] = torch.tensor(light_idx)
422
- sample['scan'] = folder_id
423
-
424
- sample['scale_factor'] = torch.tensor(scale_factor)
425
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
426
- sample['render_img_idx'] = torch.tensor(image_perm)
427
- sample['partial_vol_origin'] = self.partial_vol_origin
428
- if view_ids[0] < 8:
429
- meta_end = "_narrow"+ "_refview" + str(view_ids[0])
430
- else:
431
- meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
432
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end
433
-
434
-
435
- # - image to render
436
- sample['query_image'] = sample['images'][0]
437
- sample['query_c2w'] = sample['c2ws'][0]
438
- sample['query_w2c'] = sample['w2cs'][0]
439
- sample['query_intrinsic'] = sample['intrinsics'][0]
440
- sample['query_depth'] = sample['depths_h'][0]
441
- sample['query_mask'] = sample['masks_h'][0]
442
- sample['query_near_far'] = sample['near_fars'][0]
443
-
444
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
445
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
446
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
447
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
448
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
449
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
450
- sample['view_ids'] = sample['view_ids'][start_idx:]
451
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
452
-
453
- sample['scale_mat'] = torch.from_numpy(scale_mat)
454
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
455
-
456
- # - generate rays
457
- if ('val' in self.split) or ('test' in self.split):
458
- sample_rays = gen_rays_from_single_image(
459
- img_wh[1], img_wh[0],
460
- sample['query_image'],
461
- sample['query_intrinsic'],
462
- sample['query_c2w'],
463
- depth=sample['query_depth'],
464
- mask=sample['query_mask'] if self.clean_image else None)
465
- else:
466
- sample_rays = gen_random_rays_from_single_image(
467
- img_wh[1], img_wh[0],
468
- self.N_rays,
469
- sample['query_image'],
470
- sample['query_intrinsic'],
471
- sample['query_c2w'],
472
- depth=sample['query_depth'],
473
- mask=sample['query_mask'] if self.clean_image else None,
474
- dilated_mask=mask_dilated,
475
- importance_sample=self.importance_sample)
476
-
477
-
478
- sample['rays'] = sample_rays
479
-
480
- return sample
SparseNeuS_demo_v1/data/blender_general_4_narrow_and_6_2_stage_mix.py DELETED
@@ -1,476 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- self.root_dir = root_dir
70
- self.split = split
71
-
72
- self.n_views = n_views
73
- self.N_rays = N_rays
74
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
75
-
76
- self.clean_image = clean_image
77
- self.importance_sample = importance_sample
78
- self.test_ref_views = test_ref_views # used for testing
79
- self.scale_factor = 1.0
80
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
81
-
82
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
83
- with open(lvis_json_path, 'r') as f:
84
- lvis_paths = json.load(f)
85
- if self.split == 'train':
86
- self.lvis_paths = lvis_paths['train']
87
- else:
88
- self.lvis_paths = lvis_paths['val']
89
- if img_wh is not None:
90
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
91
- 'img_wh must both be multiples of 32!'
92
-
93
-
94
- pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
95
- with open(pose_json_path_narrow, 'r') as f:
96
- narrow_meta = json.load(f)
97
-
98
- pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_5pred_pose.json"
99
- with open(pose_json_path_two_stage, 'r') as f:
100
- two_stage_meta = json.load(f)
101
-
102
-
103
- self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (6 + 6*4)
104
- self.img_wh = (256, 256)
105
- self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
106
- intrinsic = np.eye(4)
107
- assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
108
- intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
109
- self.intrinsic = intrinsic
110
- assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
111
- self.near_far = np.array(narrow_meta["near_far"])
112
- self.near_far[1] = 1.8
113
- self.define_transforms()
114
- self.blender2opencv = np.array(
115
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
116
- )
117
-
118
-
119
- self.c2ws = []
120
- self.w2cs = []
121
- self.near_fars = []
122
- for idx, img_id in enumerate(self.img_ids):
123
- pose = self.input_poses[idx]
124
- c2w = pose @ self.blender2opencv
125
- self.c2ws.append(c2w)
126
- self.w2cs.append(np.linalg.inv(c2w))
127
- self.near_fars.append(self.near_far)
128
-
129
-
130
-
131
- self.c2ws = np.stack(self.c2ws, axis=0)
132
- self.w2cs = np.stack(self.w2cs, axis=0)
133
-
134
-
135
- self.all_intrinsics = [] # the cam info of the whole scene
136
- self.all_extrinsics = []
137
- self.all_near_fars = []
138
- self.load_cam_info()
139
-
140
- # * bounding box for rendering
141
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
142
- self.bbox_max = np.array([1.0, 1.0, 1.0])
143
-
144
- # - used for cost volume regularization
145
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
146
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
147
-
148
-
149
- def define_transforms(self):
150
- self.transform = T.Compose([T.ToTensor()])
151
-
152
-
153
-
154
- def load_cam_info(self):
155
- for vid, img_id in enumerate(self.img_ids):
156
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
157
- self.all_intrinsics.append(intrinsic)
158
- self.all_extrinsics.append(extrinsic)
159
- self.all_near_fars.append(near_far)
160
-
161
- def read_depth(self, filename):
162
- pass
163
-
164
- def read_mask(self, filename):
165
- mask_h = cv2.imread(filename, 0)
166
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
167
- interpolation=cv2.INTER_NEAREST)
168
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
169
- interpolation=cv2.INTER_NEAREST)
170
-
171
- mask[mask > 0] = 1 # the masks stored in png are not binary
172
- mask_h[mask_h > 0] = 1
173
-
174
- return mask, mask_h
175
-
176
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
177
-
178
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
179
-
180
- radius = radius * factor
181
- scale_mat = np.diag([radius, radius, radius, 1.0])
182
- scale_mat[:3, 3] = center.cpu().numpy()
183
- scale_mat = scale_mat.astype(np.float32)
184
-
185
- return scale_mat, 1. / radius.cpu().numpy()
186
-
187
- def __len__(self):
188
- return 12*len(self.lvis_paths)
189
-
190
-
191
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
192
- pass
193
-
194
-
195
- def __getitem__(self, idx):
196
- sample = {}
197
- origin_idx = idx
198
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
199
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
200
- idx_original=idx
201
-
202
- folder_uid_dict = self.lvis_paths[idx//12]
203
-
204
- folder_id = folder_uid_dict['folder_id']
205
- uid = folder_uid_dict['uid']
206
-
207
- if idx % 12 < 8:
208
- idx = idx % 12 # [0, 7]
209
- # target view
210
- c2w = self.c2ws[idx]
211
- w2c = np.linalg.inv(c2w)
212
- w2c_ref = w2c
213
- w2c_ref_inv = np.linalg.inv(w2c_ref)
214
-
215
- w2cs.append(w2c @ w2c_ref_inv)
216
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
217
-
218
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
219
-
220
- depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
221
-
222
-
223
- img = Image.open(img_filename)
224
-
225
- img = self.transform(img) # (4, h, w)
226
-
227
-
228
- if img.shape[0] == 4:
229
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
230
- imgs += [img]
231
-
232
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
233
- mask_h = depth_h > 0
234
- # print("valid pixels", np.sum(mask_h))
235
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
236
- surface_points = directions * depth_h[..., None] # [H, W, 3]
237
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
238
- depth_h = distance
239
-
240
-
241
- depths_h.append(depth_h)
242
- masks_h.append(mask_h)
243
-
244
- intrinsic = self.intrinsic
245
- intrinsics.append(intrinsic)
246
-
247
-
248
- near_fars.append(self.near_fars[idx])
249
- image_perm = 0 # only supervised on reference view
250
-
251
- mask_dilated = None
252
-
253
- # src_views = range(8+idx*4, 8+(idx+1)*4)
254
-
255
- src_views = range(8, 8 + 8 * 4)
256
- src_views_used = []
257
- for vid in src_views:
258
- if (vid // 4) % 2 != idx % 2:
259
- continue
260
- src_views_used.append(vid)
261
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
262
-
263
- img = Image.open(img_filename)
264
- img_wh = self.img_wh
265
-
266
- img = self.transform(img)
267
- if img.shape[0] == 4:
268
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
269
-
270
- imgs += [img]
271
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
272
- depths_h.append(depth_h)
273
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
274
-
275
- near_fars.append(self.all_near_fars[vid])
276
- intrinsics.append(self.all_intrinsics[vid])
277
-
278
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
279
-
280
- else:
281
- idx = idx % 12 - 8 # [0, 5]
282
- valid_list = [0, 2, 3, 5]
283
- idx = valid_list[idx] # [0, 3]
284
- c2w = self.c2ws[idx + 40]
285
- w2c = np.linalg.inv(c2w)
286
- w2c_ref = w2c
287
- w2c_ref_inv = np.linalg.inv(w2c_ref)
288
-
289
- w2cs.append(w2c @ w2c_ref_inv)
290
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
291
-
292
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_5pred/", folder_id, uid, f'view_0_{idx}_0.png')
293
-
294
-
295
-
296
- img = Image.open(img_filename)
297
-
298
- img = self.transform(img) # (4, h, w)
299
-
300
- # print("img_pre", img.shape)
301
- if img.shape[0] == 4:
302
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
303
- # print("img", img.shape)
304
- imgs += [img]
305
-
306
-
307
- depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
308
- depth_h = depth_h.fill_(-1.0)
309
- # depth_h = torch.fill((img.shape[1], img.shape[2]), -1.0)
310
- # print("depth_h", depth_h.shape)
311
- mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
312
- depths_h.append(depth_h)
313
- masks_h.append(mask_h)
314
-
315
- intrinsic = self.intrinsic
316
- intrinsics.append(intrinsic)
317
-
318
-
319
- near_fars.append(self.near_fars[idx])
320
- image_perm = 0 # only supervised on reference view
321
-
322
- mask_dilated = None
323
-
324
- # src_views = range(gt_view_idx * 12, (gt_view_idx + 1) * 12)
325
-
326
-
327
- src_views = range(40+6, 40+6+24)
328
- src_views_used = []
329
- for vid in src_views:
330
- view_dix_to_use = (vid - 40 - 6) // 4
331
- if view_dix_to_use not in valid_list:
332
- continue
333
- src_views_used.append(vid)
334
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_5pred/", folder_id, uid, f'view_0_{idx}_{(vid-46) % 4 + 1}.png')
335
-
336
- img = Image.open(img_filename)
337
- img_wh = self.img_wh
338
-
339
- img = self.transform(img)
340
- # print("img shape1: ", img.shape)
341
- if img.shape[0] == 4:
342
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
343
- # print("img shape2: ", img.shape)
344
- imgs += [img]
345
- depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
346
- depth_h = depth_h.fill_(-1.0)
347
- depths_h.append(depth_h)
348
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
349
-
350
- near_fars.append(self.all_near_fars[vid])
351
- intrinsics.append(self.all_intrinsics[vid])
352
-
353
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
354
-
355
-
356
- scale_mat, scale_factor = self.cal_scale_mat(
357
- img_hw=[img_wh[1], img_wh[0]],
358
- intrinsics=intrinsics, extrinsics=w2cs,
359
- near_fars=near_fars, factor=1.1
360
- )
361
-
362
-
363
- new_near_fars = []
364
- new_w2cs = []
365
- new_c2ws = []
366
- new_affine_mats = []
367
- new_depths_h = []
368
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
369
-
370
- P = intrinsic @ extrinsic @ scale_mat
371
- P = P[:3, :4]
372
- # - should use load_K_Rt_from_P() to obtain c2w
373
- c2w = load_K_Rt_from_P(None, P)[1]
374
- w2c = np.linalg.inv(c2w)
375
- new_w2cs.append(w2c)
376
- new_c2ws.append(c2w)
377
- affine_mat = np.eye(4)
378
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
379
- new_affine_mats.append(affine_mat)
380
-
381
- camera_o = c2w[:3, 3]
382
- dist = np.sqrt(np.sum(camera_o ** 2))
383
- near = dist - 1
384
- far = dist + 1
385
-
386
- new_near_fars.append([0.95 * near, 1.05 * far])
387
-
388
- new_depths_h.append(depth * scale_factor)
389
-
390
- # print(new_near_fars)
391
- # print("img numeber: ", len(imgs))
392
- imgs = torch.stack(imgs).float()
393
- depths_h = np.stack(new_depths_h)
394
- masks_h = np.stack(masks_h)
395
-
396
- affine_mats = np.stack(new_affine_mats)
397
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
398
- new_near_fars)
399
-
400
- if self.split == 'train':
401
- start_idx = 0
402
- else:
403
- start_idx = 1
404
-
405
- view_ids = [idx_original % 12] + src_views_used
406
- sample['origin_idx'] = origin_idx
407
- sample['images'] = imgs # (V, 3, H, W)
408
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
409
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
410
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
411
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
412
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
413
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
414
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
415
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
416
-
417
- # sample['light_idx'] = torch.tensor(light_idx)
418
- sample['scan'] = folder_id
419
-
420
- sample['scale_factor'] = torch.tensor(scale_factor)
421
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
422
- sample['render_img_idx'] = torch.tensor(image_perm)
423
- sample['partial_vol_origin'] = self.partial_vol_origin
424
- if view_ids[0] < 8:
425
- meta_end = "_narrow"+ "_refview" + str(view_ids[0])
426
- else:
427
- meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
428
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end
429
-
430
-
431
- # - image to render
432
- sample['query_image'] = sample['images'][0]
433
- sample['query_c2w'] = sample['c2ws'][0]
434
- sample['query_w2c'] = sample['w2cs'][0]
435
- sample['query_intrinsic'] = sample['intrinsics'][0]
436
- sample['query_depth'] = sample['depths_h'][0]
437
- sample['query_mask'] = sample['masks_h'][0]
438
- sample['query_near_far'] = sample['near_fars'][0]
439
-
440
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
441
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
442
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
443
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
444
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
445
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
446
- sample['view_ids'] = sample['view_ids'][start_idx:]
447
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
448
-
449
- sample['scale_mat'] = torch.from_numpy(scale_mat)
450
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
451
-
452
- # - generate rays
453
- if ('val' in self.split) or ('test' in self.split):
454
- sample_rays = gen_rays_from_single_image(
455
- img_wh[1], img_wh[0],
456
- sample['query_image'],
457
- sample['query_intrinsic'],
458
- sample['query_c2w'],
459
- depth=sample['query_depth'],
460
- mask=sample['query_mask'] if self.clean_image else None)
461
- else:
462
- sample_rays = gen_random_rays_from_single_image(
463
- img_wh[1], img_wh[0],
464
- self.N_rays,
465
- sample['query_image'],
466
- sample['query_intrinsic'],
467
- sample['query_c2w'],
468
- depth=sample['query_depth'],
469
- mask=sample['query_mask'] if self.clean_image else None,
470
- dilated_mask=mask_dilated,
471
- importance_sample=self.importance_sample)
472
-
473
-
474
- sample['rays'] = sample_rays
475
-
476
- return sample
SparseNeuS_demo_v1/data/blender_general_6_narrow_and_6_2_stage_blend_mix.py DELETED
@@ -1,449 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- self.root_dir = root_dir
70
- self.split = split
71
- if self.split == 'train':
72
- self.imgs_per_instance = 12
73
- else:
74
- self.imgs_per_instance = 16
75
- self.n_views = n_views
76
- self.N_rays = N_rays
77
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
78
-
79
- self.clean_image = clean_image
80
- self.importance_sample = importance_sample
81
- self.test_ref_views = test_ref_views # used for testing
82
- self.scale_factor = 1.0
83
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
84
-
85
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
86
- with open(lvis_json_path, 'r') as f:
87
- lvis_paths = json.load(f)
88
- if self.split == 'train':
89
- self.lvis_paths = lvis_paths['train']
90
- else:
91
- self.lvis_paths = lvis_paths['val']
92
- if img_wh is not None:
93
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
94
- 'img_wh must both be multiples of 32!'
95
-
96
-
97
- pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
98
- with open(pose_json_path_narrow, 'r') as f:
99
- narrow_meta = json.load(f)
100
-
101
- pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
102
- with open(pose_json_path_two_stage, 'r') as f:
103
- two_stage_meta = json.load(f)
104
-
105
-
106
- self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 4*4)
107
- self.img_wh = (256, 256)
108
- self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
109
- intrinsic = np.eye(4)
110
- assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
111
- intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
112
- self.intrinsic = intrinsic
113
- assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
114
- self.near_far = np.array(narrow_meta["near_far"])
115
- self.near_far[1] = 1.8
116
- self.define_transforms()
117
- self.blender2opencv = np.array(
118
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
119
- )
120
-
121
-
122
- self.c2ws = []
123
- self.w2cs = []
124
- self.near_fars = []
125
- for idx, img_id in enumerate(self.img_ids):
126
- pose = self.input_poses[idx]
127
- c2w = pose @ self.blender2opencv
128
- self.c2ws.append(c2w)
129
- self.w2cs.append(np.linalg.inv(c2w))
130
- self.near_fars.append(self.near_far)
131
-
132
-
133
-
134
- self.c2ws = np.stack(self.c2ws, axis=0)
135
- self.w2cs = np.stack(self.w2cs, axis=0)
136
-
137
-
138
- self.all_intrinsics = [] # the cam info of the whole scene
139
- self.all_extrinsics = []
140
- self.all_near_fars = []
141
- self.load_cam_info()
142
-
143
- # * bounding box for rendering
144
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
145
- self.bbox_max = np.array([1.0, 1.0, 1.0])
146
-
147
- # - used for cost volume regularization
148
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
149
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
150
-
151
-
152
- def define_transforms(self):
153
- self.transform = T.Compose([T.ToTensor()])
154
-
155
-
156
-
157
- def load_cam_info(self):
158
- for vid, img_id in enumerate(self.img_ids):
159
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
160
- self.all_intrinsics.append(intrinsic)
161
- self.all_extrinsics.append(extrinsic)
162
- self.all_near_fars.append(near_far)
163
-
164
- def read_depth(self, filename):
165
- pass
166
-
167
- def read_mask(self, filename):
168
- mask_h = cv2.imread(filename, 0)
169
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
170
- interpolation=cv2.INTER_NEAREST)
171
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
172
- interpolation=cv2.INTER_NEAREST)
173
-
174
- mask[mask > 0] = 1 # the masks stored in png are not binary
175
- mask_h[mask_h > 0] = 1
176
-
177
- return mask, mask_h
178
-
179
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
180
-
181
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
182
-
183
- radius = radius * factor
184
- scale_mat = np.diag([radius, radius, radius, 1.0])
185
- scale_mat[:3, 3] = center.cpu().numpy()
186
- scale_mat = scale_mat.astype(np.float32)
187
-
188
- return scale_mat, 1. / radius.cpu().numpy()
189
-
190
- def __len__(self):
191
- return self.imgs_per_instance*len(self.lvis_paths)
192
-
193
-
194
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
195
- pass
196
-
197
-
198
- def __getitem__(self, idx):
199
- sample = {}
200
- origin_idx = idx
201
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
202
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
203
- idx_original=idx
204
-
205
- folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
206
-
207
- folder_id = folder_uid_dict['folder_id']
208
- uid = folder_uid_dict['uid']
209
-
210
- if self.split == 'train':
211
- if idx == 4:
212
- idx = 5
213
- elif idx == 5:
214
- idx = 7
215
- elif idx == 10:
216
- idx = 13
217
- elif idx == 11:
218
- idx = 15
219
-
220
- if idx % 16 < 8: # narrow image as target
221
- idx = idx % 16 # [0, 7]
222
- # target view
223
- c2w = self.c2ws[idx]
224
- w2c = np.linalg.inv(c2w)
225
- w2c_ref = w2c
226
- w2c_ref_inv = np.linalg.inv(w2c_ref)
227
-
228
- w2cs.append(w2c @ w2c_ref_inv)
229
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
230
-
231
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
232
-
233
- depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
234
-
235
-
236
- img = Image.open(img_filename)
237
-
238
- img = self.transform(img) # (4, h, w)
239
-
240
-
241
- if img.shape[0] == 4:
242
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
243
- imgs += [img]
244
-
245
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
246
- mask_h = depth_h > 0
247
- # print("valid pixels", np.sum(mask_h))
248
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
249
- surface_points = directions * depth_h[..., None] # [H, W, 3]
250
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
251
- depth_h = distance
252
-
253
- else:
254
- idx = idx % 16 - 8 # [0, 5]
255
- c2w = self.c2ws[idx + 40]
256
- w2c = np.linalg.inv(c2w)
257
- w2c_ref = w2c
258
- w2c_ref_inv = np.linalg.inv(w2c_ref)
259
-
260
- w2cs.append(w2c @ w2c_ref_inv)
261
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
262
-
263
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png')
264
-
265
- img = Image.open(img_filename)
266
- img = self.transform(img) # (4, h, w)
267
-
268
- # print("img_pre", img.shape)
269
- if img.shape[0] == 4:
270
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
271
- # print("img", img.shape)
272
- imgs += [img]
273
-
274
- depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
275
- depth_h = depth_h.fill_(-1.0)
276
-
277
- mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
278
- depths_h.append(depth_h)
279
- masks_h.append(mask_h)
280
-
281
- intrinsic = self.intrinsic
282
- intrinsics.append(intrinsic)
283
-
284
-
285
- near_fars.append(self.near_fars[idx])
286
- image_perm = 0 # only supervised on reference view
287
-
288
- mask_dilated = None
289
- if_use_narrow = []
290
- if self.split == 'train':
291
- for i in range(8):
292
- if np.random.random() > 0.5:
293
- if_use_narrow.append(True) # use narrow
294
- else:
295
- if_use_narrow.append(False) # 2-stage prediction
296
- if_use_narrow[origin_idx % 8] = True if origin_idx < 8 else False
297
- else:
298
- for i in range(8):
299
- if_use_narrow.append( True if origin_idx < 8 else False)
300
- src_views = range(8, 8 + 8 * 4)
301
- src_views_used = []
302
- for vid in src_views:
303
- if ((vid - 8) // 4 == 4) or ((vid - 8) // 4 == 6):
304
- continue
305
- src_views_used.append(vid)
306
- cur_view_id = (vid - 8) // 4
307
- # choose narrow
308
- if if_use_narrow[cur_view_id]:
309
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png')
310
- else: # choose 2-stage
311
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{(vid - 8) // 4}_{(vid-8) % 4 + 1}.png')
312
-
313
- img = Image.open(img_filename)
314
- img_wh = self.img_wh
315
-
316
- img = self.transform(img)
317
- if img.shape[0] == 4:
318
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
319
-
320
- imgs += [img]
321
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
322
- depths_h.append(depth_h)
323
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
324
-
325
- near_fars.append(self.all_near_fars[vid])
326
- intrinsics.append(self.all_intrinsics[vid])
327
-
328
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
329
-
330
-
331
- scale_mat, scale_factor = self.cal_scale_mat(
332
- img_hw=[img_wh[1], img_wh[0]],
333
- intrinsics=intrinsics, extrinsics=w2cs,
334
- near_fars=near_fars, factor=1.1
335
- )
336
-
337
-
338
- new_near_fars = []
339
- new_w2cs = []
340
- new_c2ws = []
341
- new_affine_mats = []
342
- new_depths_h = []
343
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
344
-
345
- P = intrinsic @ extrinsic @ scale_mat
346
- P = P[:3, :4]
347
- # - should use load_K_Rt_from_P() to obtain c2w
348
- c2w = load_K_Rt_from_P(None, P)[1]
349
- w2c = np.linalg.inv(c2w)
350
- new_w2cs.append(w2c)
351
- new_c2ws.append(c2w)
352
- affine_mat = np.eye(4)
353
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
354
- new_affine_mats.append(affine_mat)
355
-
356
- camera_o = c2w[:3, 3]
357
- dist = np.sqrt(np.sum(camera_o ** 2))
358
- near = dist - 1
359
- far = dist + 1
360
-
361
- new_near_fars.append([0.95 * near, 1.05 * far])
362
- new_depths_h.append(depth * scale_factor)
363
-
364
-
365
- imgs = torch.stack(imgs).float()
366
- depths_h = np.stack(new_depths_h)
367
- masks_h = np.stack(masks_h)
368
-
369
- affine_mats = np.stack(new_affine_mats)
370
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
371
- new_near_fars)
372
-
373
- if self.split == 'train':
374
- start_idx = 0
375
- else:
376
- start_idx = 1
377
-
378
- view_ids = [idx_original % self.imgs_per_instance] + src_views_used
379
- sample['origin_idx'] = origin_idx
380
- sample['images'] = imgs # (V, 3, H, W)
381
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
382
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
383
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
384
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
385
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
386
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
387
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
388
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
389
-
390
- # sample['light_idx'] = torch.tensor(light_idx)
391
- sample['scan'] = folder_id
392
-
393
- sample['scale_factor'] = torch.tensor(scale_factor)
394
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
395
- sample['render_img_idx'] = torch.tensor(image_perm)
396
- sample['partial_vol_origin'] = self.partial_vol_origin
397
- if view_ids[0] < 8:
398
- meta_end = "_narrow"+ "_refview" + str(view_ids[0])
399
- else:
400
- meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
401
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end
402
-
403
-
404
- # - image to render
405
- sample['query_image'] = sample['images'][0]
406
- sample['query_c2w'] = sample['c2ws'][0]
407
- sample['query_w2c'] = sample['w2cs'][0]
408
- sample['query_intrinsic'] = sample['intrinsics'][0]
409
- sample['query_depth'] = sample['depths_h'][0]
410
- sample['query_mask'] = sample['masks_h'][0]
411
- sample['query_near_far'] = sample['near_fars'][0]
412
-
413
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
414
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
415
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
416
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
417
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
418
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
419
- sample['view_ids'] = sample['view_ids'][start_idx:]
420
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
421
-
422
- sample['scale_mat'] = torch.from_numpy(scale_mat)
423
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
424
-
425
- # - generate rays
426
- if ('val' in self.split) or ('test' in self.split):
427
- sample_rays = gen_rays_from_single_image(
428
- img_wh[1], img_wh[0],
429
- sample['query_image'],
430
- sample['query_intrinsic'],
431
- sample['query_c2w'],
432
- depth=sample['query_depth'],
433
- mask=sample['query_mask'] if self.clean_image else None)
434
- else:
435
- sample_rays = gen_random_rays_from_single_image(
436
- img_wh[1], img_wh[0],
437
- self.N_rays,
438
- sample['query_image'],
439
- sample['query_intrinsic'],
440
- sample['query_c2w'],
441
- depth=sample['query_depth'],
442
- mask=sample['query_mask'] if self.clean_image else None,
443
- dilated_mask=mask_dilated,
444
- importance_sample=self.importance_sample)
445
-
446
-
447
- sample['rays'] = sample_rays
448
-
449
- return sample
SparseNeuS_demo_v1/data/blender_general_8_2_stage.py DELETED
@@ -1,396 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- self.root_dir = root_dir
70
- self.split = split
71
-
72
- self.imgs_per_instance = 8
73
-
74
- self.n_views = n_views
75
- self.N_rays = N_rays
76
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
77
-
78
- self.clean_image = clean_image
79
- self.importance_sample = importance_sample
80
- self.test_ref_views = test_ref_views # used for testing
81
- self.scale_factor = 1.0
82
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
83
-
84
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
85
- with open(lvis_json_path, 'r') as f:
86
- lvis_paths = json.load(f)
87
- if self.split == 'train':
88
- self.lvis_paths = lvis_paths['train']
89
- else:
90
- self.lvis_paths = lvis_paths['val']
91
- if img_wh is not None:
92
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
93
- 'img_wh must both be multiples of 32!'
94
-
95
-
96
- pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
97
- with open(pose_json_path_narrow, 'r') as f:
98
- narrow_meta = json.load(f)
99
-
100
- pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
101
- with open(pose_json_path_two_stage, 'r') as f:
102
- two_stage_meta = json.load(f)
103
-
104
-
105
- self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4)
106
- self.img_wh = (256, 256)
107
- self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
108
- intrinsic = np.eye(4)
109
- assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
110
- intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
111
- self.intrinsic = intrinsic
112
- assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
113
- self.near_far = np.array(narrow_meta["near_far"])
114
- self.near_far[1] = 1.8
115
- self.define_transforms()
116
- self.blender2opencv = np.array(
117
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
118
- )
119
-
120
-
121
- self.c2ws = []
122
- self.w2cs = []
123
- self.near_fars = []
124
- for idx, img_id in enumerate(self.img_ids):
125
- pose = self.input_poses[idx]
126
- c2w = pose @ self.blender2opencv
127
- self.c2ws.append(c2w)
128
- self.w2cs.append(np.linalg.inv(c2w))
129
- self.near_fars.append(self.near_far)
130
-
131
-
132
-
133
- self.c2ws = np.stack(self.c2ws, axis=0)
134
- self.w2cs = np.stack(self.w2cs, axis=0)
135
-
136
-
137
- self.all_intrinsics = [] # the cam info of the whole scene
138
- self.all_extrinsics = []
139
- self.all_near_fars = []
140
- self.load_cam_info()
141
-
142
- # * bounding box for rendering
143
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
144
- self.bbox_max = np.array([1.0, 1.0, 1.0])
145
-
146
- # - used for cost volume regularization
147
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
148
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
149
-
150
-
151
- def define_transforms(self):
152
- self.transform = T.Compose([T.ToTensor()])
153
-
154
-
155
-
156
- def load_cam_info(self):
157
- for vid, img_id in enumerate(self.img_ids):
158
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
159
- self.all_intrinsics.append(intrinsic)
160
- self.all_extrinsics.append(extrinsic)
161
- self.all_near_fars.append(near_far)
162
-
163
- def read_depth(self, filename):
164
- pass
165
-
166
- def read_mask(self, filename):
167
- mask_h = cv2.imread(filename, 0)
168
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
169
- interpolation=cv2.INTER_NEAREST)
170
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
171
- interpolation=cv2.INTER_NEAREST)
172
-
173
- mask[mask > 0] = 1 # the masks stored in png are not binary
174
- mask_h[mask_h > 0] = 1
175
-
176
- return mask, mask_h
177
-
178
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
179
-
180
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
181
-
182
- radius = radius * factor
183
- scale_mat = np.diag([radius, radius, radius, 1.0])
184
- scale_mat[:3, 3] = center.cpu().numpy()
185
- scale_mat = scale_mat.astype(np.float32)
186
-
187
- return scale_mat, 1. / radius.cpu().numpy()
188
-
189
- def __len__(self):
190
- return self.imgs_per_instance * len(self.lvis_paths)
191
-
192
-
193
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
194
- pass
195
-
196
-
197
- def __getitem__(self, idx):
198
- sample = {}
199
- origin_idx = idx
200
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
201
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
202
- idx_original=idx
203
-
204
- folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
205
-
206
- folder_id = folder_uid_dict['folder_id']
207
- uid = folder_uid_dict['uid']
208
-
209
- idx = idx % self.imgs_per_instance # [0, 7]
210
- # target view
211
- c2w = self.c2ws[idx]
212
- w2c = np.linalg.inv(c2w)
213
- w2c_ref = w2c
214
- w2c_ref_inv = np.linalg.inv(w2c_ref)
215
-
216
- w2cs.append(w2c @ w2c_ref_inv)
217
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
218
-
219
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
220
-
221
- depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
222
-
223
-
224
- img = Image.open(img_filename)
225
-
226
- img = self.transform(img) # (4, h, w)
227
-
228
-
229
- if img.shape[0] == 4:
230
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
231
- imgs += [img]
232
-
233
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
234
- mask_h = depth_h > 0
235
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
236
- surface_points = directions * depth_h[..., None] # [H, W, 3]
237
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
238
- depth_h = distance
239
-
240
-
241
- depths_h.append(depth_h)
242
- masks_h.append(mask_h)
243
-
244
- intrinsic = self.intrinsic
245
- intrinsics.append(intrinsic)
246
-
247
-
248
- near_fars.append(self.near_fars[idx])
249
- image_perm = 0 # only supervised on reference view
250
-
251
- mask_dilated = None
252
-
253
-
254
-
255
- src_views = range(8, 8+32)
256
- src_views_used = []
257
- for vid in src_views:
258
- view_dix_to_use = (vid - 8) // 4
259
- src_views_used.append(vid)
260
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_{(vid-8) % 4 + 1}.png')
261
-
262
- img = Image.open(img_filename)
263
- img_wh = self.img_wh
264
-
265
- img = self.transform(img)
266
- if img.shape[0] == 4:
267
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
268
- imgs += [img]
269
- depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
270
- depth_h = depth_h.fill_(-1.0)
271
- depths_h.append(depth_h)
272
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
273
-
274
- near_fars.append(self.all_near_fars[vid])
275
- intrinsics.append(self.all_intrinsics[vid])
276
-
277
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
278
-
279
-
280
- scale_mat, scale_factor = self.cal_scale_mat(
281
- img_hw=[img_wh[1], img_wh[0]],
282
- intrinsics=intrinsics, extrinsics=w2cs,
283
- near_fars=near_fars, factor=1.1
284
- )
285
-
286
-
287
- new_near_fars = []
288
- new_w2cs = []
289
- new_c2ws = []
290
- new_affine_mats = []
291
- new_depths_h = []
292
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
293
-
294
- P = intrinsic @ extrinsic @ scale_mat
295
- P = P[:3, :4]
296
- # - should use load_K_Rt_from_P() to obtain c2w
297
- c2w = load_K_Rt_from_P(None, P)[1]
298
- w2c = np.linalg.inv(c2w)
299
- new_w2cs.append(w2c)
300
- new_c2ws.append(c2w)
301
- affine_mat = np.eye(4)
302
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
303
- new_affine_mats.append(affine_mat)
304
-
305
- camera_o = c2w[:3, 3]
306
- dist = np.sqrt(np.sum(camera_o ** 2))
307
- near = dist - 1
308
- far = dist + 1
309
-
310
- new_near_fars.append([0.95 * near, 1.05 * far])
311
-
312
- new_depths_h.append(depth * scale_factor)
313
-
314
-
315
- imgs = torch.stack(imgs).float()
316
- depths_h = np.stack(new_depths_h)
317
- masks_h = np.stack(masks_h)
318
-
319
- affine_mats = np.stack(new_affine_mats)
320
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
321
- new_near_fars)
322
-
323
- if self.split == 'train':
324
- start_idx = 0
325
- else:
326
- start_idx = 1
327
-
328
- view_ids = [idx_original % self.imgs_per_instance] + src_views_used
329
- sample['origin_idx'] = origin_idx
330
- sample['images'] = imgs # (V, 3, H, W)
331
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
332
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
333
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
334
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
335
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
336
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
337
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
338
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
339
-
340
- # sample['light_idx'] = torch.tensor(light_idx)
341
- sample['scan'] = folder_id
342
-
343
- sample['scale_factor'] = torch.tensor(scale_factor)
344
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
345
- sample['render_img_idx'] = torch.tensor(image_perm)
346
- sample['partial_vol_origin'] = self.partial_vol_origin
347
- meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
348
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end
349
-
350
-
351
- # - image to render
352
- sample['query_image'] = sample['images'][0]
353
- sample['query_c2w'] = sample['c2ws'][0]
354
- sample['query_w2c'] = sample['w2cs'][0]
355
- sample['query_intrinsic'] = sample['intrinsics'][0]
356
- sample['query_depth'] = sample['depths_h'][0]
357
- sample['query_mask'] = sample['masks_h'][0]
358
- sample['query_near_far'] = sample['near_fars'][0]
359
-
360
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
361
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
362
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
363
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
364
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
365
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
366
- sample['view_ids'] = sample['view_ids'][start_idx:]
367
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
368
-
369
- sample['scale_mat'] = torch.from_numpy(scale_mat)
370
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
371
-
372
- # - generate rays
373
- if ('val' in self.split) or ('test' in self.split):
374
- sample_rays = gen_rays_from_single_image(
375
- img_wh[1], img_wh[0],
376
- sample['query_image'],
377
- sample['query_intrinsic'],
378
- sample['query_c2w'],
379
- depth=sample['query_depth'],
380
- mask=sample['query_mask'] if self.clean_image else None)
381
- else:
382
- sample_rays = gen_random_rays_from_single_image(
383
- img_wh[1], img_wh[0],
384
- self.N_rays,
385
- sample['query_image'],
386
- sample['query_intrinsic'],
387
- sample['query_c2w'],
388
- depth=sample['query_depth'],
389
- mask=sample['query_mask'] if self.clean_image else None,
390
- dilated_mask=mask_dilated,
391
- importance_sample=self.importance_sample)
392
-
393
-
394
- sample['rays'] = sample_rays
395
-
396
- return sample
SparseNeuS_demo_v1/data/blender_general_8_4_gt.py DELETED
@@ -1,396 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- self.root_dir = root_dir
70
- self.split = split
71
-
72
- self.imgs_per_instance = 8
73
-
74
- self.n_views = n_views
75
- self.N_rays = N_rays
76
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
77
-
78
- self.clean_image = clean_image
79
- self.importance_sample = importance_sample
80
- self.test_ref_views = test_ref_views # used for testing
81
- self.scale_factor = 1.0
82
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
83
-
84
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
85
- with open(lvis_json_path, 'r') as f:
86
- lvis_paths = json.load(f)
87
- if self.split == 'train':
88
- self.lvis_paths = lvis_paths['train']
89
- else:
90
- self.lvis_paths = lvis_paths['val']
91
- if img_wh is not None:
92
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
93
- 'img_wh must both be multiples of 32!'
94
-
95
-
96
- pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
97
- with open(pose_json_path_narrow, 'r') as f:
98
- narrow_meta = json.load(f)
99
-
100
- pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
101
- with open(pose_json_path_two_stage, 'r') as f:
102
- two_stage_meta = json.load(f)
103
-
104
-
105
- self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4)
106
- self.img_wh = (256, 256)
107
- self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
108
- intrinsic = np.eye(4)
109
- assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
110
- intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
111
- self.intrinsic = intrinsic
112
- assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
113
- self.near_far = np.array(narrow_meta["near_far"])
114
- self.near_far[1] = 1.8
115
- self.define_transforms()
116
- self.blender2opencv = np.array(
117
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
118
- )
119
-
120
-
121
- self.c2ws = []
122
- self.w2cs = []
123
- self.near_fars = []
124
- for idx, img_id in enumerate(self.img_ids):
125
- pose = self.input_poses[idx]
126
- c2w = pose @ self.blender2opencv
127
- self.c2ws.append(c2w)
128
- self.w2cs.append(np.linalg.inv(c2w))
129
- self.near_fars.append(self.near_far)
130
-
131
-
132
-
133
- self.c2ws = np.stack(self.c2ws, axis=0)
134
- self.w2cs = np.stack(self.w2cs, axis=0)
135
-
136
-
137
- self.all_intrinsics = [] # the cam info of the whole scene
138
- self.all_extrinsics = []
139
- self.all_near_fars = []
140
- self.load_cam_info()
141
-
142
- # * bounding box for rendering
143
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
144
- self.bbox_max = np.array([1.0, 1.0, 1.0])
145
-
146
- # - used for cost volume regularization
147
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
148
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
149
-
150
-
151
- def define_transforms(self):
152
- self.transform = T.Compose([T.ToTensor()])
153
-
154
-
155
-
156
- def load_cam_info(self):
157
- for vid, img_id in enumerate(self.img_ids):
158
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
159
- self.all_intrinsics.append(intrinsic)
160
- self.all_extrinsics.append(extrinsic)
161
- self.all_near_fars.append(near_far)
162
-
163
- def read_depth(self, filename):
164
- pass
165
-
166
- def read_mask(self, filename):
167
- mask_h = cv2.imread(filename, 0)
168
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
169
- interpolation=cv2.INTER_NEAREST)
170
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
171
- interpolation=cv2.INTER_NEAREST)
172
-
173
- mask[mask > 0] = 1 # the masks stored in png are not binary
174
- mask_h[mask_h > 0] = 1
175
-
176
- return mask, mask_h
177
-
178
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
179
-
180
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
181
-
182
- radius = radius * factor
183
- scale_mat = np.diag([radius, radius, radius, 1.0])
184
- scale_mat[:3, 3] = center.cpu().numpy()
185
- scale_mat = scale_mat.astype(np.float32)
186
-
187
- return scale_mat, 1. / radius.cpu().numpy()
188
-
189
- def __len__(self):
190
- return self.imgs_per_instance * len(self.lvis_paths)
191
-
192
-
193
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
194
- pass
195
-
196
-
197
- def __getitem__(self, idx):
198
- sample = {}
199
- origin_idx = idx
200
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
201
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
202
- idx_original=idx
203
-
204
- folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
205
-
206
- folder_id = folder_uid_dict['folder_id']
207
- uid = folder_uid_dict['uid']
208
-
209
- idx = idx % self.imgs_per_instance # [0, 7]
210
- # target view
211
- c2w = self.c2ws[idx]
212
- w2c = np.linalg.inv(c2w)
213
- w2c_ref = w2c
214
- w2c_ref_inv = np.linalg.inv(w2c_ref)
215
-
216
- w2cs.append(w2c @ w2c_ref_inv)
217
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
218
-
219
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
220
-
221
- depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
222
-
223
-
224
- img = Image.open(img_filename)
225
-
226
- img = self.transform(img) # (4, h, w)
227
-
228
-
229
- if img.shape[0] == 4:
230
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
231
- imgs += [img]
232
-
233
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
234
- mask_h = depth_h > 0
235
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
236
- surface_points = directions * depth_h[..., None] # [H, W, 3]
237
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
238
- depth_h = distance
239
-
240
-
241
- depths_h.append(depth_h)
242
- masks_h.append(mask_h)
243
-
244
- intrinsic = self.intrinsic
245
- intrinsics.append(intrinsic)
246
-
247
-
248
- near_fars.append(self.near_fars[idx])
249
- image_perm = 0 # only supervised on reference view
250
-
251
- mask_dilated = None
252
-
253
-
254
-
255
- src_views = range(8, 8+32)
256
- src_views_used = []
257
- for vid in src_views:
258
- view_dix_to_use = (vid - 8) // 4
259
- src_views_used.append(vid)
260
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10_gt.png')
261
-
262
- img = Image.open(img_filename)
263
- img_wh = self.img_wh
264
-
265
- img = self.transform(img)
266
- if img.shape[0] == 4:
267
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
268
- imgs += [img]
269
- depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
270
- depth_h = depth_h.fill_(-1.0)
271
- depths_h.append(depth_h)
272
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
273
-
274
- near_fars.append(self.all_near_fars[vid])
275
- intrinsics.append(self.all_intrinsics[vid])
276
-
277
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
278
-
279
-
280
- scale_mat, scale_factor = self.cal_scale_mat(
281
- img_hw=[img_wh[1], img_wh[0]],
282
- intrinsics=intrinsics, extrinsics=w2cs,
283
- near_fars=near_fars, factor=1.1
284
- )
285
-
286
-
287
- new_near_fars = []
288
- new_w2cs = []
289
- new_c2ws = []
290
- new_affine_mats = []
291
- new_depths_h = []
292
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
293
-
294
- P = intrinsic @ extrinsic @ scale_mat
295
- P = P[:3, :4]
296
- # - should use load_K_Rt_from_P() to obtain c2w
297
- c2w = load_K_Rt_from_P(None, P)[1]
298
- w2c = np.linalg.inv(c2w)
299
- new_w2cs.append(w2c)
300
- new_c2ws.append(c2w)
301
- affine_mat = np.eye(4)
302
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
303
- new_affine_mats.append(affine_mat)
304
-
305
- camera_o = c2w[:3, 3]
306
- dist = np.sqrt(np.sum(camera_o ** 2))
307
- near = dist - 1
308
- far = dist + 1
309
-
310
- new_near_fars.append([0.95 * near, 1.05 * far])
311
-
312
- new_depths_h.append(depth * scale_factor)
313
-
314
-
315
- imgs = torch.stack(imgs).float()
316
- depths_h = np.stack(new_depths_h)
317
- masks_h = np.stack(masks_h)
318
-
319
- affine_mats = np.stack(new_affine_mats)
320
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
321
- new_near_fars)
322
-
323
- if self.split == 'train':
324
- start_idx = 0
325
- else:
326
- start_idx = 1
327
-
328
- view_ids = [idx_original % self.imgs_per_instance] + src_views_used
329
- sample['origin_idx'] = origin_idx
330
- sample['images'] = imgs # (V, 3, H, W)
331
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
332
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
333
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
334
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
335
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
336
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
337
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
338
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
339
-
340
- # sample['light_idx'] = torch.tensor(light_idx)
341
- sample['scan'] = folder_id
342
-
343
- sample['scale_factor'] = torch.tensor(scale_factor)
344
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
345
- sample['render_img_idx'] = torch.tensor(image_perm)
346
- sample['partial_vol_origin'] = self.partial_vol_origin
347
- meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
348
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end
349
-
350
-
351
- # - image to render
352
- sample['query_image'] = sample['images'][0]
353
- sample['query_c2w'] = sample['c2ws'][0]
354
- sample['query_w2c'] = sample['w2cs'][0]
355
- sample['query_intrinsic'] = sample['intrinsics'][0]
356
- sample['query_depth'] = sample['depths_h'][0]
357
- sample['query_mask'] = sample['masks_h'][0]
358
- sample['query_near_far'] = sample['near_fars'][0]
359
-
360
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
361
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
362
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
363
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
364
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
365
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
366
- sample['view_ids'] = sample['view_ids'][start_idx:]
367
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
368
-
369
- sample['scale_mat'] = torch.from_numpy(scale_mat)
370
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
371
-
372
- # - generate rays
373
- if ('val' in self.split) or ('test' in self.split):
374
- sample_rays = gen_rays_from_single_image(
375
- img_wh[1], img_wh[0],
376
- sample['query_image'],
377
- sample['query_intrinsic'],
378
- sample['query_c2w'],
379
- depth=sample['query_depth'],
380
- mask=sample['query_mask'] if self.clean_image else None)
381
- else:
382
- sample_rays = gen_random_rays_from_single_image(
383
- img_wh[1], img_wh[0],
384
- self.N_rays,
385
- sample['query_image'],
386
- sample['query_intrinsic'],
387
- sample['query_c2w'],
388
- depth=sample['query_depth'],
389
- mask=sample['query_mask'] if self.clean_image else None,
390
- dilated_mask=mask_dilated,
391
- importance_sample=self.importance_sample)
392
-
393
-
394
- sample['rays'] = sample_rays
395
-
396
- return sample
SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_3_views.py DELETED
@@ -1,446 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- self.root_dir = root_dir
70
- self.split = split
71
- self.imgs_per_instance = 16
72
- self.n_views = n_views
73
- self.N_rays = N_rays
74
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
75
-
76
- self.clean_image = clean_image
77
- self.importance_sample = importance_sample
78
- self.test_ref_views = test_ref_views # used for testing
79
- self.scale_factor = 1.0
80
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
81
-
82
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
83
- with open(lvis_json_path, 'r') as f:
84
- lvis_paths = json.load(f)
85
- if self.split == 'train':
86
- self.lvis_paths = lvis_paths['train']
87
- else:
88
- self.lvis_paths = lvis_paths['val']
89
- if img_wh is not None:
90
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
91
- 'img_wh must both be multiples of 32!'
92
-
93
-
94
- pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
95
- with open(pose_json_path_narrow, 'r') as f:
96
- narrow_meta = json.load(f)
97
-
98
- pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
99
- with open(pose_json_path_two_stage, 'r') as f:
100
- two_stage_meta = json.load(f)
101
-
102
-
103
- self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 4*4)
104
- self.img_wh = (256, 256)
105
- self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
106
- intrinsic = np.eye(4)
107
- assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
108
- intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
109
- self.intrinsic = intrinsic
110
- assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
111
- self.near_far = np.array(narrow_meta["near_far"])
112
- self.near_far[1] = 1.8
113
- self.define_transforms()
114
- self.blender2opencv = np.array(
115
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
116
- )
117
-
118
-
119
- self.c2ws = []
120
- self.w2cs = []
121
- self.near_fars = []
122
- for idx, img_id in enumerate(self.img_ids):
123
- pose = self.input_poses[idx]
124
- c2w = pose @ self.blender2opencv
125
- self.c2ws.append(c2w)
126
- self.w2cs.append(np.linalg.inv(c2w))
127
- self.near_fars.append(self.near_far)
128
-
129
-
130
-
131
- self.c2ws = np.stack(self.c2ws, axis=0)
132
- self.w2cs = np.stack(self.w2cs, axis=0)
133
-
134
-
135
- self.all_intrinsics = [] # the cam info of the whole scene
136
- self.all_extrinsics = []
137
- self.all_near_fars = []
138
- self.load_cam_info()
139
-
140
- # * bounding box for rendering
141
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
142
- self.bbox_max = np.array([1.0, 1.0, 1.0])
143
-
144
- # - used for cost volume regularization
145
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
146
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
147
-
148
-
149
- def define_transforms(self):
150
- self.transform = T.Compose([T.ToTensor()])
151
-
152
-
153
-
154
- def load_cam_info(self):
155
- for vid, img_id in enumerate(self.img_ids):
156
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
157
- self.all_intrinsics.append(intrinsic)
158
- self.all_extrinsics.append(extrinsic)
159
- self.all_near_fars.append(near_far)
160
-
161
- def read_depth(self, filename):
162
- pass
163
-
164
- def read_mask(self, filename):
165
- mask_h = cv2.imread(filename, 0)
166
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
167
- interpolation=cv2.INTER_NEAREST)
168
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
169
- interpolation=cv2.INTER_NEAREST)
170
-
171
- mask[mask > 0] = 1 # the masks stored in png are not binary
172
- mask_h[mask_h > 0] = 1
173
-
174
- return mask, mask_h
175
-
176
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
177
-
178
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
179
-
180
- radius = radius * factor
181
- scale_mat = np.diag([radius, radius, radius, 1.0])
182
- scale_mat[:3, 3] = center.cpu().numpy()
183
- scale_mat = scale_mat.astype(np.float32)
184
-
185
- return scale_mat, 1. / radius.cpu().numpy()
186
-
187
- def __len__(self):
188
- return self.imgs_per_instance*len(self.lvis_paths)
189
-
190
-
191
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
192
- pass
193
-
194
-
195
- def __getitem__(self, idx):
196
- sample = {}
197
- origin_idx = idx
198
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
199
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
200
- idx_original=idx
201
-
202
- folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
203
-
204
- folder_id = folder_uid_dict['folder_id']
205
- uid = folder_uid_dict['uid']
206
-
207
- if idx % 16 < 8: # narrow image as target
208
- idx = idx % self.imgs_per_instance # [0, 7]
209
- # target view
210
- c2w = self.c2ws[idx]
211
- w2c = np.linalg.inv(c2w)
212
- w2c_ref = w2c
213
- w2c_ref_inv = np.linalg.inv(w2c_ref)
214
-
215
- w2cs.append(w2c @ w2c_ref_inv)
216
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
217
-
218
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
219
-
220
- depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
221
-
222
-
223
- img = Image.open(img_filename)
224
-
225
- img = self.transform(img) # (4, h, w)
226
-
227
-
228
- if img.shape[0] == 4:
229
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
230
- imgs += [img]
231
-
232
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
233
- mask_h = depth_h > 0
234
- # print("valid pixels", np.sum(mask_h))
235
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
236
- surface_points = directions * depth_h[..., None] # [H, W, 3]
237
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
238
- depth_h = distance
239
-
240
- else:
241
- idx = idx % self.imgs_per_instance - 8 # [0, 5]
242
- c2w = self.c2ws[idx + 40]
243
- w2c = np.linalg.inv(c2w)
244
- w2c_ref = w2c
245
- w2c_ref_inv = np.linalg.inv(w2c_ref)
246
-
247
- w2cs.append(w2c @ w2c_ref_inv)
248
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
249
-
250
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png')
251
-
252
-
253
- img = Image.open(img_filename)
254
- img = self.transform(img) # (4, h, w)
255
-
256
- # print("img_pre", img.shape)
257
- if img.shape[0] == 4:
258
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
259
- # print("img", img.shape)
260
- imgs += [img]
261
-
262
- depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
263
- depth_h = depth_h.fill_(-1.0)
264
-
265
- mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
266
- depths_h.append(depth_h)
267
- masks_h.append(mask_h)
268
-
269
- intrinsic = self.intrinsic
270
- intrinsics.append(intrinsic)
271
-
272
-
273
- near_fars.append(self.near_fars[idx])
274
- image_perm = 0 # only supervised on reference view
275
-
276
- mask_dilated = None
277
- if_use_narrow = []
278
- if self.split == 'train':
279
- for i in range(8):
280
- if np.random.random() > 0.5:
281
- if_use_narrow.append(True) # use narrow
282
- else:
283
- if_use_narrow.append(False) # 2-stage prediction
284
- if_use_narrow[origin_idx % 8] = True if origin_idx < 8 else False
285
- else:
286
- for i in range(8):
287
- if_use_narrow.append( True if origin_idx < 8 else False)
288
-
289
- src_views = list()
290
- for i in range(8):
291
- # randomly choose 3 different number from [0,3]
292
- local_idxs = np.random.choice(4, 3, replace=False)
293
- local_idxs = [0,1,2]
294
- local_idxs = [8+i*4+local_idx for local_idx in local_idxs]
295
- src_views += local_idxs
296
- src_views_used = []
297
- for vid in src_views:
298
- src_views_used.append(vid)
299
- cur_view_id = (vid - 8) // 4
300
- # choose narrow
301
- if if_use_narrow[cur_view_id]:
302
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png')
303
- else: # choose 2-stage
304
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{(vid - 8) // 4}_{(vid-8) % 4 + 1}.png')
305
-
306
- img = Image.open(img_filename)
307
- img_wh = self.img_wh
308
-
309
- img = self.transform(img)
310
- if img.shape[0] == 4:
311
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
312
-
313
- imgs += [img]
314
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
315
- depths_h.append(depth_h)
316
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
317
-
318
- near_fars.append(self.all_near_fars[vid])
319
- intrinsics.append(self.all_intrinsics[vid])
320
-
321
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
322
-
323
-
324
-
325
-
326
- scale_mat, scale_factor = self.cal_scale_mat(
327
- img_hw=[img_wh[1], img_wh[0]],
328
- intrinsics=intrinsics, extrinsics=w2cs,
329
- near_fars=near_fars, factor=1.1
330
- )
331
-
332
-
333
- new_near_fars = []
334
- new_w2cs = []
335
- new_c2ws = []
336
- new_affine_mats = []
337
- new_depths_h = []
338
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
339
-
340
- P = intrinsic @ extrinsic @ scale_mat
341
- P = P[:3, :4]
342
- # - should use load_K_Rt_from_P() to obtain c2w
343
- c2w = load_K_Rt_from_P(None, P)[1]
344
- w2c = np.linalg.inv(c2w)
345
- new_w2cs.append(w2c)
346
- new_c2ws.append(c2w)
347
- affine_mat = np.eye(4)
348
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
349
- new_affine_mats.append(affine_mat)
350
-
351
- camera_o = c2w[:3, 3]
352
- dist = np.sqrt(np.sum(camera_o ** 2))
353
- near = dist - 1
354
- far = dist + 1
355
-
356
- new_near_fars.append([0.95 * near, 1.05 * far])
357
-
358
- new_depths_h.append(depth * scale_factor)
359
-
360
- # print(new_near_fars)
361
- # print("img numeber: ", len(imgs))
362
- imgs = torch.stack(imgs).float()
363
- depths_h = np.stack(new_depths_h)
364
- masks_h = np.stack(masks_h)
365
-
366
- affine_mats = np.stack(new_affine_mats)
367
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
368
- new_near_fars)
369
-
370
- if self.split == 'train':
371
- start_idx = 0
372
- else:
373
- start_idx = 1
374
-
375
- view_ids = [idx_original % self.imgs_per_instance] + src_views_used
376
- sample['origin_idx'] = origin_idx
377
- sample['images'] = imgs # (V, 3, H, W)
378
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
379
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
380
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
381
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
382
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
383
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
384
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
385
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
386
-
387
- # sample['light_idx'] = torch.tensor(light_idx)
388
- sample['scan'] = folder_id
389
-
390
- sample['scale_factor'] = torch.tensor(scale_factor)
391
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
392
- sample['render_img_idx'] = torch.tensor(image_perm)
393
- sample['partial_vol_origin'] = self.partial_vol_origin
394
- if view_ids[0] < 8:
395
- meta_end = "_narrow"+ "_refview" + str(view_ids[0])
396
- else:
397
- meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
398
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end
399
-
400
-
401
- # - image to render
402
- sample['query_image'] = sample['images'][0]
403
- sample['query_c2w'] = sample['c2ws'][0]
404
- sample['query_w2c'] = sample['w2cs'][0]
405
- sample['query_intrinsic'] = sample['intrinsics'][0]
406
- sample['query_depth'] = sample['depths_h'][0]
407
- sample['query_mask'] = sample['masks_h'][0]
408
- sample['query_near_far'] = sample['near_fars'][0]
409
-
410
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
411
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
412
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
413
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
414
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
415
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
416
- sample['view_ids'] = sample['view_ids'][start_idx:]
417
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
418
-
419
- sample['scale_mat'] = torch.from_numpy(scale_mat)
420
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
421
-
422
- # - generate rays
423
- if ('val' in self.split) or ('test' in self.split):
424
- sample_rays = gen_rays_from_single_image(
425
- img_wh[1], img_wh[0],
426
- sample['query_image'],
427
- sample['query_intrinsic'],
428
- sample['query_c2w'],
429
- depth=sample['query_depth'],
430
- mask=sample['query_mask'] if self.clean_image else None)
431
- else:
432
- sample_rays = gen_random_rays_from_single_image(
433
- img_wh[1], img_wh[0],
434
- self.N_rays,
435
- sample['query_image'],
436
- sample['query_intrinsic'],
437
- sample['query_c2w'],
438
- depth=sample['query_depth'],
439
- mask=sample['query_mask'] if self.clean_image else None,
440
- dilated_mask=mask_dilated,
441
- importance_sample=self.importance_sample)
442
-
443
-
444
- sample['rays'] = sample_rays
445
-
446
- return sample
SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_mix.py DELETED
@@ -1,439 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- self.root_dir = root_dir
70
- self.split = split
71
- self.imgs_per_instance = 16
72
- self.n_views = n_views
73
- self.N_rays = N_rays
74
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
75
-
76
- self.clean_image = clean_image
77
- self.importance_sample = importance_sample
78
- self.test_ref_views = test_ref_views # used for testing
79
- self.scale_factor = 1.0
80
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
81
-
82
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
83
- with open(lvis_json_path, 'r') as f:
84
- lvis_paths = json.load(f)
85
- if self.split == 'train':
86
- self.lvis_paths = lvis_paths['train']
87
- else:
88
- self.lvis_paths = lvis_paths['val']
89
- if img_wh is not None:
90
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
91
- 'img_wh must both be multiples of 32!'
92
-
93
-
94
- pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
95
- with open(pose_json_path_narrow, 'r') as f:
96
- narrow_meta = json.load(f)
97
-
98
- pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
99
- with open(pose_json_path_two_stage, 'r') as f:
100
- two_stage_meta = json.load(f)
101
-
102
-
103
- self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4)
104
- self.img_wh = (256, 256)
105
- self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
106
- intrinsic = np.eye(4)
107
- assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
108
- intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
109
- self.intrinsic = intrinsic
110
- assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
111
- self.near_far = np.array(narrow_meta["near_far"])
112
- self.near_far[1] = 1.8
113
- self.define_transforms()
114
- self.blender2opencv = np.array(
115
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
116
- )
117
-
118
-
119
- self.c2ws = []
120
- self.w2cs = []
121
- self.near_fars = []
122
- for idx, img_id in enumerate(self.img_ids):
123
- pose = self.input_poses[idx]
124
- c2w = pose @ self.blender2opencv
125
- self.c2ws.append(c2w)
126
- self.w2cs.append(np.linalg.inv(c2w))
127
- self.near_fars.append(self.near_far)
128
-
129
-
130
-
131
- self.c2ws = np.stack(self.c2ws, axis=0)
132
- self.w2cs = np.stack(self.w2cs, axis=0)
133
-
134
-
135
- self.all_intrinsics = [] # the cam info of the whole scene
136
- self.all_extrinsics = []
137
- self.all_near_fars = []
138
- self.load_cam_info()
139
-
140
- # * bounding box for rendering
141
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
142
- self.bbox_max = np.array([1.0, 1.0, 1.0])
143
-
144
- # - used for cost volume regularization
145
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
146
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
147
-
148
-
149
- def define_transforms(self):
150
- self.transform = T.Compose([T.ToTensor()])
151
-
152
-
153
-
154
- def load_cam_info(self):
155
- for vid, img_id in enumerate(self.img_ids):
156
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
157
- self.all_intrinsics.append(intrinsic)
158
- self.all_extrinsics.append(extrinsic)
159
- self.all_near_fars.append(near_far)
160
-
161
- def read_depth(self, filename):
162
- pass
163
-
164
- def read_mask(self, filename):
165
- mask_h = cv2.imread(filename, 0)
166
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
167
- interpolation=cv2.INTER_NEAREST)
168
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
169
- interpolation=cv2.INTER_NEAREST)
170
-
171
- mask[mask > 0] = 1 # the masks stored in png are not binary
172
- mask_h[mask_h > 0] = 1
173
-
174
- return mask, mask_h
175
-
176
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
177
-
178
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
179
-
180
- radius = radius * factor
181
- scale_mat = np.diag([radius, radius, radius, 1.0])
182
- scale_mat[:3, 3] = center.cpu().numpy()
183
- scale_mat = scale_mat.astype(np.float32)
184
-
185
- return scale_mat, 1. / radius.cpu().numpy()
186
-
187
- def __len__(self):
188
- return self.imgs_per_instance*len(self.lvis_paths)
189
-
190
-
191
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
192
- pass
193
-
194
-
195
- def __getitem__(self, idx):
196
- sample = {}
197
- origin_idx = idx
198
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
199
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
200
- idx_original=idx
201
-
202
- folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
203
-
204
- folder_id = folder_uid_dict['folder_id']
205
- uid = folder_uid_dict['uid']
206
-
207
- if idx % 16 < 8: # gt image as target
208
- idx = idx % self.imgs_per_instance # [0, 7]
209
- # target view
210
- c2w = self.c2ws[idx]
211
- w2c = np.linalg.inv(c2w)
212
- w2c_ref = w2c
213
- w2c_ref_inv = np.linalg.inv(w2c_ref)
214
-
215
- w2cs.append(w2c @ w2c_ref_inv)
216
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
217
-
218
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
219
-
220
- depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
221
-
222
-
223
- img = Image.open(img_filename)
224
-
225
- img = self.transform(img) # (4, h, w)
226
-
227
-
228
- if img.shape[0] == 4:
229
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
230
- imgs += [img]
231
-
232
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
233
- mask_h = depth_h > 0
234
- # print("valid pixels", np.sum(mask_h))
235
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
236
- surface_points = directions * depth_h[..., None] # [H, W, 3]
237
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
238
- depth_h = distance
239
-
240
- else:
241
- idx = idx % self.imgs_per_instance - 8 # [0, 7]
242
- c2w = self.c2ws[idx + 40]
243
- w2c = np.linalg.inv(c2w)
244
- w2c_ref = w2c
245
- w2c_ref_inv = np.linalg.inv(w2c_ref)
246
-
247
- w2cs.append(w2c @ w2c_ref_inv)
248
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
249
-
250
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png')
251
-
252
-
253
- img = Image.open(img_filename)
254
- img = self.transform(img) # (4, h, w)
255
-
256
- # print("img_pre", img.shape)
257
- if img.shape[0] == 4:
258
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
259
- # print("img", img.shape)
260
- imgs += [img]
261
-
262
- depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
263
- depth_h = depth_h.fill_(-1.0)
264
-
265
- mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
266
- depths_h.append(depth_h)
267
- masks_h.append(mask_h)
268
-
269
- intrinsic = self.intrinsic
270
- intrinsics.append(intrinsic)
271
-
272
-
273
- near_fars.append(self.near_fars[idx])
274
- image_perm = 0 # only supervised on reference view
275
-
276
- mask_dilated = None
277
- if_use_narrow = []
278
- if self.split == 'train':
279
- for i in range(8):
280
- if np.random.random() > 0.5:
281
- if_use_narrow.append(True) # use narrow
282
- else:
283
- if_use_narrow.append(False) # 2-stage prediction
284
- if_use_narrow[origin_idx % 8] = True if (origin_idx % 16) < 8 else False
285
- else:
286
- for i in range(8):
287
- if_use_narrow.append( True if (origin_idx % 16) < 8 else False)
288
- src_views = range(8, 8 + 8 * 4)
289
- src_views_used = []
290
- for vid in src_views:
291
- src_views_used.append(vid)
292
- cur_view_id = (vid - 8) // 4 # [0, 7]
293
- # choose narrow
294
- if if_use_narrow[cur_view_id]:
295
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png')
296
- else: # choose 2-stage
297
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{cur_view_id}_{(vid) % 4 + 1}.png')
298
-
299
- img = Image.open(img_filename)
300
- img_wh = self.img_wh
301
-
302
- img = self.transform(img)
303
- if img.shape[0] == 4:
304
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
305
-
306
- imgs += [img]
307
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
308
- depths_h.append(depth_h)
309
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
310
-
311
- near_fars.append(self.all_near_fars[vid])
312
- intrinsics.append(self.all_intrinsics[vid])
313
-
314
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
315
-
316
-
317
-
318
-
319
- scale_mat, scale_factor = self.cal_scale_mat(
320
- img_hw=[img_wh[1], img_wh[0]],
321
- intrinsics=intrinsics, extrinsics=w2cs,
322
- near_fars=near_fars, factor=1.1
323
- )
324
-
325
-
326
- new_near_fars = []
327
- new_w2cs = []
328
- new_c2ws = []
329
- new_affine_mats = []
330
- new_depths_h = []
331
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
332
-
333
- P = intrinsic @ extrinsic @ scale_mat
334
- P = P[:3, :4]
335
- # - should use load_K_Rt_from_P() to obtain c2w
336
- c2w = load_K_Rt_from_P(None, P)[1]
337
- w2c = np.linalg.inv(c2w)
338
- new_w2cs.append(w2c)
339
- new_c2ws.append(c2w)
340
- affine_mat = np.eye(4)
341
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
342
- new_affine_mats.append(affine_mat)
343
-
344
- camera_o = c2w[:3, 3]
345
- dist = np.sqrt(np.sum(camera_o ** 2))
346
- near = dist - 1
347
- far = dist + 1
348
-
349
- new_near_fars.append([0.95 * near, 1.05 * far])
350
-
351
- new_depths_h.append(depth * scale_factor)
352
-
353
- # print(new_near_fars)
354
-         # print("img number: ", len(imgs))
355
- imgs = torch.stack(imgs).float()
356
- depths_h = np.stack(new_depths_h)
357
- masks_h = np.stack(masks_h)
358
-
359
- affine_mats = np.stack(new_affine_mats)
360
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
361
- new_near_fars)
362
-
363
- if self.split == 'train':
364
- start_idx = 0
365
- else:
366
- start_idx = 1
367
-
368
- view_ids = [idx_original % self.imgs_per_instance] + src_views_used
369
- sample['origin_idx'] = origin_idx
370
- sample['images'] = imgs # (V, 3, H, W)
371
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
372
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
373
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
374
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
375
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
376
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
377
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
378
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
379
-
380
- # sample['light_idx'] = torch.tensor(light_idx)
381
- sample['scan'] = folder_id
382
-
383
- sample['scale_factor'] = torch.tensor(scale_factor)
384
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
385
- sample['render_img_idx'] = torch.tensor(image_perm)
386
- sample['partial_vol_origin'] = self.partial_vol_origin
387
- if view_ids[0] < 8:
388
- meta_end = "_narrow"+ "_refview" + str(view_ids[0])
389
- else:
390
- meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
391
-         sample['meta'] = str(folder_id) + "_" + str(uid) + meta_end  # meta_end already carries the "_refview" suffix
392
-
393
-
394
- # - image to render
395
- sample['query_image'] = sample['images'][0]
396
- sample['query_c2w'] = sample['c2ws'][0]
397
- sample['query_w2c'] = sample['w2cs'][0]
398
- sample['query_intrinsic'] = sample['intrinsics'][0]
399
- sample['query_depth'] = sample['depths_h'][0]
400
- sample['query_mask'] = sample['masks_h'][0]
401
- sample['query_near_far'] = sample['near_fars'][0]
402
-
403
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
404
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
405
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
406
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
407
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
408
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
409
- sample['view_ids'] = sample['view_ids'][start_idx:]
410
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
411
-
412
- sample['scale_mat'] = torch.from_numpy(scale_mat)
413
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
414
-
415
- # - generate rays
416
- if ('val' in self.split) or ('test' in self.split):
417
- sample_rays = gen_rays_from_single_image(
418
- img_wh[1], img_wh[0],
419
- sample['query_image'],
420
- sample['query_intrinsic'],
421
- sample['query_c2w'],
422
- depth=sample['query_depth'],
423
- mask=sample['query_mask'] if self.clean_image else None)
424
- else:
425
- sample_rays = gen_random_rays_from_single_image(
426
- img_wh[1], img_wh[0],
427
- self.N_rays,
428
- sample['query_image'],
429
- sample['query_intrinsic'],
430
- sample['query_c2w'],
431
- depth=sample['query_depth'],
432
- mask=sample['query_mask'] if self.clean_image else None,
433
- dilated_mask=mask_dilated,
434
- importance_sample=self.importance_sample)
435
-
436
-
437
- sample['rays'] = sample_rays
438
-
439
- return sample
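
Each of the deleted loaders normalizes its cameras by building P = intrinsic @ extrinsic @ scale_mat and handing it to load_K_Rt_from_P(), which uses cv2.decomposeProjectionMatrix to recover the intrinsics and a camera-to-world pose. The stand-alone sketch below only illustrates that convention; K and the extrinsic are made up (none of the numbers come from the repo), and the final prints confirm the recovered pose equals the inverse of the world-to-camera matrix.

import numpy as np
import cv2

# Made-up intrinsics roughly matching the 256x256 setting used by these loaders.
K = np.array([[280.0, 0.0, 128.0],
              [0.0, 280.0, 128.0],
              [0.0, 0.0, 1.0]])

# Made-up world-to-camera extrinsic: a rotation about y plus a translation.
theta = 0.3
R_wc = np.array([[np.cos(theta), 0.0, np.sin(theta)],
                 [0.0, 1.0, 0.0],
                 [-np.sin(theta), 0.0, np.cos(theta)]])
t_wc = np.array([0.1, -0.2, 1.5])
P = K @ np.hstack([R_wc, t_wc[:, None]])  # 3x4 projection matrix, as built inside __getitem__

out = cv2.decomposeProjectionMatrix(P)
K_rec, R_rec, t_rec = out[0], out[1], out[2]
K_rec = K_rec / K_rec[2, 2]

pose = np.eye(4)
pose[:3, :3] = R_rec.T                      # world-to-camera rotation -> camera-to-world rotation
pose[:3, 3] = (t_rec[:3] / t_rec[3])[:, 0]  # homogeneous camera centre, already in world coordinates

w2c = np.vstack([np.hstack([R_wc, t_wc[:, None]]), [0.0, 0.0, 0.0, 1.0]])
print(np.abs(K_rec - K).max())                  # ~0
print(np.abs(pose - np.linalg.inv(w2c)).max())  # ~0: the helper indeed returns a cam2world matrix
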
SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_mix.py DELETED
@@ -1,470 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
-     pose[:3, :3] = R.transpose()  # R from decomposeProjectionMatrix is the world-to-camera rotation; transposing gives camera-to-world
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- self.root_dir = root_dir
70
- self.split = split
71
- self.imgs_per_instance = 16
72
- self.n_views = n_views
73
- self.N_rays = N_rays
74
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
75
-
76
- self.clean_image = clean_image
77
- self.importance_sample = importance_sample
78
- self.test_ref_views = test_ref_views # used for testing
79
- self.scale_factor = 1.0
80
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
81
-
82
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
83
- with open(lvis_json_path, 'r') as f:
84
- lvis_paths = json.load(f)
85
- if self.split == 'train':
86
- self.lvis_paths = lvis_paths['train']
87
- else:
88
- self.lvis_paths = lvis_paths['val']
89
- if img_wh is not None:
90
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
91
- 'img_wh must both be multiples of 32!'
92
-
93
-
94
- pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
95
- with open(pose_json_path_narrow, 'r') as f:
96
- narrow_meta = json.load(f)
97
-
98
- pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
99
- with open(pose_json_path_two_stage, 'r') as f:
100
- two_stage_meta = json.load(f)
101
-
102
-
103
- self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4)
104
- self.img_wh = (256, 256)
105
- self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
106
- intrinsic = np.eye(4)
107
- assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
108
- intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
109
- self.intrinsic = intrinsic
110
- assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
111
- self.near_far = np.array(narrow_meta["near_far"])
112
- self.near_far[1] = 1.8
113
- self.define_transforms()
114
- self.blender2opencv = np.array(
115
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
116
- )
117
-
118
-
119
- self.c2ws = []
120
- self.w2cs = []
121
- self.near_fars = []
122
- for idx, img_id in enumerate(self.img_ids):
123
- pose = self.input_poses[idx]
124
- c2w = pose @ self.blender2opencv
125
- self.c2ws.append(c2w)
126
- self.w2cs.append(np.linalg.inv(c2w))
127
- self.near_fars.append(self.near_far)
128
-
129
-
130
-
131
- self.c2ws = np.stack(self.c2ws, axis=0)
132
- self.w2cs = np.stack(self.w2cs, axis=0)
133
-
134
-
135
- self.all_intrinsics = [] # the cam info of the whole scene
136
- self.all_extrinsics = []
137
- self.all_near_fars = []
138
- self.load_cam_info()
139
-
140
- # * bounding box for rendering
141
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
142
- self.bbox_max = np.array([1.0, 1.0, 1.0])
143
-
144
- # - used for cost volume regularization
145
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
146
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
147
-
148
-
149
- def define_transforms(self):
150
- self.transform = T.Compose([T.ToTensor()])
151
-
152
-
153
-
154
- def load_cam_info(self):
155
- for vid, img_id in enumerate(self.img_ids):
156
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
157
- self.all_intrinsics.append(intrinsic)
158
- self.all_extrinsics.append(extrinsic)
159
- self.all_near_fars.append(near_far)
160
-
161
- def read_depth(self, filename):
162
- pass
163
-
164
- def read_mask(self, filename):
165
- mask_h = cv2.imread(filename, 0)
166
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
167
- interpolation=cv2.INTER_NEAREST)
168
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
169
- interpolation=cv2.INTER_NEAREST)
170
-
171
- mask[mask > 0] = 1 # the masks stored in png are not binary
172
- mask_h[mask_h > 0] = 1
173
-
174
- return mask, mask_h
175
-
176
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
177
-
178
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
179
-
180
- radius = radius * factor
181
- scale_mat = np.diag([radius, radius, radius, 1.0])
182
- scale_mat[:3, 3] = center.cpu().numpy()
183
- scale_mat = scale_mat.astype(np.float32)
184
-
185
- return scale_mat, 1. / radius.cpu().numpy()
186
-
187
- def __len__(self):
188
- return self.imgs_per_instance * len(self.lvis_paths)
189
-
190
-
191
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
192
- pass
193
-
194
-
195
- def __getitem__(self, idx):
196
- sample = {}
197
- origin_idx = idx
198
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
199
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
200
- idx_original=idx
201
-
202
- folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
203
-
204
- folder_id = folder_uid_dict['folder_id']
205
- uid = folder_uid_dict['uid']
206
-
207
- if idx % self.imgs_per_instance < 8:
208
- idx = idx % self.imgs_per_instance # [0, 7]
209
- # target view
210
- c2w = self.c2ws[idx]
211
- w2c = np.linalg.inv(c2w)
212
- w2c_ref = w2c
213
- w2c_ref_inv = np.linalg.inv(w2c_ref)
214
-
215
- w2cs.append(w2c @ w2c_ref_inv)
216
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
217
-
218
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
219
-
220
- depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
221
-
222
-
223
- img = Image.open(img_filename)
224
-
225
- img = self.transform(img) # (4, h, w)
226
-
227
-
228
- if img.shape[0] == 4:
229
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
230
- imgs += [img]
231
-
232
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
233
- mask_h = depth_h > 0
234
- # print("valid pixels", np.sum(mask_h))
235
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
236
- surface_points = directions * depth_h[..., None] # [H, W, 3]
237
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
238
- depth_h = distance
239
-
240
-
241
- depths_h.append(depth_h)
242
- masks_h.append(mask_h)
243
-
244
- intrinsic = self.intrinsic
245
- intrinsics.append(intrinsic)
246
-
247
-
248
- near_fars.append(self.near_fars[idx])
249
- image_perm = 0 # only supervised on reference view
250
-
251
- mask_dilated = None
252
-
253
- # src_views = range(8+idx*4, 8+(idx+1)*4)
254
-
255
- src_views = range(8, 8 + 8 * 4)
256
- src_views_used = []
257
- for vid in src_views:
258
- src_views_used.append(vid)
259
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
260
-
261
- img = Image.open(img_filename)
262
- img_wh = self.img_wh
263
-
264
- img = self.transform(img)
265
- if img.shape[0] == 4:
266
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
267
-
268
- imgs += [img]
269
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
270
- depths_h.append(depth_h)
271
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
272
-
273
- near_fars.append(self.all_near_fars[vid])
274
- intrinsics.append(self.all_intrinsics[vid])
275
-
276
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
277
-
278
- else:
279
-             idx = idx % self.imgs_per_instance - 8  # [0, 7]
280
-
281
- c2w = self.c2ws[idx + 40]
282
- w2c = np.linalg.inv(c2w)
283
- w2c_ref = w2c
284
- w2c_ref_inv = np.linalg.inv(w2c_ref)
285
-
286
- w2cs.append(w2c @ w2c_ref_inv)
287
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
288
-
289
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png')
290
-
291
-
292
- img = Image.open(img_filename)
293
-
294
- img = self.transform(img) # (4, h, w)
295
-
296
- # print("img_pre", img.shape)
297
- if img.shape[0] == 4:
298
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
299
- # print("img", img.shape)
300
- imgs += [img]
301
-
302
-
303
- depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
304
- depth_h = depth_h.fill_(-1.0)
305
- # depth_h = torch.fill((img.shape[1], img.shape[2]), -1.0)
306
- # print("depth_h", depth_h.shape)
307
- mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
308
- depths_h.append(depth_h)
309
- masks_h.append(mask_h)
310
-
311
- intrinsic = self.intrinsic
312
- intrinsics.append(intrinsic)
313
-
314
-
315
- near_fars.append(self.near_fars[idx])
316
- image_perm = 0 # only supervised on reference view
317
-
318
- mask_dilated = None
319
-
320
-
321
-
322
- src_views = range(40+8, 40+8+32)
323
- src_views_used = []
324
- for vid in src_views:
325
- view_dix_to_use = (vid - 40 - 8) // 4
326
-
327
- src_views_used.append(vid)
328
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_{(vid-48) % 4 + 1}.png')
329
-
330
- img = Image.open(img_filename)
331
- img_wh = self.img_wh
332
-
333
- img = self.transform(img)
334
- # print("img shape1: ", img.shape)
335
- if img.shape[0] == 4:
336
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
337
- # print("img shape2: ", img.shape)
338
- imgs += [img]
339
- depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
340
- depth_h = depth_h.fill_(-1.0)
341
- depths_h.append(depth_h)
342
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
343
-
344
- near_fars.append(self.all_near_fars[vid])
345
- intrinsics.append(self.all_intrinsics[vid])
346
-
347
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
348
-
349
-
350
- scale_mat, scale_factor = self.cal_scale_mat(
351
- img_hw=[img_wh[1], img_wh[0]],
352
- intrinsics=intrinsics, extrinsics=w2cs,
353
- near_fars=near_fars, factor=1.1
354
- )
355
-
356
-
357
- new_near_fars = []
358
- new_w2cs = []
359
- new_c2ws = []
360
- new_affine_mats = []
361
- new_depths_h = []
362
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
363
-
364
- P = intrinsic @ extrinsic @ scale_mat
365
- P = P[:3, :4]
366
- # - should use load_K_Rt_from_P() to obtain c2w
367
- c2w = load_K_Rt_from_P(None, P)[1]
368
- w2c = np.linalg.inv(c2w)
369
- new_w2cs.append(w2c)
370
- new_c2ws.append(c2w)
371
- affine_mat = np.eye(4)
372
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
373
- new_affine_mats.append(affine_mat)
374
-
375
- camera_o = c2w[:3, 3]
376
- dist = np.sqrt(np.sum(camera_o ** 2))
377
- near = dist - 1
378
- far = dist + 1
379
-
380
- new_near_fars.append([0.95 * near, 1.05 * far])
381
-
382
- new_depths_h.append(depth * scale_factor)
383
-
384
- # print(new_near_fars)
385
-         # print("img number: ", len(imgs))
386
- imgs = torch.stack(imgs).float()
387
- depths_h = np.stack(new_depths_h)
388
- masks_h = np.stack(masks_h)
389
-
390
- affine_mats = np.stack(new_affine_mats)
391
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
392
- new_near_fars)
393
-
394
- if self.split == 'train':
395
- start_idx = 0
396
- else:
397
- start_idx = 1
398
-
399
- view_ids = [idx_original % self.imgs_per_instance] + src_views_used
400
- sample['origin_idx'] = origin_idx
401
- sample['images'] = imgs # (V, 3, H, W)
402
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
403
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
404
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
405
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
406
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
407
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
408
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
409
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
410
-
411
- # sample['light_idx'] = torch.tensor(light_idx)
412
- sample['scan'] = folder_id
413
-
414
- sample['scale_factor'] = torch.tensor(scale_factor)
415
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
416
- sample['render_img_idx'] = torch.tensor(image_perm)
417
- sample['partial_vol_origin'] = self.partial_vol_origin
418
- if view_ids[0] < 8:
419
- meta_end = "_narrow"+ "_refview" + str(view_ids[0])
420
- else:
421
- meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
422
-         sample['meta'] = str(folder_id) + "_" + str(uid) + meta_end  # meta_end already carries the "_refview" suffix
423
-
424
-
425
- # - image to render
426
- sample['query_image'] = sample['images'][0]
427
- sample['query_c2w'] = sample['c2ws'][0]
428
- sample['query_w2c'] = sample['w2cs'][0]
429
- sample['query_intrinsic'] = sample['intrinsics'][0]
430
- sample['query_depth'] = sample['depths_h'][0]
431
- sample['query_mask'] = sample['masks_h'][0]
432
- sample['query_near_far'] = sample['near_fars'][0]
433
-
434
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
435
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
436
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
437
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
438
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
439
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
440
- sample['view_ids'] = sample['view_ids'][start_idx:]
441
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
442
-
443
- sample['scale_mat'] = torch.from_numpy(scale_mat)
444
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
445
-
446
- # - generate rays
447
- if ('val' in self.split) or ('test' in self.split):
448
- sample_rays = gen_rays_from_single_image(
449
- img_wh[1], img_wh[0],
450
- sample['query_image'],
451
- sample['query_intrinsic'],
452
- sample['query_c2w'],
453
- depth=sample['query_depth'],
454
- mask=sample['query_mask'] if self.clean_image else None)
455
- else:
456
- sample_rays = gen_random_rays_from_single_image(
457
- img_wh[1], img_wh[0],
458
- self.N_rays,
459
- sample['query_image'],
460
- sample['query_intrinsic'],
461
- sample['query_c2w'],
462
- depth=sample['query_depth'],
463
- mask=sample['query_mask'] if self.clean_image else None,
464
- dilated_mask=mask_dilated,
465
- importance_sample=self.importance_sample)
466
-
467
-
468
- sample['rays'] = sample_rays
469
-
470
- return sample
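
For anyone still pinned to a pre-pruning checkout, the deleted loaders follow the same Dataset contract, so a minimal usage sketch is kept here. The import path matches the file above, but the root_dir value and the DataLoader settings are assumptions, and the loader only runs where the /objaverse-processed data it hard-codes internally is mounted.

from torch.utils.data import DataLoader
# Only importable from a checkout that still contains the module deleted above.
from data.blender_general_8_narrow_and_8_2_stage_mix import BlenderPerView

dataset = BlenderPerView(
    root_dir="/objaverse-processed/zero12345_img",  # assumed; the class reads from hard-coded absolute paths anyway
    split="val",
    n_views=3,
    img_wh=(256, 256),
    N_rays=512,
    vol_dims=[128, 128, 128],
    clean_image=True,
)

sample = dataset[0]  # dict bundling the reference (query) view, its source views, and pre-generated rays
print(sorted(sample.keys()))
print(sample["images"].shape, sample["w2cs"].shape, sample["intrinsics"].shape)

# The per-item dict holds tensors plus a few strings, so it can typically be wrapped directly:
loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2)
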
SparseNeuS_demo_v1/data/blender_general_8_wide_from_2_stage.py DELETED
@@ -1,395 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
-     pose[:3, :3] = R.transpose()  # R from decomposeProjectionMatrix is the world-to-camera rotation; transposing gives camera-to-world
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- self.root_dir = root_dir
70
- self.split = split
71
-
72
- self.imgs_per_instance = 8
73
-
74
- self.n_views = n_views
75
- self.N_rays = N_rays
76
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
77
-
78
- self.clean_image = clean_image
79
- self.importance_sample = importance_sample
80
- self.test_ref_views = test_ref_views # used for testing
81
- self.scale_factor = 1.0
82
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
83
-
84
- lvis_json_path = '/objaverse-processed/zero12345_img/random32_split.json' # folder_id and uid
85
- with open(lvis_json_path, 'r') as f:
86
- lvis_paths = json.load(f)
87
- if self.split == 'train':
88
- self.lvis_paths = lvis_paths['train']
89
- else:
90
- self.lvis_paths = lvis_paths['val']
91
- if img_wh is not None:
92
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
93
- 'img_wh must both be multiples of 32!'
94
-
95
-
96
- pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
97
- with open(pose_json_path_narrow, 'r') as f:
98
- narrow_meta = json.load(f)
99
-
100
- pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
101
- with open(pose_json_path_two_stage, 'r') as f:
102
- two_stage_meta = json.load(f)
103
-
104
-
105
- self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4)
106
- self.img_wh = (256, 256)
107
- self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
108
- intrinsic = np.eye(4)
109
- assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
110
- intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
111
- self.intrinsic = intrinsic
112
- assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
113
- self.near_far = np.array(narrow_meta["near_far"])
114
- self.near_far[1] = 1.8
115
- self.define_transforms()
116
- self.blender2opencv = np.array(
117
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
118
- )
119
-
120
-
121
- self.c2ws = []
122
- self.w2cs = []
123
- self.near_fars = []
124
- for idx, img_id in enumerate(self.img_ids):
125
- pose = self.input_poses[idx]
126
- c2w = pose @ self.blender2opencv
127
- self.c2ws.append(c2w)
128
- self.w2cs.append(np.linalg.inv(c2w))
129
- self.near_fars.append(self.near_far)
130
-
131
-
132
-
133
- self.c2ws = np.stack(self.c2ws, axis=0)
134
- self.w2cs = np.stack(self.w2cs, axis=0)
135
-
136
-
137
- self.all_intrinsics = [] # the cam info of the whole scene
138
- self.all_extrinsics = []
139
- self.all_near_fars = []
140
- self.load_cam_info()
141
-
142
- # * bounding box for rendering
143
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
144
- self.bbox_max = np.array([1.0, 1.0, 1.0])
145
-
146
- # - used for cost volume regularization
147
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
148
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
149
-
150
-
151
- def define_transforms(self):
152
- self.transform = T.Compose([T.ToTensor()])
153
-
154
-
155
-
156
- def load_cam_info(self):
157
- for vid, img_id in enumerate(self.img_ids):
158
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
159
- self.all_intrinsics.append(intrinsic)
160
- self.all_extrinsics.append(extrinsic)
161
- self.all_near_fars.append(near_far)
162
-
163
- def read_depth(self, filename):
164
- pass
165
-
166
- def read_mask(self, filename):
167
- mask_h = cv2.imread(filename, 0)
168
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
169
- interpolation=cv2.INTER_NEAREST)
170
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
171
- interpolation=cv2.INTER_NEAREST)
172
-
173
- mask[mask > 0] = 1 # the masks stored in png are not binary
174
- mask_h[mask_h > 0] = 1
175
-
176
- return mask, mask_h
177
-
178
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
179
-
180
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
181
-
182
- radius = radius * factor
183
- scale_mat = np.diag([radius, radius, radius, 1.0])
184
- scale_mat[:3, 3] = center.cpu().numpy()
185
- scale_mat = scale_mat.astype(np.float32)
186
-
187
- return scale_mat, 1. / radius.cpu().numpy()
188
-
189
- def __len__(self):
190
- return self.imgs_per_instance * len(self.lvis_paths)
191
-
192
-
193
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
194
- pass
195
-
196
-
197
- def __getitem__(self, idx):
198
- sample = {}
199
- origin_idx = idx
200
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
201
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
202
- idx_original=idx
203
-
204
- folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
205
-
206
- folder_id = folder_uid_dict['folder_id']
207
- uid = folder_uid_dict['uid']
208
-
209
- idx = idx % self.imgs_per_instance # [0, 7]
210
- # target view
211
- c2w = self.c2ws[idx]
212
- w2c = np.linalg.inv(c2w)
213
- w2c_ref = w2c
214
- w2c_ref_inv = np.linalg.inv(w2c_ref)
215
-
216
- w2cs.append(w2c @ w2c_ref_inv)
217
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
218
-
219
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
220
-
221
- depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
222
-
223
-
224
- img = Image.open(img_filename)
225
-
226
- img = self.transform(img) # (4, h, w)
227
-
228
-
229
- if img.shape[0] == 4:
230
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
231
- imgs += [img]
232
-
233
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
234
- mask_h = depth_h > 0
235
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
236
- surface_points = directions * depth_h[..., None] # [H, W, 3]
237
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
238
- depth_h = distance
239
-
240
-
241
- depths_h.append(depth_h)
242
- masks_h.append(mask_h)
243
-
244
- intrinsic = self.intrinsic
245
- intrinsics.append(intrinsic)
246
-
247
-
248
- near_fars.append(self.near_fars[idx])
249
- image_perm = 0 # only supervised on reference view
250
-
251
- mask_dilated = None
252
-
253
-
254
-
255
- src_views = range(0, 8)
256
- src_views_used = []
257
- for vid in src_views:
258
- src_views_used.append(vid)
259
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{vid}_0.png')
260
-
261
- img = Image.open(img_filename)
262
- img_wh = self.img_wh
263
-
264
- img = self.transform(img)
265
- if img.shape[0] == 4:
266
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
267
- imgs += [img]
268
- depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
269
- depth_h = depth_h.fill_(-1.0)
270
- depths_h.append(depth_h)
271
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
272
-
273
- near_fars.append(self.all_near_fars[vid])
274
- intrinsics.append(self.all_intrinsics[vid])
275
-
276
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
277
-
278
-
279
- scale_mat, scale_factor = self.cal_scale_mat(
280
- img_hw=[img_wh[1], img_wh[0]],
281
- intrinsics=intrinsics, extrinsics=w2cs,
282
- near_fars=near_fars, factor=1.1
283
- )
284
-
285
-
286
- new_near_fars = []
287
- new_w2cs = []
288
- new_c2ws = []
289
- new_affine_mats = []
290
- new_depths_h = []
291
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
292
-
293
- P = intrinsic @ extrinsic @ scale_mat
294
- P = P[:3, :4]
295
- # - should use load_K_Rt_from_P() to obtain c2w
296
- c2w = load_K_Rt_from_P(None, P)[1]
297
- w2c = np.linalg.inv(c2w)
298
- new_w2cs.append(w2c)
299
- new_c2ws.append(c2w)
300
- affine_mat = np.eye(4)
301
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
302
- new_affine_mats.append(affine_mat)
303
-
304
- camera_o = c2w[:3, 3]
305
- dist = np.sqrt(np.sum(camera_o ** 2))
306
- near = dist - 1
307
- far = dist + 1
308
-
309
- new_near_fars.append([0.95 * near, 1.05 * far])
310
-
311
- new_depths_h.append(depth * scale_factor)
312
-
313
-
314
- imgs = torch.stack(imgs).float()
315
- depths_h = np.stack(new_depths_h)
316
- masks_h = np.stack(masks_h)
317
-
318
- affine_mats = np.stack(new_affine_mats)
319
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
320
- new_near_fars)
321
-
322
- if self.split == 'train':
323
- start_idx = 0
324
- else:
325
- start_idx = 1
326
-
327
- view_ids = [idx_original % self.imgs_per_instance] + src_views_used
328
- sample['origin_idx'] = origin_idx
329
- sample['images'] = imgs # (V, 3, H, W)
330
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
331
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
332
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
333
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
334
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
335
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
336
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
337
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
338
-
339
- # sample['light_idx'] = torch.tensor(light_idx)
340
- sample['scan'] = folder_id
341
-
342
- sample['scale_factor'] = torch.tensor(scale_factor)
343
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
344
- sample['render_img_idx'] = torch.tensor(image_perm)
345
- sample['partial_vol_origin'] = self.partial_vol_origin
346
-         meta_end = "_two_stage" + "_refview" + str(view_ids[0])  # view_ids[0] is already in [0, 7] here
347
-         sample['meta'] = str(folder_id) + "_" + str(uid) + meta_end  # meta_end already carries the "_refview" suffix
348
-
349
-
350
- # - image to render
351
- sample['query_image'] = sample['images'][0]
352
- sample['query_c2w'] = sample['c2ws'][0]
353
- sample['query_w2c'] = sample['w2cs'][0]
354
- sample['query_intrinsic'] = sample['intrinsics'][0]
355
- sample['query_depth'] = sample['depths_h'][0]
356
- sample['query_mask'] = sample['masks_h'][0]
357
- sample['query_near_far'] = sample['near_fars'][0]
358
-
359
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
360
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
361
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
362
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
363
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
364
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
365
- sample['view_ids'] = sample['view_ids'][start_idx:]
366
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
367
-
368
- sample['scale_mat'] = torch.from_numpy(scale_mat)
369
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
370
-
371
- # - generate rays
372
- if ('val' in self.split) or ('test' in self.split):
373
- sample_rays = gen_rays_from_single_image(
374
- img_wh[1], img_wh[0],
375
- sample['query_image'],
376
- sample['query_intrinsic'],
377
- sample['query_c2w'],
378
- depth=sample['query_depth'],
379
- mask=sample['query_mask'] if self.clean_image else None)
380
- else:
381
- sample_rays = gen_random_rays_from_single_image(
382
- img_wh[1], img_wh[0],
383
- self.N_rays,
384
- sample['query_image'],
385
- sample['query_intrinsic'],
386
- sample['query_c2w'],
387
- depth=sample['query_depth'],
388
- mask=sample['query_mask'] if self.clean_image else None,
389
- dilated_mask=mask_dilated,
390
- importance_sample=self.importance_sample)
391
-
392
-
393
- sample['rays'] = sample_rays
394
-
395
- return sample
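
A detail shared by all of these loaders is that the ground-truth view_*_depth_mm.png files store z-depth in millimetres, which is converted to distance along each camera ray before being used as supervision. The sketch below reproduces that conversion in isolation; the focal length and the constant depth map are placeholders, not values taken from the repo.

import numpy as np
import torch
from kornia import create_meshgrid

H = W = 256
fx = fy = 280.0            # placeholder focal length; the real one comes from the pose JSON intrinsics
cx, cy = W / 2.0, H / 2.0

# Per-pixel ray directions in camera space, exactly as get_ray_directions() builds them.
grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5               # (H, W, 2)
i, j = grid.unbind(-1)
directions = torch.stack([(i - cx) / fx, (j - cy) / fy, torch.ones_like(i)], -1)  # (H, W, 3)

# Stand-in for cv2.imread(view_x_depth_mm.png, cv2.IMREAD_UNCHANGED): uint16 millimetres.
depth_mm = np.full((H, W), 1200, dtype=np.uint16)
depth_z = torch.from_numpy(depth_mm.astype(np.float32)) / 1000.0                  # metres, z-depth
mask = depth_z > 0                                                                # valid-depth mask, as in the loaders

surface_points = directions * depth_z[..., None]        # (H, W, 3) camera-space surface points
distance = torch.linalg.norm(surface_points, dim=-1)    # (H, W) per-ray distance; this is what depth_h becomes
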
SparseNeuS_demo_v1/data/blender_general_narrow_4_1_eval_new_data.py DELETED
@@ -1,418 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
-
18
-
19
- def get_ray_directions(H, W, focal, center=None):
20
- """
21
- Get ray directions for all pixels in camera coordinate.
22
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
23
- ray-tracing-generating-camera-rays/standard-coordinate-systems
24
- Inputs:
25
- H, W, focal: image height, width and focal length
26
- Outputs:
27
- directions: (H, W, 3), the direction of the rays in camera coordinate
28
- """
29
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
30
-
31
- i, j = grid.unbind(-1)
32
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
33
- # see https://github.com/bmild/nerf/issues/24
34
- cent = center if center is not None else [W / 2, H / 2]
35
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
36
-
37
- return directions
38
-
39
- def load_K_Rt_from_P(filename, P=None):
40
- if P is None:
41
- lines = open(filename).read().splitlines()
42
- if len(lines) == 4:
43
- lines = lines[1:]
44
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
45
- P = np.asarray(lines).astype(np.float32).squeeze()
46
-
47
- out = cv2.decomposeProjectionMatrix(P)
48
- K = out[0]
49
- R = out[1]
50
- t = out[2]
51
-
52
- K = K / K[2, 2]
53
- intrinsics = np.eye(4)
54
- intrinsics[:3, :3] = K
55
-
56
- pose = np.eye(4, dtype=np.float32)
57
-     pose[:3, :3] = R.transpose()  # R from decomposeProjectionMatrix is the world-to-camera rotation; transposing gives camera-to-world
58
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
59
-
60
- return intrinsics, pose # ! return cam2world matrix here
61
-
62
-
63
- # ! load one ref-image with multiple src-images in camera coordinate system
64
- class BlenderPerView(Dataset):
65
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
66
- split_filepath=None, pair_filepath=None,
67
- N_rays=512,
68
- vol_dims=[128, 128, 128], batch_size=1,
69
- clean_image=False, importance_sample=False, test_ref_views=[],
70
- specific_dataset_name = 'GSO'
71
- ):
72
-
73
- # print("root_dir: ", root_dir)
74
- self.root_dir = root_dir
75
- self.split = split
76
- # self.specific_dataset_name = 'Realfusion'
77
- # self.specific_dataset_name = 'GSO'
78
- # self.specific_dataset_name = 'Objaverse'
79
- # self.specific_dataset_name = 'Zero123'
80
-
81
- self.specific_dataset_name = specific_dataset_name
82
- self.n_views = n_views
83
- self.N_rays = N_rays
84
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
85
-
86
- self.clean_image = clean_image
87
- self.importance_sample = importance_sample
88
- self.test_ref_views = test_ref_views # used for testing
89
- self.scale_factor = 1.0
90
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
91
-         assert self.split in ('val', 'export_mesh'), 'only support val or export_mesh'
92
- # find all subfolders
93
- main_folder = os.path.join(root_dir, self.specific_dataset_name)
94
- self.shape_list = os.listdir(main_folder)
95
- self.shape_list.sort()
96
-
97
- # self.shape_list = ['barrel_render']
98
- # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
99
-
100
-
101
- self.lvis_paths = []
102
- for shape_name in self.shape_list:
103
- self.lvis_paths.append(os.path.join(main_folder, shape_name))
104
-
105
- # print("lvis_paths: ", self.lvis_paths)
106
-
107
- if img_wh is not None:
108
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
109
- 'img_wh must both be multiples of 32!'
110
-
111
-
112
- # * bounding box for rendering
113
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
114
- self.bbox_max = np.array([1.0, 1.0, 1.0])
115
-
116
- # - used for cost volume regularization
117
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
118
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
119
-
120
-
121
- def define_transforms(self):
122
- self.transform = T.Compose([T.ToTensor()])
123
-
124
-
125
-
126
- def load_cam_info(self):
127
- for vid, img_id in enumerate(self.img_ids):
128
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
129
- self.all_intrinsics.append(intrinsic)
130
- self.all_extrinsics.append(extrinsic)
131
- self.all_near_fars.append(near_far)
132
-
133
- def read_depth(self, filename):
134
- pass
135
-
136
- def read_mask(self, filename):
137
- mask_h = cv2.imread(filename, 0)
138
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
139
- interpolation=cv2.INTER_NEAREST)
140
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
141
- interpolation=cv2.INTER_NEAREST)
142
-
143
- mask[mask > 0] = 1 # the masks stored in png are not binary
144
- mask_h[mask_h > 0] = 1
145
-
146
- return mask, mask_h
147
-
148
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
149
-
150
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
151
-
152
- radius = radius * factor
153
- scale_mat = np.diag([radius, radius, radius, 1.0])
154
- scale_mat[:3, 3] = center.cpu().numpy()
155
- scale_mat = scale_mat.astype(np.float32)
156
-
157
- return scale_mat, 1. / radius.cpu().numpy()
158
-
159
- def __len__(self):
160
- return 8*len(self.lvis_paths)
161
- # return len(self.lvis_paths)
162
-
163
-
164
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
165
- pass
166
-
167
-
168
- def __getitem__(self, idx):
169
- sample = {}
170
- # idx = idx * 8 # to be deleted
171
- origin_idx = idx
172
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
173
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views
174
-
175
- folder_path = self.lvis_paths[idx//8]
176
- idx = idx % 8 # [0, 7]
177
-
178
- # last subdir name
179
- shape_name = os.path.split(folder_path)[-1]
180
-
181
- pose_json_path = os.path.join(folder_path, "pose.json")
182
- with open(pose_json_path, 'r') as f:
183
- meta = json.load(f)
184
-
185
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
186
- self.img_wh = (256, 256)
187
- self.input_poses = np.array(list(meta["c2ws"].values()))
188
- intrinsic = np.eye(4)
189
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
190
- self.intrinsic = intrinsic
191
- self.near_far = np.array(meta["near_far"])
192
- self.near_far[1] = 1.8
193
- self.define_transforms()
194
- self.blender2opencv = np.array(
195
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
196
- )
197
-
198
- self.c2ws = []
199
- self.w2cs = []
200
- self.near_fars = []
201
- # self.root_dir = root_dir
202
- for image_dix, img_id in enumerate(self.img_ids):
203
- pose = self.input_poses[image_dix]
204
- c2w = pose @ self.blender2opencv
205
- self.c2ws.append(c2w)
206
- self.w2cs.append(np.linalg.inv(c2w))
207
- self.near_fars.append(self.near_far)
208
- self.c2ws = np.stack(self.c2ws, axis=0)
209
- self.w2cs = np.stack(self.w2cs, axis=0)
210
-
211
-
212
- self.all_intrinsics = [] # the cam info of the whole scene
213
- self.all_extrinsics = []
214
- self.all_near_fars = []
215
- self.load_cam_info()
216
-
217
-
218
- # target view
219
- c2w = self.c2ws[idx]
220
- w2c = np.linalg.inv(c2w)
221
- w2c_ref = w2c
222
- w2c_ref_inv = np.linalg.inv(w2c_ref)
223
-
224
- w2cs.append(w2c @ w2c_ref_inv)
225
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
226
-
227
- # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
228
- img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}')
229
-
230
- img = Image.open(img_filename)
231
- img = self.transform(img) # (4, h, w)
232
-
233
-
234
- if img.shape[0] == 4:
235
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
236
- imgs += [img]
237
-
238
-
239
- depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
240
- depth_h = depth_h.fill_(-1.0)
241
- mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
242
-
243
-
244
- depths_h.append(depth_h)
245
- masks_h.append(mask_h)
246
-
247
- intrinsic = self.intrinsic
248
- intrinsics.append(intrinsic)
249
-
250
-
251
- near_fars.append(self.near_fars[idx])
252
- image_perm = 0 # only supervised on reference view
253
-
254
- mask_dilated = None
255
-
256
-
257
- # src_views = range(8, 8 + 8 * 4)
258
- src_views = range(8+idx*4, 8+(idx+1)*4)
259
- for vid in src_views:
260
-
261
- # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
262
- img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}')
263
- img = Image.open(img_filename)
264
- img_wh = self.img_wh
265
-
266
- img = self.transform(img)
267
- if img.shape[0] == 4:
268
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
269
-
270
- imgs += [img]
271
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
272
- depths_h.append(depth_h)
273
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
274
-
275
- near_fars.append(self.all_near_fars[vid])
276
- intrinsics.append(self.all_intrinsics[vid])
277
-
278
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
279
-
280
-
281
- # ! estimate scale_mat
282
- scale_mat, scale_factor = self.cal_scale_mat(
283
- img_hw=[img_wh[1], img_wh[0]],
284
- intrinsics=intrinsics, extrinsics=w2cs,
285
- near_fars=near_fars, factor=1.1
286
- )
287
-
288
-
289
- new_near_fars = []
290
- new_w2cs = []
291
- new_c2ws = []
292
- new_affine_mats = []
293
- new_depths_h = []
294
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
295
-
296
- P = intrinsic @ extrinsic @ scale_mat
297
- P = P[:3, :4]
298
- # - should use load_K_Rt_from_P() to obtain c2w
299
- c2w = load_K_Rt_from_P(None, P)[1]
300
- w2c = np.linalg.inv(c2w)
301
- new_w2cs.append(w2c)
302
- new_c2ws.append(c2w)
303
- affine_mat = np.eye(4)
304
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
305
- new_affine_mats.append(affine_mat)
306
-
307
- camera_o = c2w[:3, 3]
308
- dist = np.sqrt(np.sum(camera_o ** 2))
309
- near = dist - 1
310
- far = dist + 1
311
-
312
- new_near_fars.append([0.95 * near, 1.05 * far])
313
- new_depths_h.append(depth * scale_factor)
314
-
315
- # print(new_near_fars)
316
- imgs = torch.stack(imgs).float()
317
- depths_h = np.stack(new_depths_h)
318
- masks_h = np.stack(masks_h)
319
-
320
- affine_mats = np.stack(new_affine_mats)
321
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
322
- new_near_fars)
323
-
324
- if self.split == 'train':
325
- start_idx = 0
326
- else:
327
- start_idx = 1
328
-
329
-
330
-
331
- target_w2cs = []
332
- target_intrinsics = []
333
- new_target_w2cs = []
334
- for i_idx in range(8):
335
- target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
336
- target_intrinsics.append(self.all_intrinsics[i_idx])
337
-
338
- for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
339
-
340
- P = intrinsic @ extrinsic @ scale_mat
341
- P = P[:3, :4]
342
- # - should use load_K_Rt_from_P() to obtain c2w
343
- c2w = load_K_Rt_from_P(None, P)[1]
344
- w2c = np.linalg.inv(c2w)
345
- new_target_w2cs.append(w2c)
346
- target_w2cs = np.stack(new_target_w2cs)
347
-
348
-
349
-
350
- view_ids = [idx] + list(src_views)
351
- sample['origin_idx'] = origin_idx
352
- sample['images'] = imgs # (V, 3, H, W)
353
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
354
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
355
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
356
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
357
- sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
358
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
359
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
360
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
361
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
362
-
363
- # sample['light_idx'] = torch.tensor(light_idx)
364
- sample['scan'] = shape_name
365
-
366
- sample['scale_factor'] = torch.tensor(scale_factor)
367
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
368
- sample['render_img_idx'] = torch.tensor(image_perm)
369
- sample['partial_vol_origin'] = self.partial_vol_origin
370
- sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
371
- # print("meta: ", sample['meta'])
372
-
373
- # - image to render
374
- sample['query_image'] = sample['images'][0]
375
- sample['query_c2w'] = sample['c2ws'][0]
376
- sample['query_w2c'] = sample['w2cs'][0]
377
- sample['query_intrinsic'] = sample['intrinsics'][0]
378
- sample['query_depth'] = sample['depths_h'][0]
379
- sample['query_mask'] = sample['masks_h'][0]
380
- sample['query_near_far'] = sample['near_fars'][0]
381
-
382
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
383
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
384
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
385
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
386
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
387
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
388
- sample['view_ids'] = sample['view_ids'][start_idx:]
389
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
390
-
391
- sample['scale_mat'] = torch.from_numpy(scale_mat)
392
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
393
-
394
- # - generate rays
395
- if ('val' in self.split) or ('test' in self.split):
396
- sample_rays = gen_rays_from_single_image(
397
- img_wh[1], img_wh[0],
398
- sample['query_image'],
399
- sample['query_intrinsic'],
400
- sample['query_c2w'],
401
- depth=sample['query_depth'],
402
- mask=sample['query_mask'] if self.clean_image else None)
403
- else:
404
- sample_rays = gen_random_rays_from_single_image(
405
- img_wh[1], img_wh[0],
406
- self.N_rays,
407
- sample['query_image'],
408
- sample['query_intrinsic'],
409
- sample['query_c2w'],
410
- depth=sample['query_depth'],
411
- mask=sample['query_mask'] if self.clean_image else None,
412
- dilated_mask=mask_dilated,
413
- importance_sample=self.importance_sample)
414
-
415
-
416
- sample['rays'] = sample_rays
417
-
418
- return sample
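
A side note on the pattern every one of these deleted loaders ends with: at validation/test time rays are generated for every pixel of the query image (gen_rays_from_single_image), while training draws N_rays at random (gen_random_rays_from_single_image). Below is a minimal, self-contained sketch of the pinhole ray math those helpers are built on; the intrinsics and pose are made-up placeholders, not values taken from the deleted code.

import numpy as np

def pixel_rays(H, W, K, c2w):
    """Per-pixel ray origins/directions for a pinhole camera (OpenCV convention)."""
    i, j = np.meshgrid(np.arange(W), np.arange(H))                    # pixel grid, i = x, j = y
    dirs_cam = np.stack([(i + 0.5 - K[0, 2]) / K[0, 0],
                         (j + 0.5 - K[1, 2]) / K[1, 1],
                         np.ones_like(i, dtype=np.float64)], -1)      # (H, W, 3) in the camera frame
    rays_d = dirs_cam @ c2w[:3, :3].T                                 # rotate into the world frame
    rays_d /= np.linalg.norm(rays_d, axis=-1, keepdims=True)          # unit directions
    rays_o = np.broadcast_to(c2w[:3, 3], rays_d.shape)                # camera center, repeated per pixel
    return rays_o, rays_d

# placeholder 256x256 camera matching the loaders' image size; the focal length is invented
K = np.array([[280.0, 0.0, 128.0], [0.0, 280.0, 128.0], [0.0, 0.0, 1.0]])
rays_o, rays_d = pixel_rays(256, 256, K, np.eye(4))
print(rays_o.shape, rays_d.shape)   # (256, 256, 3) (256, 256, 3)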
 
SparseNeuS_demo_v1/data/blender_general_narrow_6.py DELETED
@@ -1,399 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- # print("root_dir: ", root_dir)
70
- self.root_dir = root_dir
71
- self.split = split
72
-
73
- self.n_views = n_views
74
- self.N_rays = N_rays
75
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
76
-
77
- self.clean_image = clean_image
78
- self.importance_sample = importance_sample
79
- self.test_ref_views = test_ref_views # used for testing
80
- self.scale_factor = 1.0
81
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
-
83
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
- with open(lvis_json_path, 'r') as f:
85
- lvis_paths = json.load(f)
86
- if self.split == 'train':
87
- self.lvis_paths = lvis_paths['train']
88
- else:
89
- self.lvis_paths = lvis_paths['val']
90
- if img_wh is not None:
91
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
- 'img_wh must both be multiples of 32!'
93
-
94
-
95
- pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
- with open(pose_json_path, 'r') as f:
97
- meta = json.load(f)
98
-
99
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
- self.img_wh = (256, 256)
101
- self.input_poses = np.array(list(meta["c2ws"].values()))
102
- intrinsic = np.eye(4)
103
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
- self.intrinsic = intrinsic
105
- self.near_far = np.array(meta["near_far"])
106
- self.near_far[1] = 1.8
107
- self.define_transforms()
108
- self.blender2opencv = np.array(
109
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
- )
111
-
112
-
113
- self.c2ws = []
114
- self.w2cs = []
115
- self.near_fars = []
116
- # self.root_dir = root_dir
117
- for idx, img_id in enumerate(self.img_ids):
118
- pose = self.input_poses[idx]
119
- c2w = pose @ self.blender2opencv
120
- self.c2ws.append(c2w)
121
- self.w2cs.append(np.linalg.inv(c2w))
122
- self.near_fars.append(self.near_far)
123
- self.c2ws = np.stack(self.c2ws, axis=0)
124
- self.w2cs = np.stack(self.w2cs, axis=0)
125
-
126
-
127
- self.all_intrinsics = [] # the cam info of the whole scene
128
- self.all_extrinsics = []
129
- self.all_near_fars = []
130
- self.load_cam_info()
131
-
132
- # * bounding box for rendering
133
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
- self.bbox_max = np.array([1.0, 1.0, 1.0])
135
-
136
- # - used for cost volume regularization
137
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
-
140
-
141
- def define_transforms(self):
142
- self.transform = T.Compose([T.ToTensor()])
143
-
144
-
145
-
146
- def load_cam_info(self):
147
- for vid, img_id in enumerate(self.img_ids):
148
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
- self.all_intrinsics.append(intrinsic)
150
- self.all_extrinsics.append(extrinsic)
151
- self.all_near_fars.append(near_far)
152
-
153
- def read_depth(self, filename):
154
- pass
155
-
156
- def read_mask(self, filename):
157
- mask_h = cv2.imread(filename, 0)
158
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
- interpolation=cv2.INTER_NEAREST)
160
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
- interpolation=cv2.INTER_NEAREST)
162
-
163
- mask[mask > 0] = 1 # the masks stored in png are not binary
164
- mask_h[mask_h > 0] = 1
165
-
166
- return mask, mask_h
167
-
168
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
-
170
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
- # print("center", center)
172
- # print("radius", radius)
173
- # print("bounds", bounds)
174
- # import ipdb; ipdb.set_trace()
175
- radius = radius * factor
176
- scale_mat = np.diag([radius, radius, radius, 1.0])
177
- scale_mat[:3, 3] = center.cpu().numpy()
178
- scale_mat = scale_mat.astype(np.float32)
179
-
180
- return scale_mat, 1. / radius.cpu().numpy()
181
-
182
- def __len__(self):
183
- if self.split == 'train':
184
- return 6*len(self.lvis_paths)
185
- else:
186
- return 8*len(self.lvis_paths)
187
-
188
-
189
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
190
- pass
191
-
192
-
193
- def __getitem__(self, idx):
194
- sample = {}
195
- origin_idx = idx
196
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
197
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
198
-
199
- if self.split == 'train':
200
- folder_uid_dict = self.lvis_paths[idx//6]
201
- idx = idx % 6 # [0, 5]
202
- if idx == 4:
203
- idx = 5
204
- elif idx == 5:
205
- idx = 7
206
- else:
207
- folder_uid_dict = self.lvis_paths[idx//8]
208
- idx = idx % 8 # [0, 7]
209
-
210
- folder_id = folder_uid_dict['folder_id']
211
- uid = folder_uid_dict['uid']
212
-
213
-
214
- # target view
215
- c2w = self.c2ws[idx]
216
- w2c = np.linalg.inv(c2w)
217
- w2c_ref = w2c
218
- w2c_ref_inv = np.linalg.inv(w2c_ref)
219
-
220
- w2cs.append(w2c @ w2c_ref_inv)
221
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
222
-
223
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
224
-
225
- depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
226
-
227
-
228
- img = Image.open(img_filename)
229
-
230
- img = self.transform(img) # (4, h, w)
231
-
232
-
233
- if img.shape[0] == 4:
234
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
235
- imgs += [img]
236
-
237
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
238
- mask_h = depth_h > 0
239
- # print("valid pixels", np.sum(mask_h))
240
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
241
- surface_points = directions * depth_h[..., None] # [H, W, 3]
242
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
243
- depth_h = distance
244
-
245
-
246
- depths_h.append(depth_h)
247
- masks_h.append(mask_h)
248
-
249
- intrinsic = self.intrinsic
250
- intrinsics.append(intrinsic)
251
-
252
-
253
- near_fars.append(self.near_fars[idx])
254
- image_perm = 0 # only supervised on reference view
255
-
256
- mask_dilated = None
257
-
258
- # src_views = range(8+idx*4, 8+(idx+1)*4)
259
- src_views = range(8, 8 + 8 * 4)
260
-
261
- for vid in src_views:
262
- if ((vid - 8) // 4 == 4) or ((vid - 8) // 4 == 6):
263
- continue
264
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
265
-
266
- img = Image.open(img_filename)
267
- img_wh = self.img_wh
268
-
269
- img = self.transform(img)
270
- if img.shape[0] == 4:
271
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
272
-
273
- imgs += [img]
274
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
275
- depths_h.append(depth_h)
276
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
277
-
278
- near_fars.append(self.all_near_fars[vid])
279
- intrinsics.append(self.all_intrinsics[vid])
280
-
281
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
282
- # print("len(imges)", len(imgs))
283
-
284
- # ! estimate scale_mat
285
- scale_mat, scale_factor = self.cal_scale_mat(
286
- img_hw=[img_wh[1], img_wh[0]],
287
- intrinsics=intrinsics, extrinsics=w2cs,
288
- near_fars=near_fars, factor=1.1
289
- )
290
-
291
-
292
- new_near_fars = []
293
- new_w2cs = []
294
- new_c2ws = []
295
- new_affine_mats = []
296
- new_depths_h = []
297
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
298
-
299
- P = intrinsic @ extrinsic @ scale_mat
300
- P = P[:3, :4]
301
- # - should use load_K_Rt_from_P() to obtain c2w
302
- c2w = load_K_Rt_from_P(None, P)[1]
303
- w2c = np.linalg.inv(c2w)
304
- new_w2cs.append(w2c)
305
- new_c2ws.append(c2w)
306
- affine_mat = np.eye(4)
307
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
308
- new_affine_mats.append(affine_mat)
309
-
310
- camera_o = c2w[:3, 3]
311
- dist = np.sqrt(np.sum(camera_o ** 2))
312
- near = dist - 1
313
- far = dist + 1
314
-
315
- new_near_fars.append([0.95 * near, 1.05 * far])
316
- new_depths_h.append(depth * scale_factor)
317
-
318
- # print(new_near_fars)
319
- imgs = torch.stack(imgs).float()
320
- depths_h = np.stack(new_depths_h)
321
- masks_h = np.stack(masks_h)
322
-
323
- affine_mats = np.stack(new_affine_mats)
324
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
325
- new_near_fars)
326
-
327
- if self.split == 'train':
328
- start_idx = 0
329
- else:
330
- start_idx = 1
331
-
332
- view_ids = [idx] + list(src_views)
333
- sample['origin_idx'] = origin_idx
334
- sample['images'] = imgs # (V, 3, H, W)
335
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
336
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
337
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
338
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
339
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
340
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
341
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
342
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
343
-
344
- # sample['light_idx'] = torch.tensor(light_idx)
345
- sample['scan'] = folder_id
346
-
347
- sample['scale_factor'] = torch.tensor(scale_factor)
348
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
349
- sample['render_img_idx'] = torch.tensor(image_perm)
350
- sample['partial_vol_origin'] = self.partial_vol_origin
351
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
352
-
353
-
354
- # - image to render
355
- sample['query_image'] = sample['images'][0]
356
- sample['query_c2w'] = sample['c2ws'][0]
357
- sample['query_w2c'] = sample['w2cs'][0]
358
- sample['query_intrinsic'] = sample['intrinsics'][0]
359
- sample['query_depth'] = sample['depths_h'][0]
360
- sample['query_mask'] = sample['masks_h'][0]
361
- sample['query_near_far'] = sample['near_fars'][0]
362
-
363
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
364
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
365
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
366
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
367
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
368
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
369
- sample['view_ids'] = sample['view_ids'][start_idx:]
370
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
371
-
372
- sample['scale_mat'] = torch.from_numpy(scale_mat)
373
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
374
-
375
- # - generate rays
376
- if ('val' in self.split) or ('test' in self.split):
377
- sample_rays = gen_rays_from_single_image(
378
- img_wh[1], img_wh[0],
379
- sample['query_image'],
380
- sample['query_intrinsic'],
381
- sample['query_c2w'],
382
- depth=sample['query_depth'],
383
- mask=sample['query_mask'] if self.clean_image else None)
384
- else:
385
- sample_rays = gen_random_rays_from_single_image(
386
- img_wh[1], img_wh[0],
387
- self.N_rays,
388
- sample['query_image'],
389
- sample['query_intrinsic'],
390
- sample['query_c2w'],
391
- depth=sample['query_depth'],
392
- mask=sample['query_mask'] if self.clean_image else None,
393
- dilated_mask=mask_dilated,
394
- importance_sample=self.importance_sample)
395
-
396
-
397
- sample['rays'] = sample_rays
398
-
399
- return sample
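
Like the other narrow-view loaders, this file reads view_{idx}_depth_mm.png as z-depth in millimetres and converts it to per-pixel distance along the camera ray (the quantity the renderer is supervised with) by scaling unit-z ray directions with the depth and taking the norm. A numpy-only sketch of that conversion, using an invented focal length and a synthetic depth map:

import numpy as np

def zdepth_to_ray_distance(depth_z, fx, fy, cx, cy):
    """Convert a z-depth map (metres) into Euclidean distance from the camera center."""
    H, W = depth_z.shape
    i, j = np.meshgrid(np.arange(W) + 0.5, np.arange(H) + 0.5)
    dirs = np.stack([(i - cx) / fx, (j - cy) / fy, np.ones_like(i)], axis=-1)  # unit-z directions, (H, W, 3)
    surface_points = dirs * depth_z[..., None]       # back-projected points in the camera frame
    return np.linalg.norm(surface_points, axis=-1)   # (H, W) distance along each ray

# synthetic example: a flat plane 1.2 m in front of a 256x256 camera (the focal length is a guess)
depth_z = np.full((256, 256), 1.2, dtype=np.float32)
dist = zdepth_to_ray_distance(depth_z, fx=280.0, fy=280.0, cx=128.0, cy=128.0)
print(dist.min(), dist.max())   # ~1.2 at the image center, slightly larger toward the corners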
 
SparseNeuS_demo_v1/data/blender_general_narrow_8_3_fixed.py DELETED
@@ -1,393 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- # print("root_dir: ", root_dir)
70
- self.root_dir = root_dir
71
- self.split = split
72
-
73
- self.n_views = n_views
74
- self.N_rays = N_rays
75
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
76
-
77
- self.clean_image = clean_image
78
- self.importance_sample = importance_sample
79
- self.test_ref_views = test_ref_views # used for testing
80
- self.scale_factor = 1.0
81
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
-
83
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
- with open(lvis_json_path, 'r') as f:
85
- lvis_paths = json.load(f)
86
- if self.split == 'train':
87
- self.lvis_paths = lvis_paths['train']
88
- else:
89
- self.lvis_paths = lvis_paths['val']
90
- if img_wh is not None:
91
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
- 'img_wh must both be multiples of 32!'
93
-
94
-
95
- pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
- with open(pose_json_path, 'r') as f:
97
- meta = json.load(f)
98
-
99
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
- self.img_wh = (256, 256)
101
- self.input_poses = np.array(list(meta["c2ws"].values()))
102
- intrinsic = np.eye(4)
103
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
- self.intrinsic = intrinsic
105
- self.near_far = np.array(meta["near_far"])
106
- self.near_far[1] = 1.8
107
- self.define_transforms()
108
- self.blender2opencv = np.array(
109
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
- )
111
-
112
-
113
- self.c2ws = []
114
- self.w2cs = []
115
- self.near_fars = []
116
- # self.root_dir = root_dir
117
- for idx, img_id in enumerate(self.img_ids):
118
- pose = self.input_poses[idx]
119
- c2w = pose @ self.blender2opencv
120
- self.c2ws.append(c2w)
121
- self.w2cs.append(np.linalg.inv(c2w))
122
- self.near_fars.append(self.near_far)
123
- self.c2ws = np.stack(self.c2ws, axis=0)
124
- self.w2cs = np.stack(self.w2cs, axis=0)
125
-
126
-
127
- self.all_intrinsics = [] # the cam info of the whole scene
128
- self.all_extrinsics = []
129
- self.all_near_fars = []
130
- self.load_cam_info()
131
-
132
- # * bounding box for rendering
133
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
- self.bbox_max = np.array([1.0, 1.0, 1.0])
135
-
136
- # - used for cost volume regularization
137
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
-
140
-
141
- def define_transforms(self):
142
- self.transform = T.Compose([T.ToTensor()])
143
-
144
-
145
-
146
- def load_cam_info(self):
147
- for vid, img_id in enumerate(self.img_ids):
148
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
- self.all_intrinsics.append(intrinsic)
150
- self.all_extrinsics.append(extrinsic)
151
- self.all_near_fars.append(near_far)
152
-
153
- def read_depth(self, filename):
154
- pass
155
-
156
- def read_mask(self, filename):
157
- mask_h = cv2.imread(filename, 0)
158
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
- interpolation=cv2.INTER_NEAREST)
160
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
- interpolation=cv2.INTER_NEAREST)
162
-
163
- mask[mask > 0] = 1 # the masks stored in png are not binary
164
- mask_h[mask_h > 0] = 1
165
-
166
- return mask, mask_h
167
-
168
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
-
170
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
- # print("center", center)
172
- # print("radius", radius)
173
- # print("bounds", bounds)
174
- # import ipdb; ipdb.set_trace()
175
- radius = radius * factor
176
- scale_mat = np.diag([radius, radius, radius, 1.0])
177
- scale_mat[:3, 3] = center.cpu().numpy()
178
- scale_mat = scale_mat.astype(np.float32)
179
-
180
- return scale_mat, 1. / radius.cpu().numpy()
181
-
182
- def __len__(self):
183
- return 8*len(self.lvis_paths)
184
-
185
-
186
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
187
- pass
188
-
189
-
190
- def __getitem__(self, idx):
191
- sample = {}
192
- origin_idx = idx
193
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
194
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
195
-
196
-
197
- folder_uid_dict = self.lvis_paths[idx//8]
198
- idx = idx % 8 # [0, 7]
199
- folder_id = folder_uid_dict['folder_id']
200
- uid = folder_uid_dict['uid']
201
-
202
-
203
- # target view
204
- c2w = self.c2ws[idx]
205
- w2c = np.linalg.inv(c2w)
206
- w2c_ref = w2c
207
- w2c_ref_inv = np.linalg.inv(w2c_ref)
208
-
209
- w2cs.append(w2c @ w2c_ref_inv)
210
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
211
-
212
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
213
-
214
- depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
215
-
216
-
217
- img = Image.open(img_filename)
218
-
219
- img = self.transform(img) # (4, h, w)
220
-
221
-
222
- if img.shape[0] == 4:
223
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
224
- imgs += [img]
225
-
226
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
227
- mask_h = depth_h > 0
228
- # print("valid pixels", np.sum(mask_h))
229
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
230
- surface_points = directions * depth_h[..., None] # [H, W, 3]
231
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
232
- depth_h = distance
233
-
234
-
235
- depths_h.append(depth_h)
236
- masks_h.append(mask_h)
237
-
238
- intrinsic = self.intrinsic
239
- intrinsics.append(intrinsic)
240
-
241
-
242
- near_fars.append(self.near_fars[idx])
243
- image_perm = 0 # only supervised on reference view
244
-
245
- mask_dilated = None
246
-
247
- # src_views = range(8+idx*4, 8+(idx+1)*4)
248
- src_views = list()
249
- for i in range(8):
250
- # randomly choose 3 different number from [0,3]
251
- # local_idxs = np.random.choice(4, 3, replace=False)
252
- local_idxs = [0, 2, 3]
253
- # local_idxs = np.random.choice(4, 3, replace=False)
254
-
255
- local_idxs = [8 + i * 4 + local_idx for local_idx in local_idxs]
256
- src_views += local_idxs
257
- for vid in src_views:
258
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
259
-
260
- img = Image.open(img_filename)
261
- img_wh = self.img_wh
262
-
263
- img = self.transform(img)
264
- if img.shape[0] == 4:
265
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
266
-
267
- imgs += [img]
268
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
269
- depths_h.append(depth_h)
270
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
271
-
272
- near_fars.append(self.all_near_fars[vid])
273
- intrinsics.append(self.all_intrinsics[vid])
274
-
275
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
276
-
277
- # print("len(imgs)", len(imgs))
278
- # ! estimate scale_mat
279
- scale_mat, scale_factor = self.cal_scale_mat(
280
- img_hw=[img_wh[1], img_wh[0]],
281
- intrinsics=intrinsics, extrinsics=w2cs,
282
- near_fars=near_fars, factor=1.1
283
- )
284
-
285
-
286
- new_near_fars = []
287
- new_w2cs = []
288
- new_c2ws = []
289
- new_affine_mats = []
290
- new_depths_h = []
291
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
292
-
293
- P = intrinsic @ extrinsic @ scale_mat
294
- P = P[:3, :4]
295
- # - should use load_K_Rt_from_P() to obtain c2w
296
- c2w = load_K_Rt_from_P(None, P)[1]
297
- w2c = np.linalg.inv(c2w)
298
- new_w2cs.append(w2c)
299
- new_c2ws.append(c2w)
300
- affine_mat = np.eye(4)
301
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
302
- new_affine_mats.append(affine_mat)
303
-
304
- camera_o = c2w[:3, 3]
305
- dist = np.sqrt(np.sum(camera_o ** 2))
306
- near = dist - 1
307
- far = dist + 1
308
-
309
- new_near_fars.append([0.95 * near, 1.05 * far])
310
- new_depths_h.append(depth * scale_factor)
311
-
312
- # print(new_near_fars)
313
- imgs = torch.stack(imgs).float()
314
- depths_h = np.stack(new_depths_h)
315
- masks_h = np.stack(masks_h)
316
-
317
- affine_mats = np.stack(new_affine_mats)
318
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
319
- new_near_fars)
320
-
321
- if self.split == 'train':
322
- start_idx = 0
323
- else:
324
- start_idx = 1
325
-
326
- view_ids = [idx] + list(src_views)
327
- sample['origin_idx'] = origin_idx
328
- sample['images'] = imgs # (V, 3, H, W)
329
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
330
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
331
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
332
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
333
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
334
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
335
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
336
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
337
-
338
- # sample['light_idx'] = torch.tensor(light_idx)
339
- sample['scan'] = folder_id
340
-
341
- sample['scale_factor'] = torch.tensor(scale_factor)
342
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
343
- sample['render_img_idx'] = torch.tensor(image_perm)
344
- sample['partial_vol_origin'] = self.partial_vol_origin
345
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
346
-
347
-
348
- # - image to render
349
- sample['query_image'] = sample['images'][0]
350
- sample['query_c2w'] = sample['c2ws'][0]
351
- sample['query_w2c'] = sample['w2cs'][0]
352
- sample['query_intrinsic'] = sample['intrinsics'][0]
353
- sample['query_depth'] = sample['depths_h'][0]
354
- sample['query_mask'] = sample['masks_h'][0]
355
- sample['query_near_far'] = sample['near_fars'][0]
356
-
357
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
358
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
359
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
360
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
361
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
362
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
363
- sample['view_ids'] = sample['view_ids'][start_idx:]
364
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
365
-
366
- sample['scale_mat'] = torch.from_numpy(scale_mat)
367
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
368
-
369
- # - generate rays
370
- if ('val' in self.split) or ('test' in self.split):
371
- sample_rays = gen_rays_from_single_image(
372
- img_wh[1], img_wh[0],
373
- sample['query_image'],
374
- sample['query_intrinsic'],
375
- sample['query_c2w'],
376
- depth=sample['query_depth'],
377
- mask=sample['query_mask'] if self.clean_image else None)
378
- else:
379
- sample_rays = gen_random_rays_from_single_image(
380
- img_wh[1], img_wh[0],
381
- self.N_rays,
382
- sample['query_image'],
383
- sample['query_intrinsic'],
384
- sample['query_c2w'],
385
- depth=sample['query_depth'],
386
- mask=sample['query_mask'] if self.clean_image else None,
387
- dilated_mask=mask_dilated,
388
- importance_sample=self.importance_sample)
389
-
390
-
391
- sample['rays'] = sample_rays
392
-
393
- return sample
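
The '_8_3_fixed' variant keeps, for every one of the 8 anchor views, the fixed subset [0, 2, 3] of its 4 associated source views (24 source views in total); a source index vid in [8, 40) maps to the file view_{(vid - 8) // 4}_{vid % 4}_10.png. A standalone sketch of that index arithmetic follows; root_dir, folder_id and uid are placeholders, not real paths.

import os

def fixed_src_view_files(root_dir, folder_id, uid, keep=(0, 2, 3)):
    """Reproduce the fixed 8 x 3 source-view selection used by this loader."""
    src_views = []
    for anchor in range(8):                                  # 8 anchor views
        src_views += [8 + anchor * 4 + k for k in keep]      # keep 3 of the 4 views per anchor
    files = [os.path.join(root_dir, folder_id, uid,
                          f'view_{(vid - 8) // 4}_{vid % 4}_10.png')
             for vid in src_views]
    return src_views, files

src_views, files = fixed_src_view_files('/path/to/narrow_views', 'folder_placeholder', 'uid_placeholder')
print(len(files))    # 24
print(files[0])      # .../view_0_0_10.png
print(files[-1])     # .../view_7_3_10.png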
 
SparseNeuS_demo_v1/data/blender_general_narrow_8_3_random.py DELETED
@@ -1,395 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- # print("root_dir: ", root_dir)
70
- self.root_dir = root_dir
71
- self.split = split
72
-
73
- self.n_views = n_views
74
- self.N_rays = N_rays
75
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
76
-
77
- self.clean_image = clean_image
78
- self.importance_sample = importance_sample
79
- self.test_ref_views = test_ref_views # used for testing
80
- self.scale_factor = 1.0
81
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
-
83
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
- with open(lvis_json_path, 'r') as f:
85
- lvis_paths = json.load(f)
86
- if self.split == 'train':
87
- self.lvis_paths = lvis_paths['train']
88
- else:
89
- self.lvis_paths = lvis_paths['val']
90
- if img_wh is not None:
91
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
- 'img_wh must both be multiples of 32!'
93
-
94
-
95
- pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
- with open(pose_json_path, 'r') as f:
97
- meta = json.load(f)
98
-
99
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
- self.img_wh = (256, 256)
101
- self.input_poses = np.array(list(meta["c2ws"].values()))
102
- intrinsic = np.eye(4)
103
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
- self.intrinsic = intrinsic
105
- self.near_far = np.array(meta["near_far"])
106
- self.near_far[1] = 1.8
107
- self.define_transforms()
108
- self.blender2opencv = np.array(
109
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
- )
111
-
112
-
113
- self.c2ws = []
114
- self.w2cs = []
115
- self.near_fars = []
116
- # self.root_dir = root_dir
117
- for idx, img_id in enumerate(self.img_ids):
118
- pose = self.input_poses[idx]
119
- c2w = pose @ self.blender2opencv
120
- self.c2ws.append(c2w)
121
- self.w2cs.append(np.linalg.inv(c2w))
122
- self.near_fars.append(self.near_far)
123
- self.c2ws = np.stack(self.c2ws, axis=0)
124
- self.w2cs = np.stack(self.w2cs, axis=0)
125
-
126
-
127
- self.all_intrinsics = [] # the cam info of the whole scene
128
- self.all_extrinsics = []
129
- self.all_near_fars = []
130
- self.load_cam_info()
131
-
132
- # * bounding box for rendering
133
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
- self.bbox_max = np.array([1.0, 1.0, 1.0])
135
-
136
- # - used for cost volume regularization
137
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
-
140
-
141
- def define_transforms(self):
142
- self.transform = T.Compose([T.ToTensor()])
143
-
144
-
145
-
146
- def load_cam_info(self):
147
- for vid, img_id in enumerate(self.img_ids):
148
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
- self.all_intrinsics.append(intrinsic)
150
- self.all_extrinsics.append(extrinsic)
151
- self.all_near_fars.append(near_far)
152
-
153
- def read_depth(self, filename):
154
- pass
155
-
156
- def read_mask(self, filename):
157
- mask_h = cv2.imread(filename, 0)
158
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
- interpolation=cv2.INTER_NEAREST)
160
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
- interpolation=cv2.INTER_NEAREST)
162
-
163
- mask[mask > 0] = 1 # the masks stored in png are not binary
164
- mask_h[mask_h > 0] = 1
165
-
166
- return mask, mask_h
167
-
168
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
-
170
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
- # print("center", center)
172
- # print("radius", radius)
173
- # print("bounds", bounds)
174
- # import ipdb; ipdb.set_trace()
175
- radius = radius * factor
176
- scale_mat = np.diag([radius, radius, radius, 1.0])
177
- scale_mat[:3, 3] = center.cpu().numpy()
178
- scale_mat = scale_mat.astype(np.float32)
179
-
180
- return scale_mat, 1. / radius.cpu().numpy()
181
-
182
- def __len__(self):
183
- return 8*len(self.lvis_paths)
184
-
185
-
186
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
187
- pass
188
-
189
-
190
- def __getitem__(self, idx):
191
- sample = {}
192
- origin_idx = idx
193
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
194
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
195
-
196
-
197
- folder_uid_dict = self.lvis_paths[idx//8]
198
- idx = idx % 8 # [0, 7]
199
- folder_id = folder_uid_dict['folder_id']
200
- uid = folder_uid_dict['uid']
201
-
202
-
203
- # target view
204
- c2w = self.c2ws[idx]
205
- w2c = np.linalg.inv(c2w)
206
- w2c_ref = w2c
207
- w2c_ref_inv = np.linalg.inv(w2c_ref)
208
-
209
- w2cs.append(w2c @ w2c_ref_inv)
210
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
211
-
212
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
213
-
214
- depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
215
-
216
-
217
- img = Image.open(img_filename)
218
-
219
- img = self.transform(img) # (4, h, w)
220
-
221
-
222
- if img.shape[0] == 4:
223
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
224
- imgs += [img]
225
-
226
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
227
- mask_h = depth_h > 0
228
- # print("valid pixels", np.sum(mask_h))
229
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
230
- surface_points = directions * depth_h[..., None] # [H, W, 3]
231
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
232
- depth_h = distance
233
-
234
-
235
- depths_h.append(depth_h)
236
- masks_h.append(mask_h)
237
-
238
- intrinsic = self.intrinsic
239
- intrinsics.append(intrinsic)
240
-
241
-
242
- near_fars.append(self.near_fars[idx])
243
- image_perm = 0 # only supervised on reference view
244
-
245
- mask_dilated = None
246
-
247
- # src_views = range(8+idx*4, 8+(idx+1)*4)
248
- src_views = list()
249
- for i in range(8):
250
-
251
- if self.split == 'train':
252
- local_idxs = np.random.choice(4, 3, replace=False)
253
- else:
254
- local_idxs = [0, 2, 3]
255
- # local_idxs = np.random.choice(4, 3, replace=False)
256
-
257
- local_idxs = [8 + i * 4 + local_idx for local_idx in local_idxs]
258
- src_views += local_idxs
259
- for vid in src_views:
260
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
261
-
262
- img = Image.open(img_filename)
263
- img_wh = self.img_wh
264
-
265
- img = self.transform(img)
266
- if img.shape[0] == 4:
267
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
268
-
269
- imgs += [img]
270
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
271
- depths_h.append(depth_h)
272
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
273
-
274
- near_fars.append(self.all_near_fars[vid])
275
- intrinsics.append(self.all_intrinsics[vid])
276
-
277
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
278
-
279
- # print("len(imgs)", len(imgs))
280
- # ! estimate scale_mat
281
- scale_mat, scale_factor = self.cal_scale_mat(
282
- img_hw=[img_wh[1], img_wh[0]],
283
- intrinsics=intrinsics, extrinsics=w2cs,
284
- near_fars=near_fars, factor=1.1
285
- )
286
-
287
-
288
- new_near_fars = []
289
- new_w2cs = []
290
- new_c2ws = []
291
- new_affine_mats = []
292
- new_depths_h = []
293
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
294
-
295
- P = intrinsic @ extrinsic @ scale_mat
296
- P = P[:3, :4]
297
- # - should use load_K_Rt_from_P() to obtain c2w
298
- c2w = load_K_Rt_from_P(None, P)[1]
299
- w2c = np.linalg.inv(c2w)
300
- new_w2cs.append(w2c)
301
- new_c2ws.append(c2w)
302
- affine_mat = np.eye(4)
303
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
304
- new_affine_mats.append(affine_mat)
305
-
306
- camera_o = c2w[:3, 3]
307
- dist = np.sqrt(np.sum(camera_o ** 2))
308
- near = dist - 1
309
- far = dist + 1
310
-
311
- new_near_fars.append([0.95 * near, 1.05 * far])
312
- new_depths_h.append(depth * scale_factor)
313
-
314
- # print(new_near_fars)
315
- imgs = torch.stack(imgs).float()
316
- depths_h = np.stack(new_depths_h)
317
- masks_h = np.stack(masks_h)
318
-
319
- affine_mats = np.stack(new_affine_mats)
320
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
321
- new_near_fars)
322
-
323
- if self.split == 'train':
324
- start_idx = 0
325
- else:
326
- start_idx = 1
327
-
328
- view_ids = [idx] + list(src_views)
329
- sample['origin_idx'] = origin_idx
330
- sample['images'] = imgs # (V, 3, H, W)
331
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
332
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
333
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
334
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
335
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
336
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
337
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
338
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
339
-
340
- # sample['light_idx'] = torch.tensor(light_idx)
341
- sample['scan'] = folder_id
342
-
343
- sample['scale_factor'] = torch.tensor(scale_factor)
344
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
345
- sample['render_img_idx'] = torch.tensor(image_perm)
346
- sample['partial_vol_origin'] = self.partial_vol_origin
347
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
348
-
349
-
350
- # - image to render
351
- sample['query_image'] = sample['images'][0]
352
- sample['query_c2w'] = sample['c2ws'][0]
353
- sample['query_w2c'] = sample['w2cs'][0]
354
- sample['query_intrinsic'] = sample['intrinsics'][0]
355
- sample['query_depth'] = sample['depths_h'][0]
356
- sample['query_mask'] = sample['masks_h'][0]
357
- sample['query_near_far'] = sample['near_fars'][0]
358
-
359
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
360
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
361
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
362
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
363
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
364
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
365
- sample['view_ids'] = sample['view_ids'][start_idx:]
366
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
367
-
368
- sample['scale_mat'] = torch.from_numpy(scale_mat)
369
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
370
-
371
- # - generate rays
372
- if ('val' in self.split) or ('test' in self.split):
373
- sample_rays = gen_rays_from_single_image(
374
- img_wh[1], img_wh[0],
375
- sample['query_image'],
376
- sample['query_intrinsic'],
377
- sample['query_c2w'],
378
- depth=sample['query_depth'],
379
- mask=sample['query_mask'] if self.clean_image else None)
380
- else:
381
- sample_rays = gen_random_rays_from_single_image(
382
- img_wh[1], img_wh[0],
383
- self.N_rays,
384
- sample['query_image'],
385
- sample['query_intrinsic'],
386
- sample['query_c2w'],
387
- depth=sample['query_depth'],
388
- mask=sample['query_mask'] if self.clean_image else None,
389
- dilated_mask=mask_dilated,
390
- importance_sample=self.importance_sample)
391
-
392
-
393
- sample['rays'] = sample_rays
394
-
395
- return sample
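
The only difference from the '_fixed' variant above is that, during training, the 3-of-4 subset is re-drawn per anchor with np.random.choice(4, 3, replace=False). Every loader then normalizes the scene with cal_scale_mat() and recomputes per-view near/far from the normalized camera distance (near = dist - 1, far = dist + 1, padded by 0.95/1.05). A self-contained sketch of that normalization step, with an invented bounding sphere and camera pose; it mirrors what the loaders recover via load_K_Rt_from_P().

import numpy as np

def normalize_camera(c2w, center, radius, factor=1.1):
    """Map a bounding sphere (center, radius*factor) to the unit sphere and
    recompute near/far from the normalized camera distance, as the loaders do."""
    scale_factor = 1.0 / (radius * factor)                 # world units -> normalized units
    R = c2w[:3, :3]
    t_norm = (c2w[:3, 3] - center) * scale_factor          # camera center in normalized space
    c2w_norm = np.eye(4, dtype=np.float32)
    c2w_norm[:3, :3] = R
    c2w_norm[:3, 3] = t_norm
    dist = np.linalg.norm(t_norm)
    near_far = [0.95 * (dist - 1.0), 1.05 * (dist + 1.0)]  # bracket the unit sphere with some padding
    return c2w_norm, scale_factor, near_far

# invented example: object inside a sphere of radius 0.4 at the origin, camera 1.5 units away
c2w = np.eye(4); c2w[:3, 3] = [0.0, 0.0, 1.5]
c2w_norm, s, near_far = normalize_camera(c2w, center=np.zeros(3), radius=0.4)
print(s, near_far)   # scale_factor ~2.27; near/far roughly [2.29, 4.63]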
 
SparseNeuS_demo_v1/data/blender_general_narrow_8_4_random_shading.py DELETED
@@ -1,432 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- # print("root_dir: ", root_dir)
70
- self.root_dir = root_dir
71
- self.split = split
72
-
73
- self.n_views = n_views
74
- self.N_rays = N_rays
75
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
76
-
77
- self.clean_image = clean_image
78
- self.importance_sample = importance_sample
79
- self.test_ref_views = test_ref_views # used for testing
80
- self.scale_factor = 1.0
81
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
-
83
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
- with open(lvis_json_path, 'r') as f:
85
- lvis_paths = json.load(f)
86
- if self.split == 'train':
87
- self.lvis_paths = lvis_paths['train']
88
- else:
89
- self.lvis_paths = lvis_paths['val']
90
- if img_wh is not None:
91
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
- 'img_wh must both be multiples of 32!'
93
-
94
-
95
- pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
- with open(pose_json_path, 'r') as f:
97
- meta = json.load(f)
98
-
99
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
- self.img_wh = (256, 256)
101
- self.input_poses = np.array(list(meta["c2ws"].values()))
102
- intrinsic = np.eye(4)
103
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
- self.intrinsic = intrinsic
105
- self.near_far = np.array(meta["near_far"])
106
- self.near_far[1] = 1.8
107
- self.define_transforms()
108
- self.blender2opencv = np.array(
109
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
- )
111
-
112
-
113
- self.c2ws = []
114
- self.w2cs = []
115
- self.near_fars = []
116
- # self.root_dir = root_dir
117
- for idx, img_id in enumerate(self.img_ids):
118
- pose = self.input_poses[idx]
119
- c2w = pose @ self.blender2opencv
120
- self.c2ws.append(c2w)
121
- self.w2cs.append(np.linalg.inv(c2w))
122
- self.near_fars.append(self.near_far)
123
- self.c2ws = np.stack(self.c2ws, axis=0)
124
- self.w2cs = np.stack(self.w2cs, axis=0)
125
-
126
-
127
- self.all_intrinsics = [] # the cam info of the whole scene
128
- self.all_extrinsics = []
129
- self.all_near_fars = []
130
- self.load_cam_info()
131
-
132
- # * bounding box for rendering
133
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
- self.bbox_max = np.array([1.0, 1.0, 1.0])
135
-
136
- # - used for cost volume regularization
137
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
-
140
-
141
- def define_transforms(self):
142
- self.transform = T.Compose([T.ToTensor()])
143
-
144
-
145
-
146
- def load_cam_info(self):
147
- for vid, img_id in enumerate(self.img_ids):
148
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
- self.all_intrinsics.append(intrinsic)
150
- self.all_extrinsics.append(extrinsic)
151
- self.all_near_fars.append(near_far)
152
-
153
- def read_depth(self, filename):
154
- pass
155
-
156
- def read_mask(self, filename):
157
- mask_h = cv2.imread(filename, 0)
158
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
- interpolation=cv2.INTER_NEAREST)
160
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
- interpolation=cv2.INTER_NEAREST)
162
-
163
- mask[mask > 0] = 1 # the masks stored in png are not binary
164
- mask_h[mask_h > 0] = 1
165
-
166
- return mask, mask_h
167
-
168
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
-
170
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
- # print("center", center)
172
- # print("radius", radius)
173
- # print("bounds", bounds)
174
- # import ipdb; ipdb.set_trace()
175
- radius = radius * factor
176
- scale_mat = np.diag([radius, radius, radius, 1.0])
177
- scale_mat[:3, 3] = center.cpu().numpy()
178
- scale_mat = scale_mat.astype(np.float32)
179
-
180
- return scale_mat, 1. / radius.cpu().numpy()
181
-
182
- def __len__(self):
183
- return 8*len(self.lvis_paths)
184
-
185
-
186
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
187
- pass
188
-
189
-
190
- def __getitem__(self, idx):
191
- sample = {}
192
- origin_idx = idx
193
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
194
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
195
-
196
-
197
- folder_uid_dict = self.lvis_paths[idx//8]
198
- idx = idx % 8 # [0, 7]
199
- folder_id = folder_uid_dict['folder_id']
200
- uid = folder_uid_dict['uid']
201
-
202
-
203
- # target view
204
- c2w = self.c2ws[idx]
205
- w2c = np.linalg.inv(c2w)
206
- w2c_ref = w2c
207
- w2c_ref_inv = np.linalg.inv(w2c_ref)
208
-
209
- w2cs.append(w2c @ w2c_ref_inv)
210
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
211
-
212
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
213
-
214
- depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
215
-
216
-
217
- img = Image.open(img_filename)
218
-
219
- img = self.transform(img) # (4, h, w)
220
-
221
-
222
- if img.shape[0] == 4:
223
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
224
- imgs += [img]
225
-
226
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
227
- mask_h = depth_h > 0
228
- # print("valid pixels", np.sum(mask_h))
229
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
230
- surface_points = directions * depth_h[..., None] # [H, W, 3]
231
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
232
- depth_h = distance
233
-
234
-
235
- depths_h.append(depth_h)
236
- masks_h.append(mask_h)
237
-
238
- intrinsic = self.intrinsic
239
- intrinsics.append(intrinsic)
240
-
241
-
242
- near_fars.append(self.near_fars[idx])
243
- image_perm = 0 # only supervised on reference view
244
-
245
- mask_dilated = None
246
-
247
- # src_views = range(8+idx*4, 8+(idx+1)*4)
248
- src_views = range(8, 8 + 8 * 4)
249
-
250
- for vid in src_views:
251
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
252
-
253
- img = Image.open(img_filename)
254
- img_wh = self.img_wh
255
-
256
- img = self.transform(img)
257
- if img.shape[0] == 4:
258
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
259
-
260
- imgs += [img]
261
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
262
- depths_h.append(depth_h)
263
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
264
-
265
- near_fars.append(self.all_near_fars[vid])
266
- intrinsics.append(self.all_intrinsics[vid])
267
-
268
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
269
-
270
-
271
- # ! estimate scale_mat
272
- scale_mat, scale_factor = self.cal_scale_mat(
273
- img_hw=[img_wh[1], img_wh[0]],
274
- intrinsics=intrinsics, extrinsics=w2cs,
275
- near_fars=near_fars, factor=1.1
276
- )
277
-
278
-
279
- new_near_fars = []
280
- new_w2cs = []
281
- new_c2ws = []
282
- new_affine_mats = []
283
- new_depths_h = []
284
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
285
-
286
- P = intrinsic @ extrinsic @ scale_mat
287
- P = P[:3, :4]
288
- # - should use load_K_Rt_from_P() to obtain c2w
289
- c2w = load_K_Rt_from_P(None, P)[1]
290
- w2c = np.linalg.inv(c2w)
291
- new_w2cs.append(w2c)
292
- new_c2ws.append(c2w)
293
- affine_mat = np.eye(4)
294
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
295
- new_affine_mats.append(affine_mat)
296
-
297
- camera_o = c2w[:3, 3]
298
- dist = np.sqrt(np.sum(camera_o ** 2))
299
- near = dist - 1
300
- far = dist + 1
301
-
302
- new_near_fars.append([0.95 * near, 1.05 * far])
303
- new_depths_h.append(depth * scale_factor)
304
-
305
- if self.split == 'train':
306
- # randomly select one view from eight views as reference view
307
- idx_to_select = np.random.randint(0, 8)
308
-
309
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx_to_select}.png')
310
- img = Image.open(img_filename)
311
- img = self.transform(img) # (4, h, w)
312
-
313
- if img.shape[0] == 4:
314
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
315
-
316
- imgs[0] = img
317
-
318
- w2c_selected = self.all_extrinsics[idx_to_select] @ w2c_ref_inv
319
- P = self.all_intrinsics[idx_to_select] @ w2c_selected @ scale_mat
320
- P = P[:3, :4]
321
-
322
- c2w = load_K_Rt_from_P(None, P)[1]
323
- w2c = np.linalg.inv(c2w)
324
- affine_mat = np.eye(4)
325
- affine_mat[:3, :4] = self.all_intrinsics[idx_to_select][:3, :3] @ w2c[:3, :4]
326
- new_affine_mats[0] = affine_mat
327
- camera_o = c2w[:3, 3]
328
- dist = np.sqrt(np.sum(camera_o ** 2))
329
- near = dist - 1
330
- far = dist + 1
331
- new_near_fars[0] = [0.95 * near, 1.05 * far]
332
-
333
- new_w2cs[0] = w2c
334
- new_c2ws[0] = c2w
335
-
336
- depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx_to_select}_depth_mm.png'))
337
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
338
- mask_h = depth_h > 0
339
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
340
- surface_points = directions * depth_h[..., None] # [H, W, 3]
341
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
342
- depth_h = distance * scale_factor
343
-
344
- new_depths_h[0] = depth_h
345
- masks_h[0] = mask_h
346
-
347
-
348
-
349
- # print(new_near_fars)
350
- imgs = torch.stack(imgs).float()
351
- depths_h = np.stack(new_depths_h)
352
- masks_h = np.stack(masks_h)
353
-
354
- affine_mats = np.stack(new_affine_mats)
355
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
356
- new_near_fars)
357
-
358
- if self.split == 'train':
359
- start_idx = 0
360
- else:
361
- start_idx = 1
362
-
363
-
364
- view_ids = [idx] + list(src_views)
365
- sample['origin_idx'] = origin_idx
366
- sample['images'] = imgs # (V, 3, H, W)
367
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
368
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
369
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
370
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
371
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
372
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
373
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
374
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
375
-
376
- # sample['light_idx'] = torch.tensor(light_idx)
377
- sample['scan'] = folder_id
378
-
379
- sample['scale_factor'] = torch.tensor(scale_factor)
380
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
381
- sample['render_img_idx'] = torch.tensor(image_perm)
382
- sample['partial_vol_origin'] = self.partial_vol_origin
383
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
384
-
385
-
386
- # - image to render
387
- sample['query_image'] = sample['images'][0]
388
- sample['query_c2w'] = sample['c2ws'][0]
389
- sample['query_w2c'] = sample['w2cs'][0]
390
- sample['query_intrinsic'] = sample['intrinsics'][0]
391
- sample['query_depth'] = sample['depths_h'][0]
392
- sample['query_mask'] = sample['masks_h'][0]
393
- sample['query_near_far'] = sample['near_fars'][0]
394
-
395
-
396
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
397
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
398
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
399
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
400
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
401
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
402
- sample['view_ids'] = sample['view_ids'][start_idx:]
403
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
404
-
405
- sample['scale_mat'] = torch.from_numpy(scale_mat)
406
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
407
-
408
- # - generate rays
409
- if ('val' in self.split) or ('test' in self.split):
410
- sample_rays = gen_rays_from_single_image(
411
- img_wh[1], img_wh[0],
412
- sample['query_image'],
413
- sample['query_intrinsic'],
414
- sample['query_c2w'],
415
- depth=sample['query_depth'],
416
- mask=sample['query_mask'] if self.clean_image else None)
417
- else:
418
- sample_rays = gen_random_rays_from_single_image(
419
- img_wh[1], img_wh[0],
420
- self.N_rays,
421
- sample['query_image'],
422
- sample['query_intrinsic'],
423
- sample['query_c2w'],
424
- depth=sample['query_depth'],
425
- mask=sample['query_mask'] if self.clean_image else None,
426
- dilated_mask=mask_dilated,
427
- importance_sample=self.importance_sample)
428
-
429
-
430
- sample['rays'] = sample_rays
431
-
432
- return sample
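
The datasets deleted here all convert the rendered z-buffer depth into per-ray Euclidean distance before using it for supervision (the `directions * depth_h[..., None]` followed by `np.linalg.norm` step above). A minimal, self-contained sketch of that conversion; the image size and pinhole intrinsics below are placeholder values, not the ones shipped with the renders:

import numpy as np

def ray_distance_from_z_depth(z_depth, fx, fy, cx, cy):
    """Convert a z-buffer depth map of shape (H, W) into per-pixel ray lengths."""
    h, w = z_depth.shape
    # pixel centres, matching the +0.5 offset used via kornia.create_meshgrid above
    u, v = np.meshgrid(np.arange(w) + 0.5, np.arange(h) + 0.5)
    dirs = np.stack([(u - cx) / fx, (v - cy) / fy, np.ones_like(u)], axis=-1)  # (H, W, 3)
    points = dirs * z_depth[..., None]            # back-projected surface points
    return np.linalg.norm(points, axis=-1)        # Euclidean distance along each ray

# toy example: a flat plane at z = 1.5 seen through made-up intrinsics
depth = np.full((256, 256), 1.5, dtype=np.float32)
dist = ray_distance_from_z_depth(depth, fx=280.0, fy=280.0, cx=128.0, cy=128.0)
print(dist.min(), dist.max())  # corner rays are longer than the central one
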
SparseNeuS_demo_v1/data/blender_general_narrow_all.py DELETED
@@ -1,386 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- # print("root_dir: ", root_dir)
70
- self.root_dir = root_dir
71
- self.split = split
72
-
73
- self.n_views = n_views
74
- self.N_rays = N_rays
75
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
76
-
77
- self.clean_image = clean_image
78
- self.importance_sample = importance_sample
79
- self.test_ref_views = test_ref_views # used for testing
80
- self.scale_factor = 1.0
81
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
-
83
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
- with open(lvis_json_path, 'r') as f:
85
- lvis_paths = json.load(f)
86
- if self.split == 'train':
87
- self.lvis_paths = lvis_paths['train']
88
- else:
89
- self.lvis_paths = lvis_paths['val']
90
- if img_wh is not None:
91
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
- 'img_wh must both be multiples of 32!'
93
-
94
-
95
- pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
- with open(pose_json_path, 'r') as f:
97
- meta = json.load(f)
98
-
99
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
- self.img_wh = (256, 256)
101
- self.input_poses = np.array(list(meta["c2ws"].values()))
102
- intrinsic = np.eye(4)
103
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
- self.intrinsic = intrinsic
105
- self.near_far = np.array(meta["near_far"])
106
- self.near_far[1] = 1.8
107
- self.define_transforms()
108
- self.blender2opencv = np.array(
109
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
- )
111
-
112
-
113
- self.c2ws = []
114
- self.w2cs = []
115
- self.near_fars = []
116
- # self.root_dir = root_dir
117
- for idx, img_id in enumerate(self.img_ids):
118
- pose = self.input_poses[idx]
119
- c2w = pose @ self.blender2opencv
120
- self.c2ws.append(c2w)
121
- self.w2cs.append(np.linalg.inv(c2w))
122
- self.near_fars.append(self.near_far)
123
- self.c2ws = np.stack(self.c2ws, axis=0)
124
- self.w2cs = np.stack(self.w2cs, axis=0)
125
-
126
-
127
- self.all_intrinsics = [] # the cam info of the whole scene
128
- self.all_extrinsics = []
129
- self.all_near_fars = []
130
- self.load_cam_info()
131
-
132
- # * bounding box for rendering
133
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
- self.bbox_max = np.array([1.0, 1.0, 1.0])
135
-
136
- # - used for cost volume regularization
137
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
-
140
-
141
- def define_transforms(self):
142
- self.transform = T.Compose([T.ToTensor()])
143
-
144
-
145
-
146
- def load_cam_info(self):
147
- for vid, img_id in enumerate(self.img_ids):
148
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
- self.all_intrinsics.append(intrinsic)
150
- self.all_extrinsics.append(extrinsic)
151
- self.all_near_fars.append(near_far)
152
-
153
- def read_depth(self, filename):
154
- pass
155
-
156
- def read_mask(self, filename):
157
- mask_h = cv2.imread(filename, 0)
158
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
- interpolation=cv2.INTER_NEAREST)
160
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
- interpolation=cv2.INTER_NEAREST)
162
-
163
- mask[mask > 0] = 1 # the masks stored in png are not binary
164
- mask_h[mask_h > 0] = 1
165
-
166
- return mask, mask_h
167
-
168
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
-
170
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
- # print("center", center)
172
- # print("radius", radius)
173
- # print("bounds", bounds)
174
- # import ipdb; ipdb.set_trace()
175
- radius = radius * factor
176
- scale_mat = np.diag([radius, radius, radius, 1.0])
177
- scale_mat[:3, 3] = center.cpu().numpy()
178
- scale_mat = scale_mat.astype(np.float32)
179
-
180
- return scale_mat, 1. / radius.cpu().numpy()
181
-
182
- def __len__(self):
183
- return 8*len(self.lvis_paths)
184
-
185
-
186
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
187
- pass
188
-
189
-
190
- def __getitem__(self, idx):
191
- sample = {}
192
- origin_idx = idx
193
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
194
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
195
-
196
-
197
- folder_uid_dict = self.lvis_paths[idx//8]
198
- idx = idx % 8 # [0, 7]
199
- folder_id = folder_uid_dict['folder_id']
200
- uid = folder_uid_dict['uid']
201
-
202
-
203
- # target view
204
- c2w = self.c2ws[idx]
205
- w2c = np.linalg.inv(c2w)
206
- w2c_ref = w2c
207
- w2c_ref_inv = np.linalg.inv(w2c_ref)
208
-
209
- w2cs.append(w2c @ w2c_ref_inv)
210
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
211
-
212
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
213
-
214
- depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
215
-
216
-
217
- img = Image.open(img_filename)
218
-
219
- img = self.transform(img) # (4, h, w)
220
-
221
-
222
- if img.shape[0] == 4:
223
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
224
- imgs += [img]
225
-
226
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
227
- mask_h = depth_h > 0
228
- # print("valid pixels", np.sum(mask_h))
229
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
230
- surface_points = directions * depth_h[..., None] # [H, W, 3]
231
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
232
- depth_h = distance
233
-
234
-
235
- depths_h.append(depth_h)
236
- masks_h.append(mask_h)
237
-
238
- intrinsic = self.intrinsic
239
- intrinsics.append(intrinsic)
240
-
241
-
242
- near_fars.append(self.near_fars[idx])
243
- image_perm = 0 # only supervised on reference view
244
-
245
- mask_dilated = None
246
-
247
- # src_views = range(8+idx*4, 8+(idx+1)*4)
248
- src_views = range(8, 8 + 8 * 4)
249
-
250
- for vid in src_views:
251
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
252
-
253
- img = Image.open(img_filename)
254
- img_wh = self.img_wh
255
-
256
- img = self.transform(img)
257
- if img.shape[0] == 4:
258
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
259
-
260
- imgs += [img]
261
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
262
- depths_h.append(depth_h)
263
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
264
-
265
- near_fars.append(self.all_near_fars[vid])
266
- intrinsics.append(self.all_intrinsics[vid])
267
-
268
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
269
-
270
-
271
- # ! estimate scale_mat
272
- scale_mat, scale_factor = self.cal_scale_mat(
273
- img_hw=[img_wh[1], img_wh[0]],
274
- intrinsics=intrinsics, extrinsics=w2cs,
275
- near_fars=near_fars, factor=1.1
276
- )
277
-
278
-
279
- new_near_fars = []
280
- new_w2cs = []
281
- new_c2ws = []
282
- new_affine_mats = []
283
- new_depths_h = []
284
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
285
-
286
- P = intrinsic @ extrinsic @ scale_mat
287
- P = P[:3, :4]
288
- # - should use load_K_Rt_from_P() to obtain c2w
289
- c2w = load_K_Rt_from_P(None, P)[1]
290
- w2c = np.linalg.inv(c2w)
291
- new_w2cs.append(w2c)
292
- new_c2ws.append(c2w)
293
- affine_mat = np.eye(4)
294
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
295
- new_affine_mats.append(affine_mat)
296
-
297
- camera_o = c2w[:3, 3]
298
- dist = np.sqrt(np.sum(camera_o ** 2))
299
- near = dist - 1
300
- far = dist + 1
301
-
302
- new_near_fars.append([0.95 * near, 1.05 * far])
303
- new_depths_h.append(depth * scale_factor)
304
-
305
- # print(new_near_fars)
306
- imgs = torch.stack(imgs).float()
307
- depths_h = np.stack(new_depths_h)
308
- masks_h = np.stack(masks_h)
309
-
310
- affine_mats = np.stack(new_affine_mats)
311
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
312
- new_near_fars)
313
-
314
- if self.split == 'train':
315
- start_idx = 0
316
- else:
317
- start_idx = 1
318
-
319
- view_ids = [idx] + list(src_views)
320
- sample['origin_idx'] = origin_idx
321
- sample['images'] = imgs # (V, 3, H, W)
322
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
323
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
324
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
325
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
326
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
327
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
328
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
329
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
330
-
331
- # sample['light_idx'] = torch.tensor(light_idx)
332
- sample['scan'] = folder_id
333
-
334
- sample['scale_factor'] = torch.tensor(scale_factor)
335
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
336
- sample['render_img_idx'] = torch.tensor(image_perm)
337
- sample['partial_vol_origin'] = self.partial_vol_origin
338
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
339
-
340
-
341
- # - image to render
342
- sample['query_image'] = sample['images'][0]
343
- sample['query_c2w'] = sample['c2ws'][0]
344
- sample['query_w2c'] = sample['w2cs'][0]
345
- sample['query_intrinsic'] = sample['intrinsics'][0]
346
- sample['query_depth'] = sample['depths_h'][0]
347
- sample['query_mask'] = sample['masks_h'][0]
348
- sample['query_near_far'] = sample['near_fars'][0]
349
-
350
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
351
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
352
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
353
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
354
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
355
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
356
- sample['view_ids'] = sample['view_ids'][start_idx:]
357
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
358
-
359
- sample['scale_mat'] = torch.from_numpy(scale_mat)
360
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
361
-
362
- # - generate rays
363
- if ('val' in self.split) or ('test' in self.split):
364
- sample_rays = gen_rays_from_single_image(
365
- img_wh[1], img_wh[0],
366
- sample['query_image'],
367
- sample['query_intrinsic'],
368
- sample['query_c2w'],
369
- depth=sample['query_depth'],
370
- mask=sample['query_mask'] if self.clean_image else None)
371
- else:
372
- sample_rays = gen_random_rays_from_single_image(
373
- img_wh[1], img_wh[0],
374
- self.N_rays,
375
- sample['query_image'],
376
- sample['query_intrinsic'],
377
- sample['query_c2w'],
378
- depth=sample['query_depth'],
379
- mask=sample['query_mask'] if self.clean_image else None,
380
- dilated_mask=mask_dilated,
381
- importance_sample=self.importance_sample)
382
-
383
-
384
- sample['rays'] = sample_rays
385
-
386
- return sample
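
Every loader in this commit rebuilds camera poses from scaled projection matrices through `load_K_Rt_from_P`, which wraps `cv2.decomposeProjectionMatrix`. A standalone sketch of that round trip on a synthetic camera; the K, R, t values below are made up for illustration:

import cv2
import numpy as np

# synthetic ground truth: intrinsics K plus a world-to-camera rotation R and translation t
K = np.array([[300.0, 0.0, 128.0],
              [0.0, 300.0, 128.0],
              [0.0, 0.0, 1.0]])
R = cv2.Rodrigues(np.array([[0.1], [-0.2], [0.05]]))[0]
t = np.array([[0.2], [-0.1], [2.0]])
P = K @ np.hstack([R, t])  # 3x4 projection matrix

# decompose P back into intrinsics and a camera-to-world pose
K_dec, R_dec, t_h = cv2.decomposeProjectionMatrix(P)[:3]
K_dec = K_dec / K_dec[2, 2]             # normalise so that K[2, 2] == 1
cam_center = (t_h[:3] / t_h[3])[:, 0]   # camera centre in world coordinates

c2w = np.eye(4, dtype=np.float32)
c2w[:3, :3] = R_dec.T                   # R_dec is world-to-camera, so its transpose is cam-to-world
c2w[:3, 3] = cam_center

print(np.allclose(K_dec, K, atol=1e-6))                       # intrinsics recovered
print(np.allclose(cam_center, (-R.T @ t)[:, 0], atol=1e-6))   # centre equals -R^T t
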
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage.py DELETED
@@ -1,410 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- # print("root_dir: ", root_dir)
70
- self.root_dir = root_dir
71
- self.split = split
72
-
73
- self.n_views = n_views
74
- self.N_rays = N_rays
75
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
76
-
77
- self.clean_image = clean_image
78
- self.importance_sample = importance_sample
79
- self.test_ref_views = test_ref_views # used for testing
80
- self.scale_factor = 1.0
81
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
-
83
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
- with open(lvis_json_path, 'r') as f:
85
- lvis_paths = json.load(f)
86
- if self.split == 'train':
87
- self.lvis_paths = lvis_paths['train']
88
- else:
89
- self.lvis_paths = lvis_paths['val']
90
- if img_wh is not None:
91
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
- 'img_wh must both be multiples of 32!'
93
-
94
-
95
- pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
- with open(pose_json_path, 'r') as f:
97
- meta = json.load(f)
98
-
99
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
- self.img_wh = (256, 256)
101
- self.input_poses = np.array(list(meta["c2ws"].values()))
102
- intrinsic = np.eye(4)
103
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
- self.intrinsic = intrinsic
105
- self.near_far = np.array(meta["near_far"])
106
- self.near_far[1] = 1.8
107
- self.define_transforms()
108
- self.blender2opencv = np.array(
109
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
- )
111
-
112
-
113
- self.c2ws = []
114
- self.w2cs = []
115
- self.near_fars = []
116
- # self.root_dir = root_dir
117
- for idx, img_id in enumerate(self.img_ids):
118
- pose = self.input_poses[idx]
119
- c2w = pose @ self.blender2opencv
120
- self.c2ws.append(c2w)
121
- self.w2cs.append(np.linalg.inv(c2w))
122
- self.near_fars.append(self.near_far)
123
- self.c2ws = np.stack(self.c2ws, axis=0)
124
- self.w2cs = np.stack(self.w2cs, axis=0)
125
-
126
-
127
- self.all_intrinsics = [] # the cam info of the whole scene
128
- self.all_extrinsics = []
129
- self.all_near_fars = []
130
- self.load_cam_info()
131
-
132
- # * bounding box for rendering
133
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
- self.bbox_max = np.array([1.0, 1.0, 1.0])
135
-
136
- # - used for cost volume regularization
137
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
-
140
-
141
- def define_transforms(self):
142
- self.transform = T.Compose([T.ToTensor()])
143
-
144
-
145
-
146
- def load_cam_info(self):
147
- for vid, img_id in enumerate(self.img_ids):
148
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
- self.all_intrinsics.append(intrinsic)
150
- self.all_extrinsics.append(extrinsic)
151
- self.all_near_fars.append(near_far)
152
-
153
- def read_depth(self, filename):
154
- pass
155
-
156
- def read_mask(self, filename):
157
- mask_h = cv2.imread(filename, 0)
158
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
- interpolation=cv2.INTER_NEAREST)
160
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
- interpolation=cv2.INTER_NEAREST)
162
-
163
- mask[mask > 0] = 1 # the masks stored in png are not binary
164
- mask_h[mask_h > 0] = 1
165
-
166
- return mask, mask_h
167
-
168
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
-
170
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
- # print("center", center)
172
- # print("radius", radius)
173
- # print("bounds", bounds)
174
- # import ipdb; ipdb.set_trace()
175
- radius = radius * factor
176
- scale_mat = np.diag([radius, radius, radius, 1.0])
177
- scale_mat[:3, 3] = center.cpu().numpy()
178
- scale_mat = scale_mat.astype(np.float32)
179
-
180
- return scale_mat, 1. / radius.cpu().numpy()
181
-
182
- def __len__(self):
183
- return 8*len(self.lvis_paths)
184
-
185
-
186
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
187
- pass
188
-
189
-
190
- def __getitem__(self, idx):
191
- sample = {}
192
- origin_idx = idx
193
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
194
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
195
-
196
-
197
- folder_uid_dict = self.lvis_paths[idx//8]
198
- idx = idx % 8 # [0, 7]
199
- folder_id = folder_uid_dict['folder_id']
200
- uid = folder_uid_dict['uid']
201
-
202
-
203
- # target view
204
- c2w = self.c2ws[idx]
205
- w2c = np.linalg.inv(c2w)
206
- w2c_ref = w2c
207
- w2c_ref_inv = np.linalg.inv(w2c_ref)
208
-
209
- w2cs.append(w2c @ w2c_ref_inv)
210
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
211
-
212
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
213
-
214
- depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
215
-
216
-
217
- img = Image.open(img_filename)
218
-
219
- img = self.transform(img) # (4, h, w)
220
-
221
- # print("img_pre", img.shape)
222
- if img.shape[0] == 4:
223
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
224
- # print("img", img.shape)
225
- imgs += [img]
226
-
227
-
228
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
229
- mask_h = depth_h > 0
230
- # print("valid pixels", np.sum(mask_h))
231
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
232
- surface_points = directions * depth_h[..., None] # [H, W, 3]
233
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
234
- depth_h = distance
235
- # print("depth_h", depth_h.shape)
236
-
237
- depths_h.append(depth_h)
238
- masks_h.append(mask_h)
239
-
240
- intrinsic = self.intrinsic
241
- intrinsics.append(intrinsic)
242
-
243
-
244
- near_fars.append(self.near_fars[idx])
245
- image_perm = 0 # only supervised on reference view
246
-
247
- mask_dilated = None
248
-
249
- # src_views = range(8+idx*4, 8+(idx+1)*4)
250
- src_views = range(8, 8 + 8 * 4)
251
-
252
- for vid in src_views:
253
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{(vid - 8) // 4}_{vid % 4 + 1}.png')
254
-
255
- img = Image.open(img_filename)
256
- img_wh = self.img_wh
257
-
258
- img = self.transform(img)
259
- if img.shape[0] == 4:
260
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
261
-
262
- imgs += [img]
263
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
264
- depths_h.append(depth_h)
265
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
266
-
267
- near_fars.append(self.all_near_fars[vid])
268
- intrinsics.append(self.all_intrinsics[vid])
269
-
270
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
271
-
272
-
273
- # ! estimate scale_mat
274
- scale_mat, scale_factor = self.cal_scale_mat(
275
- img_hw=[img_wh[1], img_wh[0]],
276
- intrinsics=intrinsics, extrinsics=w2cs,
277
- near_fars=near_fars, factor=1.1
278
- )
279
-
280
-
281
- new_near_fars = []
282
- new_w2cs = []
283
- new_c2ws = []
284
- new_affine_mats = []
285
- new_depths_h = []
286
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
287
-
288
- P = intrinsic @ extrinsic @ scale_mat
289
- P = P[:3, :4]
290
- # - should use load_K_Rt_from_P() to obtain c2w
291
- c2w = load_K_Rt_from_P(None, P)[1]
292
- w2c = np.linalg.inv(c2w)
293
- new_w2cs.append(w2c)
294
- new_c2ws.append(c2w)
295
- affine_mat = np.eye(4)
296
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
297
- new_affine_mats.append(affine_mat)
298
-
299
- camera_o = c2w[:3, 3]
300
- dist = np.sqrt(np.sum(camera_o ** 2))
301
- near = dist - 1
302
- far = dist + 1
303
-
304
- new_near_fars.append([0.95 * near, 1.05 * far])
305
- new_depths_h.append(depth * scale_factor)
306
-
307
- # print(new_near_fars)
308
- imgs = torch.stack(imgs).float()
309
- depths_h = np.stack(new_depths_h)
310
- masks_h = np.stack(masks_h)
311
-
312
- affine_mats = np.stack(new_affine_mats)
313
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
314
- new_near_fars)
315
-
316
- if self.split == 'train':
317
- start_idx = 0
318
- else:
319
- start_idx = 1
320
-
321
-
322
-
323
- target_w2cs = []
324
- target_intrinsics = []
325
- new_target_w2cs = []
326
- for i_idx in range(8):
327
- target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
328
- target_intrinsics.append(self.all_intrinsics[i_idx])
329
-
330
- for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
331
-
332
- P = intrinsic @ extrinsic @ scale_mat
333
- P = P[:3, :4]
334
- # - should use load_K_Rt_from_P() to obtain c2w
335
- c2w = load_K_Rt_from_P(None, P)[1]
336
- w2c = np.linalg.inv(c2w)
337
- new_target_w2cs.append(w2c)
338
- target_w2cs = np.stack(new_target_w2cs)
339
-
340
-
341
-
342
- view_ids = [idx] + list(src_views)
343
- sample['origin_idx'] = origin_idx
344
- sample['images'] = imgs # (V, 3, H, W)
345
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
346
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
347
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
348
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
349
- sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
350
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
351
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
352
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
353
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
354
-
355
- # sample['light_idx'] = torch.tensor(light_idx)
356
- sample['scan'] = folder_id
357
-
358
- sample['scale_factor'] = torch.tensor(scale_factor)
359
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
360
- sample['render_img_idx'] = torch.tensor(image_perm)
361
- sample['partial_vol_origin'] = self.partial_vol_origin
362
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
363
-
364
-
365
- # - image to render
366
- sample['query_image'] = sample['images'][0]
367
- sample['query_c2w'] = sample['c2ws'][0]
368
- sample['query_w2c'] = sample['w2cs'][0]
369
- sample['query_intrinsic'] = sample['intrinsics'][0]
370
- sample['query_depth'] = sample['depths_h'][0]
371
- sample['query_mask'] = sample['masks_h'][0]
372
- sample['query_near_far'] = sample['near_fars'][0]
373
-
374
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
375
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
376
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
377
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
378
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
379
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
380
- sample['view_ids'] = sample['view_ids'][start_idx:]
381
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
382
-
383
- sample['scale_mat'] = torch.from_numpy(scale_mat)
384
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
385
-
386
- # - generate rays
387
- if ('val' in self.split) or ('test' in self.split):
388
- sample_rays = gen_rays_from_single_image(
389
- img_wh[1], img_wh[0],
390
- sample['query_image'],
391
- sample['query_intrinsic'],
392
- sample['query_c2w'],
393
- depth=sample['query_depth'],
394
- mask=sample['query_mask'] if self.clean_image else None)
395
- else:
396
- sample_rays = gen_random_rays_from_single_image(
397
- img_wh[1], img_wh[0],
398
- self.N_rays,
399
- sample['query_image'],
400
- sample['query_intrinsic'],
401
- sample['query_c2w'],
402
- depth=sample['query_depth'],
403
- mask=sample['query_mask'] if self.clean_image else None,
404
- dilated_mask=mask_dilated,
405
- importance_sample=self.importance_sample)
406
-
407
-
408
- sample['rays'] = sample_rays
409
-
410
- return sample
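
`cal_scale_mat` re-centres and rescales each scene so that the object's bounding sphere becomes the unit sphere; the per-view near/far planes are then derived from each camera's distance to the new origin, exactly as in the `dist - 1` / `dist + 1` lines above. A simplified sketch of that normalisation, with the bounding sphere taken from a point cloud instead of `get_boundingbox`:

import numpy as np

def unit_sphere_scale_mat(points, factor=1.1):
    """Return a 4x4 similarity mapping the points' bounding sphere to the unit sphere."""
    center = 0.5 * (points.min(axis=0) + points.max(axis=0))
    radius = factor * np.linalg.norm(points - center, axis=1).max()
    scale_mat = np.diag([radius, radius, radius, 1.0]).astype(np.float32)
    scale_mat[:3, 3] = center            # world = scale_mat @ [normalised; 1]
    return scale_mat, 1.0 / radius       # scale_factor also rescales metric depths

points = np.random.default_rng(0).uniform(-0.4, 0.4, size=(1000, 3))  # toy object
scale_mat, scale_factor = unit_sphere_scale_mat(points)

cam_center_world = np.array([0.0, 0.0, 2.5])                 # a camera 2.5 units away
cam_center_norm = (cam_center_world - scale_mat[:3, 3]) * scale_factor
dist = np.linalg.norm(cam_center_norm)
near, far = 0.95 * (dist - 1.0), 1.05 * (dist + 1.0)         # object now fits the unit sphere
print(scale_factor, near, far)
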
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage_temp.py DELETED
@@ -1,411 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- # print("root_dir: ", root_dir)
70
- self.root_dir = root_dir
71
- self.split = split
72
-
73
- self.n_views = n_views
74
- self.N_rays = N_rays
75
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
76
-
77
- self.clean_image = clean_image
78
- self.importance_sample = importance_sample
79
- self.test_ref_views = test_ref_views # used for testing
80
- self.scale_factor = 1.0
81
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
-
83
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
- with open(lvis_json_path, 'r') as f:
85
- lvis_paths = json.load(f)
86
- if self.split == 'train':
87
- self.lvis_paths = lvis_paths['train']
88
- else:
89
- self.lvis_paths = lvis_paths['val']
90
- if img_wh is not None:
91
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
- 'img_wh must both be multiples of 32!'
93
-
94
-
95
- pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
- with open(pose_json_path, 'r') as f:
97
- meta = json.load(f)
98
-
99
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
- self.img_wh = (256, 256)
101
- self.input_poses = np.array(list(meta["c2ws"].values()))
102
- intrinsic = np.eye(4)
103
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
- self.intrinsic = intrinsic
105
- self.near_far = np.array(meta["near_far"])
106
- self.near_far[1] = 1.8
107
- self.define_transforms()
108
- self.blender2opencv = np.array(
109
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
- )
111
-
112
-
113
- self.c2ws = []
114
- self.w2cs = []
115
- self.near_fars = []
116
- # self.root_dir = root_dir
117
- for idx, img_id in enumerate(self.img_ids):
118
- pose = self.input_poses[idx]
119
- c2w = pose @ self.blender2opencv
120
- self.c2ws.append(c2w)
121
- self.w2cs.append(np.linalg.inv(c2w))
122
- self.near_fars.append(self.near_far)
123
- self.c2ws = np.stack(self.c2ws, axis=0)
124
- self.w2cs = np.stack(self.w2cs, axis=0)
125
-
126
-
127
- self.all_intrinsics = [] # the cam info of the whole scene
128
- self.all_extrinsics = []
129
- self.all_near_fars = []
130
- self.load_cam_info()
131
-
132
- # * bounding box for rendering
133
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
- self.bbox_max = np.array([1.0, 1.0, 1.0])
135
-
136
- # - used for cost volume regularization
137
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
-
140
-
141
- def define_transforms(self):
142
- self.transform = T.Compose([T.ToTensor()])
143
-
144
-
145
-
146
- def load_cam_info(self):
147
- for vid, img_id in enumerate(self.img_ids):
148
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
- self.all_intrinsics.append(intrinsic)
150
- self.all_extrinsics.append(extrinsic)
151
- self.all_near_fars.append(near_far)
152
-
153
- def read_depth(self, filename):
154
- pass
155
-
156
- def read_mask(self, filename):
157
- mask_h = cv2.imread(filename, 0)
158
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
- interpolation=cv2.INTER_NEAREST)
160
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
- interpolation=cv2.INTER_NEAREST)
162
-
163
- mask[mask > 0] = 1 # the masks stored in png are not binary
164
- mask_h[mask_h > 0] = 1
165
-
166
- return mask, mask_h
167
-
168
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
-
170
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
- # print("center", center)
172
- # print("radius", radius)
173
- # print("bounds", bounds)
174
- # import ipdb; ipdb.set_trace()
175
- radius = radius * factor
176
- scale_mat = np.diag([radius, radius, radius, 1.0])
177
- scale_mat[:3, 3] = center.cpu().numpy()
178
- scale_mat = scale_mat.astype(np.float32)
179
-
180
- return scale_mat, 1. / radius.cpu().numpy()
181
-
182
- def __len__(self):
183
- return 10
184
-
185
-
186
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
187
- pass
188
-
189
-
190
- def __getitem__(self, idx):
191
- idx = idx * 8
192
- sample = {}
193
- origin_idx = idx
194
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
195
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
196
-
197
-
198
- folder_uid_dict = self.lvis_paths[idx//8]
199
- idx = idx % 8 # [0, 7]
200
- folder_id = folder_uid_dict['folder_id']
201
- uid = folder_uid_dict['uid']
202
-
203
-
204
- # target view
205
- c2w = self.c2ws[idx]
206
- w2c = np.linalg.inv(c2w)
207
- w2c_ref = w2c
208
- w2c_ref_inv = np.linalg.inv(w2c_ref)
209
-
210
- w2cs.append(w2c @ w2c_ref_inv)
211
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
212
-
213
- img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
214
-
215
- depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
216
-
217
-
218
- img = Image.open(img_filename)
219
-
220
- img = self.transform(img) # (4, h, w)
221
-
222
- # print("img_pre", img.shape)
223
- if img.shape[0] == 4:
224
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
225
- # print("img", img.shape)
226
- imgs += [img]
227
-
228
-
229
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
230
- mask_h = depth_h > 0
231
- # print("valid pixels", np.sum(mask_h))
232
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
233
- surface_points = directions * depth_h[..., None] # [H, W, 3]
234
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
235
- depth_h = distance
236
- # print("depth_h", depth_h.shape)
237
-
238
- depths_h.append(depth_h)
239
- masks_h.append(mask_h)
240
-
241
- intrinsic = self.intrinsic
242
- intrinsics.append(intrinsic)
243
-
244
-
245
- near_fars.append(self.near_fars[idx])
246
- image_perm = 0 # only supervised on reference view
247
-
248
- mask_dilated = None
249
-
250
- # src_views = range(8+idx*4, 8+(idx+1)*4)
251
- src_views = range(8, 8 + 8 * 4)
252
-
253
- for vid in src_views:
254
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{(vid - 8) // 4}_{vid % 4 + 1}.png')
255
-
256
- img = Image.open(img_filename)
257
- img_wh = self.img_wh
258
-
259
- img = self.transform(img)
260
- if img.shape[0] == 4:
261
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
262
-
263
- imgs += [img]
264
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
265
- depths_h.append(depth_h)
266
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
267
-
268
- near_fars.append(self.all_near_fars[vid])
269
- intrinsics.append(self.all_intrinsics[vid])
270
-
271
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
272
-
273
-
274
- # ! estimate scale_mat
275
- scale_mat, scale_factor = self.cal_scale_mat(
276
- img_hw=[img_wh[1], img_wh[0]],
277
- intrinsics=intrinsics, extrinsics=w2cs,
278
- near_fars=near_fars, factor=1.1
279
- )
280
-
281
-
282
- new_near_fars = []
283
- new_w2cs = []
284
- new_c2ws = []
285
- new_affine_mats = []
286
- new_depths_h = []
287
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
288
-
289
- P = intrinsic @ extrinsic @ scale_mat
290
- P = P[:3, :4]
291
- # - should use load_K_Rt_from_P() to obtain c2w
292
- c2w = load_K_Rt_from_P(None, P)[1]
293
- w2c = np.linalg.inv(c2w)
294
- new_w2cs.append(w2c)
295
- new_c2ws.append(c2w)
296
- affine_mat = np.eye(4)
297
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
298
- new_affine_mats.append(affine_mat)
299
-
300
- camera_o = c2w[:3, 3]
301
- dist = np.sqrt(np.sum(camera_o ** 2))
302
- near = dist - 1
303
- far = dist + 1
304
-
305
- new_near_fars.append([0.95 * near, 1.05 * far])
306
- new_depths_h.append(depth * scale_factor)
307
-
308
- # print(new_near_fars)
309
- imgs = torch.stack(imgs).float()
310
- depths_h = np.stack(new_depths_h)
311
- masks_h = np.stack(masks_h)
312
-
313
- affine_mats = np.stack(new_affine_mats)
314
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
315
- new_near_fars)
316
-
317
- if self.split == 'train':
318
- start_idx = 0
319
- else:
320
- start_idx = 1
321
-
322
-
323
-
324
- target_w2cs = []
325
- target_intrinsics = []
326
- new_target_w2cs = []
327
- for i_idx in range(8):
328
- target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
329
- target_intrinsics.append(self.all_intrinsics[i_idx])
330
-
331
- for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
332
-
333
- P = intrinsic @ extrinsic @ scale_mat
334
- P = P[:3, :4]
335
- # - should use load_K_Rt_from_P() to obtain c2w
336
- c2w = load_K_Rt_from_P(None, P)[1]
337
- w2c = np.linalg.inv(c2w)
338
- new_target_w2cs.append(w2c)
339
- target_w2cs = np.stack(new_target_w2cs)
340
-
341
-
342
-
343
- view_ids = [idx] + list(src_views)
344
- sample['origin_idx'] = origin_idx
345
- sample['images'] = imgs # (V, 3, H, W)
346
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
347
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
348
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
349
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
350
- sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
351
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
352
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
353
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
354
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
355
-
356
- # sample['light_idx'] = torch.tensor(light_idx)
357
- sample['scan'] = folder_id
358
-
359
- sample['scale_factor'] = torch.tensor(scale_factor)
360
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
361
- sample['render_img_idx'] = torch.tensor(image_perm)
362
- sample['partial_vol_origin'] = self.partial_vol_origin
363
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
364
-
365
-
366
- # - image to render
367
- sample['query_image'] = sample['images'][0]
368
- sample['query_c2w'] = sample['c2ws'][0]
369
- sample['query_w2c'] = sample['w2cs'][0]
370
- sample['query_intrinsic'] = sample['intrinsics'][0]
371
- sample['query_depth'] = sample['depths_h'][0]
372
- sample['query_mask'] = sample['masks_h'][0]
373
- sample['query_near_far'] = sample['near_fars'][0]
374
-
375
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
376
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
377
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
378
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
379
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
380
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
381
- sample['view_ids'] = sample['view_ids'][start_idx:]
382
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
383
-
384
- sample['scale_mat'] = torch.from_numpy(scale_mat)
385
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
386
-
387
- # - generate rays
388
- if ('val' in self.split) or ('test' in self.split):
389
- sample_rays = gen_rays_from_single_image(
390
- img_wh[1], img_wh[0],
391
- sample['query_image'],
392
- sample['query_intrinsic'],
393
- sample['query_c2w'],
394
- depth=sample['query_depth'],
395
- mask=sample['query_mask'] if self.clean_image else None)
396
- else:
397
- sample_rays = gen_random_rays_from_single_image(
398
- img_wh[1], img_wh[0],
399
- self.N_rays,
400
- sample['query_image'],
401
- sample['query_intrinsic'],
402
- sample['query_c2w'],
403
- depth=sample['query_depth'],
404
- mask=sample['query_mask'] if self.clean_image else None,
405
- dilated_mask=mask_dilated,
406
- importance_sample=self.importance_sample)
407
-
408
-
409
- sample['rays'] = sample_rays
410
-
411
- return sample
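
All of these datasets composite the 4-channel renders onto a white background with `img[:3] * img[-1:] + (1 - img[-1:])`. The same blend on a synthetic RGBA tensor (random data, not an actual Objaverse render):

import torch

def composite_on_white(rgba):
    """Blend an RGBA tensor of shape (4, H, W) with values in [0, 1] onto white."""
    rgb, alpha = rgba[:3], rgba[-1:]
    return rgb * alpha + (1.0 - alpha)   # alpha = 0 gives pure white, alpha = 1 keeps the colour

rgba = torch.rand(4, 256, 256)
rgb = composite_on_white(rgba)
print(rgb.shape, float(rgb.min()), float(rgb.max()))  # (3, 256, 256), values stay in [0, 1]
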
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data.py CHANGED
@@ -1,6 +1,6 @@
  from torch.utils.data import Dataset
- from utils.misc_utils import read_pfm
  import os
+ import json
  import numpy as np
  import cv2
  from PIL import Image
@@ -9,12 +9,7 @@ from torchvision import transforms as T
  from data.scene import get_boundingbox

  from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
- import json
- from termcolor import colored
- import imageio
  from kornia import create_meshgrid
- import open3d as o3d
-

  def get_ray_directions(H, W, focal, center=None):
  """
@@ -73,10 +68,6 @@ class BlenderPerView(Dataset):
  # print("root_dir: ", root_dir)
  self.root_dir = root_dir
  self.split = split
- # self.specific_dataset_name = 'Realfusion'
- # self.specific_dataset_name = 'GSO'
- # self.specific_dataset_name = 'Objaverse'
- # self.specific_dataset_name = 'Zero123'

  self.specific_dataset_name = specific_dataset_name
  self.n_views = n_views
@@ -102,8 +93,6 @@ class BlenderPerView(Dataset):
  for shape_name in self.shape_list:
  self.lvis_paths.append(os.path.join(main_folder, shape_name))

- # print("lvis_paths: ", self.lvis_paths)
-
  if img_wh is not None:
  assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
  'img_wh must both be multiples of 32!'
@@ -130,9 +119,6 @@ class BlenderPerView(Dataset):
  self.all_extrinsics.append(extrinsic)
  self.all_near_fars.append(near_far)

- def read_depth(self, filename):
- pass
-
  def read_mask(self, filename):
  mask_h = cv2.imread(filename, 0)
  mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
@@ -160,11 +146,6 @@ class BlenderPerView(Dataset):
  # return 8*len(self.lvis_paths)
  return len(self.lvis_paths)

-
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
- pass
-
-
  def __getitem__(self, idx):
  sample = {}
  idx = idx * 8 # to be deleted
@@ -198,9 +179,8 @@
  self.c2ws = []
  self.w2cs = []
  self.near_fars = []
- # self.root_dir = root_dir
- for image_dix, img_id in enumerate(self.img_ids):
- pose = self.input_poses[image_dix]
+ for image_idx, img_id in enumerate(self.img_ids):
+ pose = self.input_poses[image_idx]
  c2w = pose @ self.blender2opencv
  self.c2ws.append(c2w)
  self.w2cs.append(np.linalg.inv(c2w))
@@ -224,7 +204,6 @@
  w2cs.append(w2c @ w2c_ref_inv)
  c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

- # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
  img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}')

  img = Image.open(img_filename)
@@ -258,7 +237,6 @@

  for vid in src_views:

- # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
  img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}')
  img = Image.open(img_filename)
  img_wh = self.img_wh
@@ -312,7 +290,6 @@
  new_near_fars.append([0.95 * near, 1.05 * far])
  new_depths_h.append(depth * scale_factor)

- # print(new_near_fars)
  imgs = torch.stack(imgs).float()
  depths_h = np.stack(new_depths_h)
  masks_h = np.stack(masks_h)
@@ -360,7 +337,6 @@
  sample['view_ids'] = torch.from_numpy(np.array(view_ids))
  sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space

- # sample['light_idx'] = torch.tensor(light_idx)
  sample['scan'] = shape_name

  sample['scale_factor'] = torch.tensor(scale_factor)
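
A minimal sketch of how the retained evaluation loader above might be driven is given below; the import path, the root_dir layout, and the 'GSO' dataset name are assumptions for illustration, not values fixed by this commit.

# Hypothetical driver for the evaluation loader kept by this commit.
# The module path, data paths, and dataset name are placeholders.
from torch.utils.data import DataLoader
from data.blender_general_narrow_all_eval_new_data import BlenderPerView

dataset = BlenderPerView(
    root_dir="/path/to/eval_data",        # assumed layout: <root>/<dataset_name>/<shape_name>/
    split="val",
    n_views=3,
    img_wh=(256, 256),
    specific_dataset_name="GSO",          # any subfolder present under root_dir
)
loader = DataLoader(dataset, batch_size=1, shuffle=False)

for sample in loader:
    imgs = sample["images"]               # (B, V, 3, H, W) source views
    rays = sample["rays"]                 # rays generated for the reference view
    break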
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data3_1.py DELETED
@@ -1,414 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- # print("root_dir: ", root_dir)
70
- self.root_dir = root_dir
71
- self.split = split
72
- # self.specific_dataset_name = 'Realfusion'
73
- self.specific_dataset_name = 'Objaverse'
74
- self.n_views = n_views
75
- self.N_rays = N_rays
76
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
77
-
78
- self.clean_image = clean_image
79
- self.importance_sample = importance_sample
80
- self.test_ref_views = test_ref_views # used for testing
81
- self.scale_factor = 1.0
82
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
83
- assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh'
84
- # find all subfolders
85
- main_folder = os.path.join(root_dir, self.specific_dataset_name)
86
- self.shape_list = os.listdir(main_folder)
87
- self.shape_list.sort()
88
-
89
- # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
90
-
91
-
92
- self.lvis_paths = []
93
- for shape_name in self.shape_list:
94
- self.lvis_paths.append(os.path.join(main_folder, shape_name))
95
-
96
- # print("lvis_paths: ", self.lvis_paths)
97
-
98
- if img_wh is not None:
99
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
100
- 'img_wh must both be multiples of 32!'
101
-
102
-
103
- # * bounding box for rendering
104
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
105
- self.bbox_max = np.array([1.0, 1.0, 1.0])
106
-
107
- # - used for cost volume regularization
108
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
109
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
110
-
111
-
112
- def define_transforms(self):
113
- self.transform = T.Compose([T.ToTensor()])
114
-
115
-
116
-
117
- def load_cam_info(self):
118
- for vid, img_id in enumerate(self.img_ids):
119
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
120
- self.all_intrinsics.append(intrinsic)
121
- self.all_extrinsics.append(extrinsic)
122
- self.all_near_fars.append(near_far)
123
-
124
- def read_depth(self, filename):
125
- pass
126
-
127
- def read_mask(self, filename):
128
- mask_h = cv2.imread(filename, 0)
129
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
130
- interpolation=cv2.INTER_NEAREST)
131
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
132
- interpolation=cv2.INTER_NEAREST)
133
-
134
- mask[mask > 0] = 1 # the masks stored in png are not binary
135
- mask_h[mask_h > 0] = 1
136
-
137
- return mask, mask_h
138
-
139
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
140
-
141
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
142
-
143
- radius = radius * factor
144
- scale_mat = np.diag([radius, radius, radius, 1.0])
145
- scale_mat[:3, 3] = center.cpu().numpy()
146
- scale_mat = scale_mat.astype(np.float32)
147
-
148
- return scale_mat, 1. / radius.cpu().numpy()
149
-
150
- def __len__(self):
151
- # return 8*len(self.lvis_paths)
152
- return len(self.lvis_paths)
153
-
154
-
155
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
156
- pass
157
-
158
-
159
- def __getitem__(self, idx):
160
- sample = {}
161
- idx = idx * 8 # to be deleted
162
- origin_idx = idx
163
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
164
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
165
-
166
-
167
- folder_path = self.lvis_paths[idx//8]
168
- idx = idx % 8 # [0, 7]
169
-
170
- # last subdir name
171
- shape_name = os.path.split(folder_path)[-1]
172
-
173
-
174
- pose_json_path = os.path.join(folder_path, "pose.json")
175
- with open(pose_json_path, 'r') as f:
176
- meta = json.load(f)
177
-
178
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
179
- self.img_wh = (256, 256)
180
- self.input_poses = np.array(list(meta["c2ws"].values()))
181
- intrinsic = np.eye(4)
182
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
183
- self.intrinsic = intrinsic
184
- self.near_far = np.array(meta["near_far"])
185
- self.near_far[1] = 1.8
186
- self.define_transforms()
187
- self.blender2opencv = np.array(
188
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
189
- )
190
-
191
-
192
- self.c2ws = []
193
- self.w2cs = []
194
- self.near_fars = []
195
- # self.root_dir = root_dir
196
- for image_dix, img_id in enumerate(self.img_ids):
197
- pose = self.input_poses[image_dix]
198
- c2w = pose @ self.blender2opencv
199
- self.c2ws.append(c2w)
200
- self.w2cs.append(np.linalg.inv(c2w))
201
- self.near_fars.append(self.near_far)
202
- self.c2ws = np.stack(self.c2ws, axis=0)
203
- self.w2cs = np.stack(self.w2cs, axis=0)
204
-
205
-
206
- self.all_intrinsics = [] # the cam info of the whole scene
207
- self.all_extrinsics = []
208
- self.all_near_fars = []
209
- self.load_cam_info()
210
-
211
-
212
- # target view
213
- c2w = self.c2ws[idx]
214
- w2c = np.linalg.inv(c2w)
215
- w2c_ref = w2c
216
- w2c_ref_inv = np.linalg.inv(w2c_ref)
217
-
218
- w2cs.append(w2c @ w2c_ref_inv)
219
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
220
-
221
- img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}')
222
- # print(self.img_ids)
223
- img = Image.open(img_filename)
224
- img = self.transform(img) # (4, h, w)
225
-
226
-
227
- if img.shape[0] == 4:
228
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
229
- imgs += [img]
230
-
231
-
232
- depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
233
- depth_h = depth_h.fill_(-1.0)
234
- mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
235
-
236
-
237
- depths_h.append(depth_h)
238
- masks_h.append(mask_h)
239
-
240
- intrinsic = self.intrinsic
241
- intrinsics.append(intrinsic)
242
-
243
-
244
- near_fars.append(self.near_fars[idx])
245
- image_perm = 0 # only supervised on reference view
246
-
247
- mask_dilated = None
248
-
249
- # src_views = range(8+idx*4, 8+(idx+1)*4)
250
- src_views = range(8, 8 + 8 * 4)
251
-
252
- for vid in src_views:
253
- if vid % 4 == 0:
254
- vid = (vid - 8) // 4
255
- img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[vid]}')
256
- else:
257
- img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}')
258
-
259
- img = Image.open(img_filename)
260
- img_wh = self.img_wh
261
-
262
- img = self.transform(img)
263
- if img.shape[0] == 4:
264
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
265
-
266
- imgs += [img]
267
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
268
- depths_h.append(depth_h)
269
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
270
-
271
- near_fars.append(self.all_near_fars[vid])
272
- intrinsics.append(self.all_intrinsics[vid])
273
-
274
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
275
-
276
-
277
- # ! estimate scale_mat
278
- scale_mat, scale_factor = self.cal_scale_mat(
279
- img_hw=[img_wh[1], img_wh[0]],
280
- intrinsics=intrinsics, extrinsics=w2cs,
281
- near_fars=near_fars, factor=1.1
282
- )
283
-
284
-
285
- new_near_fars = []
286
- new_w2cs = []
287
- new_c2ws = []
288
- new_affine_mats = []
289
- new_depths_h = []
290
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
291
-
292
- P = intrinsic @ extrinsic @ scale_mat
293
- P = P[:3, :4]
294
- # - should use load_K_Rt_from_P() to obtain c2w
295
- c2w = load_K_Rt_from_P(None, P)[1]
296
- w2c = np.linalg.inv(c2w)
297
- new_w2cs.append(w2c)
298
- new_c2ws.append(c2w)
299
- affine_mat = np.eye(4)
300
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
301
- new_affine_mats.append(affine_mat)
302
-
303
- camera_o = c2w[:3, 3]
304
- dist = np.sqrt(np.sum(camera_o ** 2))
305
- near = dist - 1
306
- far = dist + 1
307
-
308
- new_near_fars.append([0.95 * near, 1.05 * far])
309
- new_depths_h.append(depth * scale_factor)
310
-
311
- # print(new_near_fars)
312
- imgs = torch.stack(imgs).float()
313
- depths_h = np.stack(new_depths_h)
314
- masks_h = np.stack(masks_h)
315
-
316
- affine_mats = np.stack(new_affine_mats)
317
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
318
- new_near_fars)
319
-
320
- if self.split == 'train':
321
- start_idx = 0
322
- else:
323
- start_idx = 1
324
-
325
-
326
-
327
- target_w2cs = []
328
- target_intrinsics = []
329
- new_target_w2cs = []
330
- for i_idx in range(8):
331
- target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
332
- target_intrinsics.append(self.all_intrinsics[i_idx])
333
-
334
- for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
335
-
336
- P = intrinsic @ extrinsic @ scale_mat
337
- P = P[:3, :4]
338
- # - should use load_K_Rt_from_P() to obtain c2w
339
- c2w = load_K_Rt_from_P(None, P)[1]
340
- w2c = np.linalg.inv(c2w)
341
- new_target_w2cs.append(w2c)
342
- target_w2cs = np.stack(new_target_w2cs)
343
-
344
-
345
-
346
- view_ids = [idx] + list(src_views)
347
- sample['origin_idx'] = origin_idx
348
- sample['images'] = imgs # (V, 3, H, W)
349
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
350
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
351
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
352
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
353
- sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
354
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
355
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
356
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
357
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
358
-
359
- # sample['light_idx'] = torch.tensor(light_idx)
360
- sample['scan'] = shape_name
361
-
362
- sample['scale_factor'] = torch.tensor(scale_factor)
363
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
364
- sample['render_img_idx'] = torch.tensor(image_perm)
365
- sample['partial_vol_origin'] = self.partial_vol_origin
366
- sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
367
- # print("meta: ", sample['meta'])
368
-
369
- # - image to render
370
- sample['query_image'] = sample['images'][0]
371
- sample['query_c2w'] = sample['c2ws'][0]
372
- sample['query_w2c'] = sample['w2cs'][0]
373
- sample['query_intrinsic'] = sample['intrinsics'][0]
374
- sample['query_depth'] = sample['depths_h'][0]
375
- sample['query_mask'] = sample['masks_h'][0]
376
- sample['query_near_far'] = sample['near_fars'][0]
377
-
378
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
379
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
380
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
381
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
382
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
383
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
384
- sample['view_ids'] = sample['view_ids'][start_idx:]
385
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
386
-
387
- sample['scale_mat'] = torch.from_numpy(scale_mat)
388
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
389
-
390
- # - generate rays
391
- if ('val' in self.split) or ('test' in self.split):
392
- sample_rays = gen_rays_from_single_image(
393
- img_wh[1], img_wh[0],
394
- sample['query_image'],
395
- sample['query_intrinsic'],
396
- sample['query_c2w'],
397
- depth=sample['query_depth'],
398
- mask=sample['query_mask'] if self.clean_image else None)
399
- else:
400
- sample_rays = gen_random_rays_from_single_image(
401
- img_wh[1], img_wh[0],
402
- self.N_rays,
403
- sample['query_image'],
404
- sample['query_intrinsic'],
405
- sample['query_c2w'],
406
- depth=sample['query_depth'],
407
- mask=sample['query_mask'] if self.clean_image else None,
408
- dilated_mask=mask_dilated,
409
- importance_sample=self.importance_sample)
410
-
411
-
412
- sample['rays'] = sample_rays
413
-
414
- return sample
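
The load_K_Rt_from_P helper used throughout these loaders recovers intrinsics and a camera-to-world pose from a 3x4 projection matrix via cv2.decomposeProjectionMatrix; a small self-contained round-trip check is sketched below (the intrinsics, rotation, and camera centre are arbitrary example values).

# Round-trip check of the K/R/t recovery performed by load_K_Rt_from_P.
# cv2.decomposeProjectionMatrix returns K, the world-to-camera rotation R,
# and the camera centre as a homogeneous 4-vector, so the camera-to-world
# rotation is R.T and the translation is t[:3] / t[3].
import numpy as np
import cv2

K = np.array([[280.0, 0.0, 128.0],
              [0.0, 280.0, 128.0],
              [0.0, 0.0, 1.0]])
R = cv2.Rodrigues(np.array([0.1, -0.3, 0.2]))[0]   # arbitrary rotation
C = np.array([0.4, -0.2, 1.5])                     # arbitrary camera centre

P = K @ np.hstack([R, (-R @ C)[:, None]])          # 3x4 projection matrix

K_out, R_out, t_out = cv2.decomposeProjectionMatrix(P)[:3]
K_out = K_out / K_out[2, 2]

c2w = np.eye(4, dtype=np.float32)
c2w[:3, :3] = R_out.transpose()                    # world-to-camera -> camera-to-world
c2w[:3, 3] = (t_out[:3] / t_out[3])[:, 0]          # recovered camera centre

assert np.allclose(c2w[:3, 3], C, atol=1e-5)
assert np.allclose(K_out, K, atol=1e-5)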
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_32_wide.py DELETED
@@ -1,465 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
-
18
- def calc_pose(phis, thetas, size, radius = 1.2):
19
- import torch
20
- def normalize(vectors):
21
- return vectors / (torch.norm(vectors, dim=-1, keepdim=True) + 1e-10)
22
- # device = torch.device('cuda')
23
- thetas = torch.FloatTensor(thetas)
24
- phis = torch.FloatTensor(phis)
25
-
26
- centers = torch.stack([
27
- radius * torch.sin(thetas) * torch.sin(phis),
28
- -radius * torch.cos(thetas) * torch.sin(phis),
29
- radius * torch.cos(phis),
30
- ], dim=-1) # [B, 3]
31
-
32
- # lookat
33
- forward_vector = normalize(centers).squeeze(0)
34
- up_vector = torch.FloatTensor([0, 0, 1]).unsqueeze(0).repeat(size, 1)
35
- right_vector = normalize(torch.cross(up_vector, forward_vector, dim=-1))
36
- if right_vector.pow(2).sum() < 0.01:
37
- right_vector = torch.FloatTensor([0, 1, 0]).unsqueeze(0).repeat(size, 1)
38
- up_vector = normalize(torch.cross(forward_vector, right_vector, dim=-1))
39
-
40
- poses = torch.eye(4, dtype=torch.float)[:3].unsqueeze(0).repeat(size, 1, 1)
41
- poses[:, :3, :3] = torch.stack((right_vector, up_vector, forward_vector), dim=-1)
42
- poses[:, :3, 3] = centers
43
- return poses
44
-
45
- def get_ray_directions(H, W, focal, center=None):
46
- """
47
- Get ray directions for all pixels in camera coordinate.
48
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
49
- ray-tracing-generating-camera-rays/standard-coordinate-systems
50
- Inputs:
51
- H, W, focal: image height, width and focal length
52
- Outputs:
53
- directions: (H, W, 3), the direction of the rays in camera coordinate
54
- """
55
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
56
-
57
- i, j = grid.unbind(-1)
58
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
59
- # see https://github.com/bmild/nerf/issues/24
60
- cent = center if center is not None else [W / 2, H / 2]
61
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
62
-
63
- return directions
64
-
65
- def load_K_Rt_from_P(filename, P=None):
66
- if P is None:
67
- lines = open(filename).read().splitlines()
68
- if len(lines) == 4:
69
- lines = lines[1:]
70
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
71
- P = np.asarray(lines).astype(np.float32).squeeze()
72
-
73
- out = cv2.decomposeProjectionMatrix(P)
74
- K = out[0]
75
- R = out[1]
76
- t = out[2]
77
-
78
- K = K / K[2, 2]
79
- intrinsics = np.eye(4)
80
- intrinsics[:3, :3] = K
81
-
82
- pose = np.eye(4, dtype=np.float32)
83
- pose[:3, :3] = R.transpose() # ? why need transpose here
84
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
85
-
86
- return intrinsics, pose # ! return cam2world matrix here
87
-
88
-
89
- # ! load one ref-image with multiple src-images in camera coordinate system
90
- class BlenderPerView(Dataset):
91
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
92
- split_filepath=None, pair_filepath=None,
93
- N_rays=512,
94
- vol_dims=[128, 128, 128], batch_size=1,
95
- clean_image=False, importance_sample=False, test_ref_views=[],
96
- specific_dataset_name = 'GSO'
97
- ):
98
-
99
- # print("root_dir: ", root_dir)
100
- self.root_dir = root_dir
101
- self.split = split
102
- # self.specific_dataset_name = 'Realfusion'
103
- # self.specific_dataset_name = 'GSO'
104
- # self.specific_dataset_name = 'Objaverse'
105
- # self.specific_dataset_name = 'Zero123'
106
-
107
- self.specific_dataset_name = specific_dataset_name
108
- self.n_views = n_views
109
- self.N_rays = N_rays
110
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
111
-
112
- self.clean_image = clean_image
113
- self.importance_sample = importance_sample
114
- self.test_ref_views = test_ref_views # used for testing
115
- self.scale_factor = 1.0
116
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
117
- assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh'
118
- # find all subfolders
119
- main_folder = os.path.join(root_dir)
120
- self.shape_list = os.listdir(main_folder)
121
- self.shape_list.sort()
122
-
123
- # self.shape_list = ['barrel_render']
124
- # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
125
-
126
-
127
- self.lvis_paths = []
128
- for shape_name in self.shape_list:
129
- self.lvis_paths.append(os.path.join(main_folder, shape_name))
130
-
131
- # print("lvis_paths: ", self.lvis_paths)
132
-
133
- if img_wh is not None:
134
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
135
- 'img_wh must both be multiples of 32!'
136
-
137
- pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
138
-
139
- with open(pose_json_path, 'r') as f:
140
- meta = json.load(f)
141
- intrinsic = np.eye(4)
142
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
143
- self.intrinsic = intrinsic
144
- self.near_far = np.array(meta["near_far"])
145
- self.near_far[1] = 1.8
146
-
147
- # * bounding box for rendering
148
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
149
- self.bbox_max = np.array([1.0, 1.0, 1.0])
150
-
151
- # - used for cost volume regularization
152
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
153
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
154
-
155
-
156
- def define_transforms(self):
157
- self.transform = T.Compose([T.ToTensor()])
158
-
159
-
160
-
161
- def load_cam_info(self):
162
- for vid in range(self.input_poses.shape[0]):
163
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
164
- self.all_intrinsics.append(intrinsic)
165
- self.all_extrinsics.append(extrinsic)
166
- self.all_near_fars.append(near_far)
167
-
168
- def read_depth(self, filename):
169
- pass
170
-
171
- def read_mask(self, filename):
172
- mask_h = cv2.imread(filename, 0)
173
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
174
- interpolation=cv2.INTER_NEAREST)
175
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
176
- interpolation=cv2.INTER_NEAREST)
177
-
178
- mask[mask > 0] = 1 # the masks stored in png are not binary
179
- mask_h[mask_h > 0] = 1
180
-
181
- return mask, mask_h
182
-
183
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
184
-
185
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
186
-
187
- radius = radius * factor
188
- scale_mat = np.diag([radius, radius, radius, 1.0])
189
- scale_mat[:3, 3] = center.cpu().numpy()
190
- scale_mat = scale_mat.astype(np.float32)
191
-
192
- return scale_mat, 1. / radius.cpu().numpy()
193
-
194
- def __len__(self):
195
- # return 8*len(self.lvis_paths)
196
- return len(self.lvis_paths)
197
-
198
-
199
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
200
- pass
201
-
202
-
203
- def __getitem__(self, idx):
204
- sample = {}
205
- idx = idx * 8 # to be deleted
206
- origin_idx = idx
207
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
208
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views
209
-
210
- folder_path = self.lvis_paths[idx//8]
211
- idx = idx % 8 # [0, 7]
212
-
213
- # last subdir name
214
- shape_name = os.path.split(folder_path)[-1]
215
-
216
- # pose_json_path = os.path.join(folder_path, "pose.json")
217
- # with open(pose_json_path, 'r') as f:
218
- # meta = json.load(f)
219
-
220
- # self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
221
- # self.img_wh = (256, 256)
222
- # self.input_poses = np.array(list(meta["c2ws"].values()))
223
- # intrinsic = np.eye(4)
224
- # intrinsic[:3, :3] = np.array(meta["intrinsics"])
225
- # self.intrinsic = intrinsic
226
- # self.near_far = np.array(meta["near_far"])
227
- # self.near_far[1] = 1.8
228
- # self.define_transforms()
229
- # self.blender2opencv = np.array(
230
- # [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
231
- # )
232
-
233
- pose_file = os.path.join(folder_path, '32_random', 'views.npz')
234
- pose_array = np.load(pose_file)
235
- pose = calc_pose(pose_array['elevations'], pose_array['azimuths'], 32) # [32, 3, 4] c2ws
236
-
237
- self.img_wh = (256, 256)
238
- self.input_poses = np.array(pose)
239
- self.input_poses = np.concatenate([self.input_poses, np.tile(np.array([0, 0, 0, 1], dtype=np.float32)[None, None, :], [self.input_poses.shape[0], 1, 1])], axis=1)
240
- self.define_transforms()
241
- self.blender2opencv = np.array(
242
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
243
- )
244
-
245
- self.c2ws = []
246
- self.w2cs = []
247
- self.near_fars = []
248
- # self.root_dir = root_dir
249
- for image_dix in range(pose.shape[0]):
250
- pose = self.input_poses[image_dix]
251
- c2w = pose @ self.blender2opencv
252
- self.c2ws.append(c2w)
253
- self.w2cs.append(np.linalg.inv(c2w))
254
- self.near_fars.append(self.near_far)
255
- self.c2ws = np.stack(self.c2ws, axis=0)
256
- self.w2cs = np.stack(self.w2cs, axis=0)
257
-
258
-
259
- self.all_intrinsics = [] # the cam info of the whole scene
260
- self.all_extrinsics = []
261
- self.all_near_fars = []
262
- self.load_cam_info()
263
-
264
-
265
- # target view
266
- c2w = self.c2ws[idx]
267
- w2c = np.linalg.inv(c2w)
268
- w2c_ref = w2c
269
- w2c_ref_inv = np.linalg.inv(w2c_ref)
270
-
271
- w2cs.append(w2c @ w2c_ref_inv)
272
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
273
-
274
- # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
275
- img_filename = os.path.join(folder_path, '32_random', f'{idx}.png')
276
-
277
- img = Image.open(img_filename)
278
- img = self.transform(img) # (4, h, w)
279
-
280
-
281
- if img.shape[0] == 4:
282
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
283
- imgs += [img]
284
-
285
-
286
- depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
287
- depth_h = depth_h.fill_(-1.0)
288
- mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
289
-
290
-
291
- depths_h.append(depth_h)
292
- masks_h.append(mask_h)
293
-
294
- intrinsic = self.intrinsic
295
- intrinsics.append(intrinsic)
296
-
297
-
298
- near_fars.append(self.near_fars[idx])
299
- image_perm = 0 # only supervised on reference view
300
-
301
- mask_dilated = None
302
-
303
-
304
- src_views = range(0, 8 * 4)
305
-
306
- for vid in src_views:
307
-
308
- # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
309
- img_filename = os.path.join(folder_path, '32_random', f'{vid}.png')
310
- img = Image.open(img_filename)
311
- img_wh = self.img_wh
312
-
313
- img = self.transform(img)
314
- if img.shape[0] == 4:
315
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
316
-
317
- imgs += [img]
318
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
319
- depths_h.append(depth_h)
320
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
321
-
322
- near_fars.append(self.all_near_fars[vid])
323
- intrinsics.append(self.all_intrinsics[vid])
324
-
325
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
326
-
327
-
328
- # ! estimate scale_mat
329
- scale_mat, scale_factor = self.cal_scale_mat(
330
- img_hw=[img_wh[1], img_wh[0]],
331
- intrinsics=intrinsics, extrinsics=w2cs,
332
- near_fars=near_fars, factor=1.1
333
- )
334
-
335
-
336
- new_near_fars = []
337
- new_w2cs = []
338
- new_c2ws = []
339
- new_affine_mats = []
340
- new_depths_h = []
341
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
342
-
343
- P = intrinsic @ extrinsic @ scale_mat
344
- P = P[:3, :4]
345
- # - should use load_K_Rt_from_P() to obtain c2w
346
- c2w = load_K_Rt_from_P(None, P)[1]
347
- w2c = np.linalg.inv(c2w)
348
- new_w2cs.append(w2c)
349
- new_c2ws.append(c2w)
350
- affine_mat = np.eye(4)
351
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
352
- new_affine_mats.append(affine_mat)
353
-
354
- camera_o = c2w[:3, 3]
355
- dist = np.sqrt(np.sum(camera_o ** 2))
356
- near = dist - 1
357
- far = dist + 1
358
-
359
- new_near_fars.append([0.95 * near, 1.05 * far])
360
- new_depths_h.append(depth * scale_factor)
361
-
362
- # print(new_near_fars)
363
- imgs = torch.stack(imgs).float()
364
- depths_h = np.stack(new_depths_h)
365
- masks_h = np.stack(masks_h)
366
-
367
- affine_mats = np.stack(new_affine_mats)
368
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
369
- new_near_fars)
370
-
371
- if self.split == 'train':
372
- start_idx = 0
373
- else:
374
- start_idx = 1
375
-
376
-
377
-
378
- target_w2cs = []
379
- target_intrinsics = []
380
- new_target_w2cs = []
381
- for i_idx in range(8):
382
- target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
383
- target_intrinsics.append(self.all_intrinsics[i_idx])
384
-
385
- for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
386
-
387
- P = intrinsic @ extrinsic @ scale_mat
388
- P = P[:3, :4]
389
- # - should use load_K_Rt_from_P() to obtain c2w
390
- c2w = load_K_Rt_from_P(None, P)[1]
391
- w2c = np.linalg.inv(c2w)
392
- new_target_w2cs.append(w2c)
393
- target_w2cs = np.stack(new_target_w2cs)
394
-
395
-
396
-
397
- view_ids = [idx] + list(src_views)
398
- sample['origin_idx'] = origin_idx
399
- sample['images'] = imgs # (V, 3, H, W)
400
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
401
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
402
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
403
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
404
- sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
405
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
406
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
407
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
408
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
409
-
410
- # sample['light_idx'] = torch.tensor(light_idx)
411
- sample['scan'] = shape_name
412
-
413
- sample['scale_factor'] = torch.tensor(scale_factor)
414
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
415
- sample['render_img_idx'] = torch.tensor(image_perm)
416
- sample['partial_vol_origin'] = self.partial_vol_origin
417
- sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
418
- # print("meta: ", sample['meta'])
419
-
420
- # - image to render
421
- sample['query_image'] = sample['images'][0]
422
- sample['query_c2w'] = sample['c2ws'][0]
423
- sample['query_w2c'] = sample['w2cs'][0]
424
- sample['query_intrinsic'] = sample['intrinsics'][0]
425
- sample['query_depth'] = sample['depths_h'][0]
426
- sample['query_mask'] = sample['masks_h'][0]
427
- sample['query_near_far'] = sample['near_fars'][0]
428
-
429
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
430
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
431
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
432
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
433
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
434
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
435
- sample['view_ids'] = sample['view_ids'][start_idx:]
436
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
437
-
438
- sample['scale_mat'] = torch.from_numpy(scale_mat)
439
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
440
-
441
- # - generate rays
442
- if ('val' in self.split) or ('test' in self.split):
443
- sample_rays = gen_rays_from_single_image(
444
- img_wh[1], img_wh[0],
445
- sample['query_image'],
446
- sample['query_intrinsic'],
447
- sample['query_c2w'],
448
- depth=sample['query_depth'],
449
- mask=sample['query_mask'] if self.clean_image else None)
450
- else:
451
- sample_rays = gen_random_rays_from_single_image(
452
- img_wh[1], img_wh[0],
453
- self.N_rays,
454
- sample['query_image'],
455
- sample['query_intrinsic'],
456
- sample['query_c2w'],
457
- depth=sample['query_depth'],
458
- mask=sample['query_mask'] if self.clean_image else None,
459
- dilated_mask=mask_dilated,
460
- importance_sample=self.importance_sample)
461
-
462
-
463
- sample['rays'] = sample_rays
464
-
465
- return sample
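
calc_pose above orients each camera so that its +z column points from the origin out through the camera centre (a Blender-style pose, later flipped by blender2opencv); the single-view numpy sketch below mirrors that construction so the convention can be checked (the radius and angles are arbitrary).

# One-view mirror of calc_pose: place a camera on a sphere and orient it so
# its +z column points from the object (origin) out through the camera.
import numpy as np

radius, phi, theta = 1.2, np.deg2rad(70.0), np.deg2rad(30.0)   # arbitrary elevation/azimuth

center = np.array([radius * np.sin(theta) * np.sin(phi),
                   -radius * np.cos(theta) * np.sin(phi),
                   radius * np.cos(phi)])

forward = center / np.linalg.norm(center)
up_world = np.array([0.0, 0.0, 1.0])
right = np.cross(up_world, forward)
right /= np.linalg.norm(right)
up = np.cross(forward, right)
up /= np.linalg.norm(up)

c2w = np.eye(4)
c2w[:3, :3] = np.stack([right, up, forward], axis=-1)   # columns: right, up, forward
c2w[:3, 3] = center

assert np.allclose(c2w[:3, :3].T @ c2w[:3, :3], np.eye(3), atol=1e-6)
assert np.allclose(c2w[:3, 2], center / radius, atol=1e-6)   # +z axis points away from the object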
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_4_4.py DELETED
@@ -1,419 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
-
18
-
19
- def get_ray_directions(H, W, focal, center=None):
20
- """
21
- Get ray directions for all pixels in camera coordinate.
22
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
23
- ray-tracing-generating-camera-rays/standard-coordinate-systems
24
- Inputs:
25
- H, W, focal: image height, width and focal length
26
- Outputs:
27
- directions: (H, W, 3), the direction of the rays in camera coordinate
28
- """
29
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
30
-
31
- i, j = grid.unbind(-1)
32
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
33
- # see https://github.com/bmild/nerf/issues/24
34
- cent = center if center is not None else [W / 2, H / 2]
35
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
36
-
37
- return directions
38
-
39
- def load_K_Rt_from_P(filename, P=None):
40
- if P is None:
41
- lines = open(filename).read().splitlines()
42
- if len(lines) == 4:
43
- lines = lines[1:]
44
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
45
- P = np.asarray(lines).astype(np.float32).squeeze()
46
-
47
- out = cv2.decomposeProjectionMatrix(P)
48
- K = out[0]
49
- R = out[1]
50
- t = out[2]
51
-
52
- K = K / K[2, 2]
53
- intrinsics = np.eye(4)
54
- intrinsics[:3, :3] = K
55
-
56
- pose = np.eye(4, dtype=np.float32)
57
- pose[:3, :3] = R.transpose() # ? why need transpose here
58
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
59
-
60
- return intrinsics, pose # ! return cam2world matrix here
61
-
62
-
63
- # ! load one ref-image with multiple src-images in camera coordinate system
64
- class BlenderPerView(Dataset):
65
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
66
- split_filepath=None, pair_filepath=None,
67
- N_rays=512,
68
- vol_dims=[128, 128, 128], batch_size=1,
69
- clean_image=False, importance_sample=False, test_ref_views=[],
70
- specific_dataset_name = 'GSO'
71
- ):
72
-
73
- # print("root_dir: ", root_dir)
74
- self.root_dir = root_dir
75
- self.split = split
76
- # self.specific_dataset_name = 'Realfusion'
77
- # self.specific_dataset_name = 'GSO'
78
- # self.specific_dataset_name = 'Objaverse'
79
- # self.specific_dataset_name = 'Zero123'
80
-
81
- self.specific_dataset_name = specific_dataset_name
82
- self.n_views = n_views
83
- self.N_rays = N_rays
84
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
85
-
86
- self.clean_image = clean_image
87
- self.importance_sample = importance_sample
88
- self.test_ref_views = test_ref_views # used for testing
89
- self.scale_factor = 1.0
90
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
91
- assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh'
92
- # find all subfolders
93
- main_folder = os.path.join(root_dir, self.specific_dataset_name)
94
- self.shape_list = os.listdir(main_folder)
95
- self.shape_list.sort()
96
-
97
- # self.shape_list = ['barrel_render']
98
- # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
99
-
100
-
101
- self.lvis_paths = []
102
- for shape_name in self.shape_list:
103
- self.lvis_paths.append(os.path.join(main_folder, shape_name))
104
-
105
- # print("lvis_paths: ", self.lvis_paths)
106
-
107
- if img_wh is not None:
108
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
109
- 'img_wh must both be multiples of 32!'
110
-
111
-
112
- # * bounding box for rendering
113
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
114
- self.bbox_max = np.array([1.0, 1.0, 1.0])
115
-
116
- # - used for cost volume regularization
117
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
118
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
119
-
120
-
121
- def define_transforms(self):
122
- self.transform = T.Compose([T.ToTensor()])
123
-
124
-
125
-
126
- def load_cam_info(self):
127
- for vid, img_id in enumerate(self.img_ids):
128
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
129
- self.all_intrinsics.append(intrinsic)
130
- self.all_extrinsics.append(extrinsic)
131
- self.all_near_fars.append(near_far)
132
-
133
- def read_depth(self, filename):
134
- pass
135
-
136
- def read_mask(self, filename):
137
- mask_h = cv2.imread(filename, 0)
138
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
139
- interpolation=cv2.INTER_NEAREST)
140
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
141
- interpolation=cv2.INTER_NEAREST)
142
-
143
- mask[mask > 0] = 1 # the masks stored in png are not binary
144
- mask_h[mask_h > 0] = 1
145
-
146
- return mask, mask_h
147
-
148
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
149
-
150
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
151
-
152
- radius = radius * factor
153
- scale_mat = np.diag([radius, radius, radius, 1.0])
154
- scale_mat[:3, 3] = center.cpu().numpy()
155
- scale_mat = scale_mat.astype(np.float32)
156
-
157
- return scale_mat, 1. / radius.cpu().numpy()
158
-
159
- def __len__(self):
160
- # return 8*len(self.lvis_paths)
161
- return len(self.lvis_paths)
162
-
163
-
164
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
165
- pass
166
-
167
-
168
- def __getitem__(self, idx):
169
- sample = {}
170
- idx = idx * 8 # to be deleted
171
- origin_idx = idx
172
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
173
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views
174
-
175
- folder_path = self.lvis_paths[idx//8]
176
- idx = idx % 8 # [0, 7]
177
-
178
- # last subdir name
179
- shape_name = os.path.split(folder_path)[-1]
180
-
181
- pose_json_path = os.path.join(folder_path, "pose.json")
182
- with open(pose_json_path, 'r') as f:
183
- meta = json.load(f)
184
-
185
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
186
- self.img_wh = (256, 256)
187
- self.input_poses = np.array(list(meta["c2ws"].values()))
188
- intrinsic = np.eye(4)
189
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
190
- self.intrinsic = intrinsic
191
- self.near_far = np.array(meta["near_far"])
192
- self.near_far[1] = 1.8
193
- self.define_transforms()
194
- self.blender2opencv = np.array(
195
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
196
- )
197
-
198
- self.c2ws = []
199
- self.w2cs = []
200
- self.near_fars = []
201
- # self.root_dir = root_dir
202
- for image_dix, img_id in enumerate(self.img_ids):
203
- pose = self.input_poses[image_dix]
204
- c2w = pose @ self.blender2opencv
205
- self.c2ws.append(c2w)
206
- self.w2cs.append(np.linalg.inv(c2w))
207
- self.near_fars.append(self.near_far)
208
- self.c2ws = np.stack(self.c2ws, axis=0)
209
- self.w2cs = np.stack(self.w2cs, axis=0)
210
-
211
-
212
- self.all_intrinsics = [] # the cam info of the whole scene
213
- self.all_extrinsics = []
214
- self.all_near_fars = []
215
- self.load_cam_info()
216
-
217
-
218
- # target view
219
- c2w = self.c2ws[idx]
220
- w2c = np.linalg.inv(c2w)
221
- w2c_ref = w2c
222
- w2c_ref_inv = np.linalg.inv(w2c_ref)
223
-
224
- w2cs.append(w2c @ w2c_ref_inv)
225
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
226
-
227
- # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
228
- img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}')
229
-
230
- img = Image.open(img_filename)
231
- img = self.transform(img) # (4, h, w)
232
-
233
-
234
- if img.shape[0] == 4:
235
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
236
- imgs += [img]
237
-
238
-
239
- depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
240
- depth_h = depth_h.fill_(-1.0)
241
- mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
242
-
243
-
244
- depths_h.append(depth_h)
245
- masks_h.append(mask_h)
246
-
247
- intrinsic = self.intrinsic
248
- intrinsics.append(intrinsic)
249
-
250
-
251
- near_fars.append(self.near_fars[idx])
252
- image_perm = 0 # only supervised on reference view
253
-
254
- mask_dilated = None
255
-
256
-
257
- src_views = range(8, 8 + 8 * 4)
258
-
259
- for vid in src_views:
260
- if (vid // 4) % 2 != 0:
261
- continue
262
- # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
263
- img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}')
264
- img = Image.open(img_filename)
265
- img_wh = self.img_wh
266
-
267
- img = self.transform(img)
268
- if img.shape[0] == 4:
269
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
270
-
271
- imgs += [img]
272
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
273
- depths_h.append(depth_h)
274
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
275
-
276
- near_fars.append(self.all_near_fars[vid])
277
- intrinsics.append(self.all_intrinsics[vid])
278
-
279
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
280
-
281
-
282
- # ! estimate scale_mat
283
- scale_mat, scale_factor = self.cal_scale_mat(
284
- img_hw=[img_wh[1], img_wh[0]],
285
- intrinsics=intrinsics, extrinsics=w2cs,
286
- near_fars=near_fars, factor=1.1
287
- )
288
-
289
-
290
- new_near_fars = []
291
- new_w2cs = []
292
- new_c2ws = []
293
- new_affine_mats = []
294
- new_depths_h = []
295
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
296
-
297
- P = intrinsic @ extrinsic @ scale_mat
298
- P = P[:3, :4]
299
- # - should use load_K_Rt_from_P() to obtain c2w
300
- c2w = load_K_Rt_from_P(None, P)[1]
301
- w2c = np.linalg.inv(c2w)
302
- new_w2cs.append(w2c)
303
- new_c2ws.append(c2w)
304
- affine_mat = np.eye(4)
305
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
306
- new_affine_mats.append(affine_mat)
307
-
308
- camera_o = c2w[:3, 3]
309
- dist = np.sqrt(np.sum(camera_o ** 2))
310
- near = dist - 1
311
- far = dist + 1
312
-
313
- new_near_fars.append([0.95 * near, 1.05 * far])
314
- new_depths_h.append(depth * scale_factor)
315
-
316
- # print(new_near_fars)
317
- imgs = torch.stack(imgs).float()
318
- depths_h = np.stack(new_depths_h)
319
- masks_h = np.stack(masks_h)
320
-
321
- affine_mats = np.stack(new_affine_mats)
322
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
323
- new_near_fars)
324
-
325
- if self.split == 'train':
326
- start_idx = 0
327
- else:
328
- start_idx = 1
329
-
330
-
331
-
332
- target_w2cs = []
333
- target_intrinsics = []
334
- new_target_w2cs = []
335
- for i_idx in range(8):
336
- target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
337
- target_intrinsics.append(self.all_intrinsics[i_idx])
338
-
339
- for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
340
-
341
- P = intrinsic @ extrinsic @ scale_mat
342
- P = P[:3, :4]
343
- # - should use load_K_Rt_from_P() to obtain c2w
344
- c2w = load_K_Rt_from_P(None, P)[1]
345
- w2c = np.linalg.inv(c2w)
346
- new_target_w2cs.append(w2c)
347
- target_w2cs = np.stack(new_target_w2cs)
348
-
349
-
350
-
351
- view_ids = [idx] + list(src_views)
352
- sample['origin_idx'] = origin_idx
353
- sample['images'] = imgs # (V, 3, H, W)
354
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
355
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
356
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
357
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
358
- sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
359
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
360
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
361
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
362
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
363
-
364
- # sample['light_idx'] = torch.tensor(light_idx)
365
- sample['scan'] = shape_name
366
-
367
- sample['scale_factor'] = torch.tensor(scale_factor)
368
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
369
- sample['render_img_idx'] = torch.tensor(image_perm)
370
- sample['partial_vol_origin'] = self.partial_vol_origin
371
- sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
372
- # print("meta: ", sample['meta'])
373
-
374
- # - image to render
375
- sample['query_image'] = sample['images'][0]
376
- sample['query_c2w'] = sample['c2ws'][0]
377
- sample['query_w2c'] = sample['w2cs'][0]
378
- sample['query_intrinsic'] = sample['intrinsics'][0]
379
- sample['query_depth'] = sample['depths_h'][0]
380
- sample['query_mask'] = sample['masks_h'][0]
381
- sample['query_near_far'] = sample['near_fars'][0]
382
-
383
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
384
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
385
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
386
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
387
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
388
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
389
- sample['view_ids'] = sample['view_ids'][start_idx:]
390
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
391
-
392
- sample['scale_mat'] = torch.from_numpy(scale_mat)
393
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
394
-
395
- # - generate rays
396
- if ('val' in self.split) or ('test' in self.split):
397
- sample_rays = gen_rays_from_single_image(
398
- img_wh[1], img_wh[0],
399
- sample['query_image'],
400
- sample['query_intrinsic'],
401
- sample['query_c2w'],
402
- depth=sample['query_depth'],
403
- mask=sample['query_mask'] if self.clean_image else None)
404
- else:
405
- sample_rays = gen_random_rays_from_single_image(
406
- img_wh[1], img_wh[0],
407
- self.N_rays,
408
- sample['query_image'],
409
- sample['query_intrinsic'],
410
- sample['query_c2w'],
411
- depth=sample['query_depth'],
412
- mask=sample['query_mask'] if self.clean_image else None,
413
- dilated_mask=mask_dilated,
414
- importance_sample=self.importance_sample)
415
-
416
-
417
- sample['rays'] = sample_rays
418
-
419
- return sample
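
All of these loader variants normalize the scene the same way: cal_scale_mat returns a similarity that maps the unit sphere onto the estimated bounding sphere, scale_factor = 1/radius rescales depths, and the per-view near/far planes are the camera's distance to the normalized origin minus/plus 1 with 0.95/1.05 margins. A small worked sketch of that normalization follows (the bounding-sphere centre and radius are made-up numbers).

# Worked sketch of the scene normalization used in __getitem__:
# scale_mat maps the unit sphere onto the scene's bounding sphere, so in the
# normalized frame the object fits inside radius 1 and near/far are dist -/+ 1.
import numpy as np

center = np.array([0.05, -0.10, 0.02])   # assumed bounding-sphere centre
radius = 0.65                            # assumed bounding-sphere radius

scale_mat = np.diag([radius, radius, radius, 1.0]).astype(np.float32)
scale_mat[:3, 3] = center
scale_factor = 1.0 / radius              # depths are multiplied by this

# A camera sitting 1.5 units from the bounding-sphere centre in world space
cam_world = center + np.array([0.0, 0.0, 1.5])

# Its position in the normalized (unit-sphere) frame
cam_norm = np.linalg.inv(scale_mat) @ np.append(cam_world, 1.0)
dist = np.linalg.norm(cam_norm[:3])

near, far = dist - 1.0, dist + 1.0
near_far = [0.95 * near, 1.05 * far]     # the margins used by the loaders
print(dist, near_far)                    # dist ~= 1.5 / 0.65 ~= 2.31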
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_6_4.py DELETED
@@ -1,420 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
-
18
-
19
- def get_ray_directions(H, W, focal, center=None):
20
- """
21
- Get ray directions for all pixels in camera coordinate.
22
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
23
- ray-tracing-generating-camera-rays/standard-coordinate-systems
24
- Inputs:
25
- H, W, focal: image height, width and focal length
26
- Outputs:
27
- directions: (H, W, 3), the direction of the rays in camera coordinate
28
- """
29
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
30
-
31
- i, j = grid.unbind(-1)
32
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
33
- # see https://github.com/bmild/nerf/issues/24
34
- cent = center if center is not None else [W / 2, H / 2]
35
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
36
-
37
- return directions
38
-
39
- def load_K_Rt_from_P(filename, P=None):
40
- if P is None:
41
- lines = open(filename).read().splitlines()
42
- if len(lines) == 4:
43
- lines = lines[1:]
44
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
45
- P = np.asarray(lines).astype(np.float32).squeeze()
46
-
47
- out = cv2.decomposeProjectionMatrix(P)
48
- K = out[0]
49
- R = out[1]
50
- t = out[2]
51
-
52
- K = K / K[2, 2]
53
- intrinsics = np.eye(4)
54
- intrinsics[:3, :3] = K
55
-
56
- pose = np.eye(4, dtype=np.float32)
57
- pose[:3, :3] = R.transpose() # ? why need transpose here
58
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
59
-
60
- return intrinsics, pose # ! return cam2world matrix here
61
-
62
-
63
- # ! load one ref-image with multiple src-images in camera coordinate system
64
- class BlenderPerView(Dataset):
65
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
66
- split_filepath=None, pair_filepath=None,
67
- N_rays=512,
68
- vol_dims=[128, 128, 128], batch_size=1,
69
- clean_image=False, importance_sample=False, test_ref_views=[],
70
- specific_dataset_name = 'GSO'
71
- ):
72
-
73
- # print("root_dir: ", root_dir)
74
- self.root_dir = root_dir
75
- self.split = split
76
- # self.specific_dataset_name = 'Realfusion'
77
- # self.specific_dataset_name = 'GSO'
78
- # self.specific_dataset_name = 'Objaverse'
79
- # self.specific_dataset_name = 'Zero123'
80
-
81
- self.specific_dataset_name = specific_dataset_name
82
- self.n_views = n_views
83
- self.N_rays = N_rays
84
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
85
-
86
- self.clean_image = clean_image
87
- self.importance_sample = importance_sample
88
- self.test_ref_views = test_ref_views # used for testing
89
- self.scale_factor = 1.0
90
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
91
- assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh'
92
- # find all subfolders
93
- main_folder = os.path.join(root_dir, self.specific_dataset_name)
94
- self.shape_list = os.listdir(main_folder)
95
- self.shape_list.sort()
96
-
97
- # self.shape_list = ['barrel_render']
98
- # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
99
-
100
-
101
- self.lvis_paths = []
102
- for shape_name in self.shape_list:
103
- self.lvis_paths.append(os.path.join(main_folder, shape_name))
104
-
105
- # print("lvis_paths: ", self.lvis_paths)
106
-
107
- if img_wh is not None:
108
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
109
- 'img_wh must both be multiples of 32!'
110
-
111
-
112
- # * bounding box for rendering
113
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
114
- self.bbox_max = np.array([1.0, 1.0, 1.0])
115
-
116
- # - used for cost volume regularization
117
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
118
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
119
-
120
-
121
- def define_transforms(self):
122
- self.transform = T.Compose([T.ToTensor()])
123
-
124
-
125
-
126
- def load_cam_info(self):
127
- for vid, img_id in enumerate(self.img_ids):
128
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
129
- self.all_intrinsics.append(intrinsic)
130
- self.all_extrinsics.append(extrinsic)
131
- self.all_near_fars.append(near_far)
132
-
133
- def read_depth(self, filename):
134
- pass
135
-
136
- def read_mask(self, filename):
137
- mask_h = cv2.imread(filename, 0)
138
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
139
- interpolation=cv2.INTER_NEAREST)
140
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
141
- interpolation=cv2.INTER_NEAREST)
142
-
143
- mask[mask > 0] = 1 # the masks stored in png are not binary
144
- mask_h[mask_h > 0] = 1
145
-
146
- return mask, mask_h
147
-
148
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
149
-
150
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
151
-
152
- radius = radius * factor
153
- scale_mat = np.diag([radius, radius, radius, 1.0])
154
- scale_mat[:3, 3] = center.cpu().numpy()
155
- scale_mat = scale_mat.astype(np.float32)
156
-
157
- return scale_mat, 1. / radius.cpu().numpy()
158
-
159
- def __len__(self):
160
- # return 8*len(self.lvis_paths)
161
- return len(self.lvis_paths)
162
-
163
-
164
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
165
- pass
166
-
167
-
168
- def __getitem__(self, idx):
169
- sample = {}
170
- idx = idx * 8 # to be deleted
171
- origin_idx = idx
172
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
173
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views
174
-
175
- folder_path = self.lvis_paths[idx//8]
176
- idx = idx % 8 # [0, 7]
177
-
178
- # last subdir name
179
- shape_name = os.path.split(folder_path)[-1]
180
-
181
- pose_json_path = os.path.join(folder_path, "pose.json")
182
- with open(pose_json_path, 'r') as f:
183
- meta = json.load(f)
184
-
185
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
186
- self.img_wh = (256, 256)
187
- self.input_poses = np.array(list(meta["c2ws"].values()))
188
- intrinsic = np.eye(4)
189
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
190
- self.intrinsic = intrinsic
191
- self.near_far = np.array(meta["near_far"])
192
- self.near_far[1] = 1.8
193
- self.define_transforms()
194
- self.blender2opencv = np.array(
195
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
196
- )
197
-
198
- self.c2ws = []
199
- self.w2cs = []
200
- self.near_fars = []
201
- # self.root_dir = root_dir
202
- for image_dix, img_id in enumerate(self.img_ids):
203
- pose = self.input_poses[image_dix]
204
- c2w = pose @ self.blender2opencv
205
- self.c2ws.append(c2w)
206
- self.w2cs.append(np.linalg.inv(c2w))
207
- self.near_fars.append(self.near_far)
208
- self.c2ws = np.stack(self.c2ws, axis=0)
209
- self.w2cs = np.stack(self.w2cs, axis=0)
210
-
211
-
212
- self.all_intrinsics = [] # the cam info of the whole scene
213
- self.all_extrinsics = []
214
- self.all_near_fars = []
215
- self.load_cam_info()
216
-
217
-
218
- # target view
219
- c2w = self.c2ws[idx]
220
- w2c = np.linalg.inv(c2w)
221
- w2c_ref = w2c
222
- w2c_ref_inv = np.linalg.inv(w2c_ref)
223
-
224
- w2cs.append(w2c @ w2c_ref_inv)
225
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
226
-
227
- # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
228
- img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}')
229
-
230
- img = Image.open(img_filename)
231
- img = self.transform(img) # (4, h, w)
232
-
233
-
234
- if img.shape[0] == 4:
235
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
236
- imgs += [img]
237
-
238
-
239
- depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
240
- depth_h = depth_h.fill_(-1.0)
241
- mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
242
-
243
-
244
- depths_h.append(depth_h)
245
- masks_h.append(mask_h)
246
-
247
- intrinsic = self.intrinsic
248
- intrinsics.append(intrinsic)
249
-
250
-
251
- near_fars.append(self.near_fars[idx])
252
- image_perm = 0 # only supervised on reference view
253
-
254
- mask_dilated = None
255
-
256
-
257
- src_views = range(8, 8 + 8 * 4)
258
-
259
- for vid in src_views:
260
- if ((vid - 8) // 4 == 4) or ((vid - 8) // 4 == 6):
261
- continue
262
-
263
- # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
264
- img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}')
265
- img = Image.open(img_filename)
266
- img_wh = self.img_wh
267
-
268
- img = self.transform(img)
269
- if img.shape[0] == 4:
270
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
271
-
272
- imgs += [img]
273
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
274
- depths_h.append(depth_h)
275
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
276
-
277
- near_fars.append(self.all_near_fars[vid])
278
- intrinsics.append(self.all_intrinsics[vid])
279
-
280
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
281
-
282
-
283
- # ! estimate scale_mat
284
- scale_mat, scale_factor = self.cal_scale_mat(
285
- img_hw=[img_wh[1], img_wh[0]],
286
- intrinsics=intrinsics, extrinsics=w2cs,
287
- near_fars=near_fars, factor=1.1
288
- )
289
-
290
-
291
- new_near_fars = []
292
- new_w2cs = []
293
- new_c2ws = []
294
- new_affine_mats = []
295
- new_depths_h = []
296
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
297
-
298
- P = intrinsic @ extrinsic @ scale_mat
299
- P = P[:3, :4]
300
- # - should use load_K_Rt_from_P() to obtain c2w
301
- c2w = load_K_Rt_from_P(None, P)[1]
302
- w2c = np.linalg.inv(c2w)
303
- new_w2cs.append(w2c)
304
- new_c2ws.append(c2w)
305
- affine_mat = np.eye(4)
306
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
307
- new_affine_mats.append(affine_mat)
308
-
309
- camera_o = c2w[:3, 3]
310
- dist = np.sqrt(np.sum(camera_o ** 2))
311
- near = dist - 1
312
- far = dist + 1
313
-
314
- new_near_fars.append([0.95 * near, 1.05 * far])
315
- new_depths_h.append(depth * scale_factor)
316
-
317
- # print(new_near_fars)
318
- imgs = torch.stack(imgs).float()
319
- depths_h = np.stack(new_depths_h)
320
- masks_h = np.stack(masks_h)
321
-
322
- affine_mats = np.stack(new_affine_mats)
323
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
324
- new_near_fars)
325
-
326
- if self.split == 'train':
327
- start_idx = 0
328
- else:
329
- start_idx = 1
330
-
331
-
332
-
333
- target_w2cs = []
334
- target_intrinsics = []
335
- new_target_w2cs = []
336
- for i_idx in range(8):
337
- target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
338
- target_intrinsics.append(self.all_intrinsics[i_idx])
339
-
340
- for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
341
-
342
- P = intrinsic @ extrinsic @ scale_mat
343
- P = P[:3, :4]
344
- # - should use load_K_Rt_from_P() to obtain c2w
345
- c2w = load_K_Rt_from_P(None, P)[1]
346
- w2c = np.linalg.inv(c2w)
347
- new_target_w2cs.append(w2c)
348
- target_w2cs = np.stack(new_target_w2cs)
349
-
350
-
351
-
352
- view_ids = [idx] + list(src_views)
353
- sample['origin_idx'] = origin_idx
354
- sample['images'] = imgs # (V, 3, H, W)
355
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
356
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
357
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
358
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
359
- sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
360
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
361
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
362
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
363
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
364
-
365
- # sample['light_idx'] = torch.tensor(light_idx)
366
- sample['scan'] = shape_name
367
-
368
- sample['scale_factor'] = torch.tensor(scale_factor)
369
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
370
- sample['render_img_idx'] = torch.tensor(image_perm)
371
- sample['partial_vol_origin'] = self.partial_vol_origin
372
- sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
373
- # print("meta: ", sample['meta'])
374
-
375
- # - image to render
376
- sample['query_image'] = sample['images'][0]
377
- sample['query_c2w'] = sample['c2ws'][0]
378
- sample['query_w2c'] = sample['w2cs'][0]
379
- sample['query_intrinsic'] = sample['intrinsics'][0]
380
- sample['query_depth'] = sample['depths_h'][0]
381
- sample['query_mask'] = sample['masks_h'][0]
382
- sample['query_near_far'] = sample['near_fars'][0]
383
-
384
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
385
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
386
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
387
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
388
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
389
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
390
- sample['view_ids'] = sample['view_ids'][start_idx:]
391
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
392
-
393
- sample['scale_mat'] = torch.from_numpy(scale_mat)
394
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
395
-
396
- # - generate rays
397
- if ('val' in self.split) or ('test' in self.split):
398
- sample_rays = gen_rays_from_single_image(
399
- img_wh[1], img_wh[0],
400
- sample['query_image'],
401
- sample['query_intrinsic'],
402
- sample['query_c2w'],
403
- depth=sample['query_depth'],
404
- mask=sample['query_mask'] if self.clean_image else None)
405
- else:
406
- sample_rays = gen_random_rays_from_single_image(
407
- img_wh[1], img_wh[0],
408
- self.N_rays,
409
- sample['query_image'],
410
- sample['query_intrinsic'],
411
- sample['query_c2w'],
412
- depth=sample['query_depth'],
413
- mask=sample['query_mask'] if self.clean_image else None,
414
- dilated_mask=mask_dilated,
415
- importance_sample=self.importance_sample)
416
-
417
-
418
- sample['rays'] = sample_rays
419
-
420
- return sample
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_3.py DELETED
@@ -1,428 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
-
18
-
19
- def get_ray_directions(H, W, focal, center=None):
20
- """
21
- Get ray directions for all pixels in camera coordinate.
22
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
23
- ray-tracing-generating-camera-rays/standard-coordinate-systems
24
- Inputs:
25
- H, W, focal: image height, width and focal length
26
- Outputs:
27
- directions: (H, W, 3), the direction of the rays in camera coordinate
28
- """
29
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
30
-
31
- i, j = grid.unbind(-1)
32
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
33
- # see https://github.com/bmild/nerf/issues/24
34
- cent = center if center is not None else [W / 2, H / 2]
35
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
36
-
37
- return directions
38
-
39
- def load_K_Rt_from_P(filename, P=None):
40
- if P is None:
41
- lines = open(filename).read().splitlines()
42
- if len(lines) == 4:
43
- lines = lines[1:]
44
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
45
- P = np.asarray(lines).astype(np.float32).squeeze()
46
-
47
- out = cv2.decomposeProjectionMatrix(P)
48
- K = out[0]
49
- R = out[1]
50
- t = out[2]
51
-
52
- K = K / K[2, 2]
53
- intrinsics = np.eye(4)
54
- intrinsics[:3, :3] = K
55
-
56
- pose = np.eye(4, dtype=np.float32)
57
- pose[:3, :3] = R.transpose() # ? why need transpose here
58
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
59
-
60
- return intrinsics, pose # ! return cam2world matrix here
61
-
62
-
63
- # ! load one ref-image with multiple src-images in camera coordinate system
64
- class BlenderPerView(Dataset):
65
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
66
- split_filepath=None, pair_filepath=None,
67
- N_rays=512,
68
- vol_dims=[128, 128, 128], batch_size=1,
69
- clean_image=False, importance_sample=False, test_ref_views=[],
70
- specific_dataset_name = 'GSO'
71
- ):
72
-
73
- # print("root_dir: ", root_dir)
74
- self.root_dir = root_dir
75
- self.split = split
76
- # self.specific_dataset_name = 'Realfusion'
77
- # self.specific_dataset_name = 'GSO'
78
- # self.specific_dataset_name = 'Objaverse'
79
- # self.specific_dataset_name = 'Zero123'
80
-
81
- self.specific_dataset_name = specific_dataset_name
82
- self.n_views = n_views
83
- self.N_rays = N_rays
84
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
85
-
86
- self.clean_image = clean_image
87
- self.importance_sample = importance_sample
88
- self.test_ref_views = test_ref_views # used for testing
89
- self.scale_factor = 1.0
90
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
91
- assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh'
92
- # find all subfolders
93
- main_folder = os.path.join(root_dir, self.specific_dataset_name)
94
- self.shape_list = os.listdir(main_folder)
95
- self.shape_list.sort()
96
-
97
- # self.shape_list = ['barrel_render']
98
- # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
99
-
100
-
101
- self.lvis_paths = []
102
- for shape_name in self.shape_list:
103
- self.lvis_paths.append(os.path.join(main_folder, shape_name))
104
-
105
- # print("lvis_paths: ", self.lvis_paths)
106
-
107
- if img_wh is not None:
108
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
109
- 'img_wh must both be multiples of 32!'
110
-
111
-
112
- # * bounding box for rendering
113
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
114
- self.bbox_max = np.array([1.0, 1.0, 1.0])
115
-
116
- # - used for cost volume regularization
117
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
118
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
119
-
120
-
121
- def define_transforms(self):
122
- self.transform = T.Compose([T.ToTensor()])
123
-
124
-
125
-
126
- def load_cam_info(self):
127
- for vid, img_id in enumerate(self.img_ids):
128
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
129
- self.all_intrinsics.append(intrinsic)
130
- self.all_extrinsics.append(extrinsic)
131
- self.all_near_fars.append(near_far)
132
-
133
- def read_depth(self, filename):
134
- pass
135
-
136
- def read_mask(self, filename):
137
- mask_h = cv2.imread(filename, 0)
138
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
139
- interpolation=cv2.INTER_NEAREST)
140
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
141
- interpolation=cv2.INTER_NEAREST)
142
-
143
- mask[mask > 0] = 1 # the masks stored in png are not binary
144
- mask_h[mask_h > 0] = 1
145
-
146
- return mask, mask_h
147
-
148
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
149
-
150
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
151
-
152
- radius = radius * factor
153
- scale_mat = np.diag([radius, radius, radius, 1.0])
154
- scale_mat[:3, 3] = center.cpu().numpy()
155
- scale_mat = scale_mat.astype(np.float32)
156
-
157
- return scale_mat, 1. / radius.cpu().numpy()
158
-
159
- def __len__(self):
160
- # return 8*len(self.lvis_paths)
161
- return len(self.lvis_paths)
162
-
163
-
164
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
165
- pass
166
-
167
-
168
- def __getitem__(self, idx):
169
- sample = {}
170
- idx = idx * 8 # to be deleted
171
- origin_idx = idx
172
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
173
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views
174
-
175
- folder_path = self.lvis_paths[idx//8]
176
- idx = idx % 8 # [0, 7]
177
-
178
- # last subdir name
179
- shape_name = os.path.split(folder_path)[-1]
180
-
181
- pose_json_path = os.path.join(folder_path, "pose.json")
182
- with open(pose_json_path, 'r') as f:
183
- meta = json.load(f)
184
-
185
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
186
- self.img_wh = (256, 256)
187
- self.input_poses = np.array(list(meta["c2ws"].values()))
188
- intrinsic = np.eye(4)
189
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
190
- self.intrinsic = intrinsic
191
- self.near_far = np.array(meta["near_far"])
192
- self.near_far[1] = 1.8
193
- self.define_transforms()
194
- self.blender2opencv = np.array(
195
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
196
- )
197
-
198
- self.c2ws = []
199
- self.w2cs = []
200
- self.near_fars = []
201
- # self.root_dir = root_dir
202
- for image_dix, img_id in enumerate(self.img_ids):
203
- pose = self.input_poses[image_dix]
204
- c2w = pose @ self.blender2opencv
205
- self.c2ws.append(c2w)
206
- self.w2cs.append(np.linalg.inv(c2w))
207
- self.near_fars.append(self.near_far)
208
- self.c2ws = np.stack(self.c2ws, axis=0)
209
- self.w2cs = np.stack(self.w2cs, axis=0)
210
-
211
-
212
- self.all_intrinsics = [] # the cam info of the whole scene
213
- self.all_extrinsics = []
214
- self.all_near_fars = []
215
- self.load_cam_info()
216
-
217
-
218
- # target view
219
- c2w = self.c2ws[idx]
220
- w2c = np.linalg.inv(c2w)
221
- w2c_ref = w2c
222
- w2c_ref_inv = np.linalg.inv(w2c_ref)
223
-
224
- w2cs.append(w2c @ w2c_ref_inv)
225
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
226
-
227
- # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
228
- img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}')
229
-
230
- img = Image.open(img_filename)
231
- img = self.transform(img) # (4, h, w)
232
-
233
-
234
- if img.shape[0] == 4:
235
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
236
- imgs += [img]
237
-
238
-
239
- depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
240
- depth_h = depth_h.fill_(-1.0)
241
- mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
242
-
243
-
244
- depths_h.append(depth_h)
245
- masks_h.append(mask_h)
246
-
247
- intrinsic = self.intrinsic
248
- intrinsics.append(intrinsic)
249
-
250
-
251
- near_fars.append(self.near_fars[idx])
252
- image_perm = 0 # only supervised on reference view
253
-
254
- mask_dilated = None
255
-
256
-
257
- # src_views = range(8, 8 + 8 * 4)
258
-
259
- src_views = list()
260
- for i in range(8):
261
- # randomly choose 3 different number from [0,3]
262
- # local_idxs = np.random.choice(4, 3, replace=False)
263
- local_idxs = [0, 2, 3]
264
- # local_idxs = np.random.choice(4, 3, replace=False)
265
-
266
- local_idxs = [8 + i * 4 + local_idx for local_idx in local_idxs]
267
- src_views += local_idxs
268
-
269
- for vid in src_views:
270
-
271
- # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
272
- img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}')
273
- img = Image.open(img_filename)
274
- img_wh = self.img_wh
275
-
276
- img = self.transform(img)
277
- if img.shape[0] == 4:
278
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
279
-
280
- imgs += [img]
281
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
282
- depths_h.append(depth_h)
283
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
284
-
285
- near_fars.append(self.all_near_fars[vid])
286
- intrinsics.append(self.all_intrinsics[vid])
287
-
288
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
289
-
290
-
291
- # ! estimate scale_mat
292
- scale_mat, scale_factor = self.cal_scale_mat(
293
- img_hw=[img_wh[1], img_wh[0]],
294
- intrinsics=intrinsics, extrinsics=w2cs,
295
- near_fars=near_fars, factor=1.1
296
- )
297
-
298
-
299
- new_near_fars = []
300
- new_w2cs = []
301
- new_c2ws = []
302
- new_affine_mats = []
303
- new_depths_h = []
304
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
305
-
306
- P = intrinsic @ extrinsic @ scale_mat
307
- P = P[:3, :4]
308
- # - should use load_K_Rt_from_P() to obtain c2w
309
- c2w = load_K_Rt_from_P(None, P)[1]
310
- w2c = np.linalg.inv(c2w)
311
- new_w2cs.append(w2c)
312
- new_c2ws.append(c2w)
313
- affine_mat = np.eye(4)
314
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
315
- new_affine_mats.append(affine_mat)
316
-
317
- camera_o = c2w[:3, 3]
318
- dist = np.sqrt(np.sum(camera_o ** 2))
319
- near = dist - 1
320
- far = dist + 1
321
-
322
- new_near_fars.append([0.95 * near, 1.05 * far])
323
- new_depths_h.append(depth * scale_factor)
324
-
325
- # print(new_near_fars)
326
- imgs = torch.stack(imgs).float()
327
- depths_h = np.stack(new_depths_h)
328
- masks_h = np.stack(masks_h)
329
-
330
- affine_mats = np.stack(new_affine_mats)
331
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
332
- new_near_fars)
333
-
334
- if self.split == 'train':
335
- start_idx = 0
336
- else:
337
- start_idx = 1
338
-
339
-
340
-
341
- target_w2cs = []
342
- target_intrinsics = []
343
- new_target_w2cs = []
344
- for i_idx in range(8):
345
- target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
346
- target_intrinsics.append(self.all_intrinsics[i_idx])
347
-
348
- for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
349
-
350
- P = intrinsic @ extrinsic @ scale_mat
351
- P = P[:3, :4]
352
- # - should use load_K_Rt_from_P() to obtain c2w
353
- c2w = load_K_Rt_from_P(None, P)[1]
354
- w2c = np.linalg.inv(c2w)
355
- new_target_w2cs.append(w2c)
356
- target_w2cs = np.stack(new_target_w2cs)
357
-
358
-
359
-
360
- view_ids = [idx] + list(src_views)
361
- sample['origin_idx'] = origin_idx
362
- sample['images'] = imgs # (V, 3, H, W)
363
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
364
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
365
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
366
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
367
- sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
368
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
369
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
370
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
371
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
372
-
373
- # sample['light_idx'] = torch.tensor(light_idx)
374
- sample['scan'] = shape_name
375
-
376
- sample['scale_factor'] = torch.tensor(scale_factor)
377
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
378
- sample['render_img_idx'] = torch.tensor(image_perm)
379
- sample['partial_vol_origin'] = self.partial_vol_origin
380
- sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
381
- # print("meta: ", sample['meta'])
382
-
383
- # - image to render
384
- sample['query_image'] = sample['images'][0]
385
- sample['query_c2w'] = sample['c2ws'][0]
386
- sample['query_w2c'] = sample['w2cs'][0]
387
- sample['query_intrinsic'] = sample['intrinsics'][0]
388
- sample['query_depth'] = sample['depths_h'][0]
389
- sample['query_mask'] = sample['masks_h'][0]
390
- sample['query_near_far'] = sample['near_fars'][0]
391
-
392
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
393
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
394
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
395
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
396
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
397
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
398
- sample['view_ids'] = sample['view_ids'][start_idx:]
399
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
400
-
401
- sample['scale_mat'] = torch.from_numpy(scale_mat)
402
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
403
-
404
- # - generate rays
405
- if ('val' in self.split) or ('test' in self.split):
406
- sample_rays = gen_rays_from_single_image(
407
- img_wh[1], img_wh[0],
408
- sample['query_image'],
409
- sample['query_intrinsic'],
410
- sample['query_c2w'],
411
- depth=sample['query_depth'],
412
- mask=sample['query_mask'] if self.clean_image else None)
413
- else:
414
- sample_rays = gen_random_rays_from_single_image(
415
- img_wh[1], img_wh[0],
416
- self.N_rays,
417
- sample['query_image'],
418
- sample['query_intrinsic'],
419
- sample['query_c2w'],
420
- depth=sample['query_depth'],
421
- mask=sample['query_mask'] if self.clean_image else None,
422
- dilated_mask=mask_dilated,
423
- importance_sample=self.importance_sample)
424
-
425
-
426
- sample['rays'] = sample_rays
427
-
428
- return sample
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_wide.py DELETED
@@ -1,420 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
-
18
-
19
- def get_ray_directions(H, W, focal, center=None):
20
- """
21
- Get ray directions for all pixels in camera coordinate.
22
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
23
- ray-tracing-generating-camera-rays/standard-coordinate-systems
24
- Inputs:
25
- H, W, focal: image height, width and focal length
26
- Outputs:
27
- directions: (H, W, 3), the direction of the rays in camera coordinate
28
- """
29
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
30
-
31
- i, j = grid.unbind(-1)
32
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
33
- # see https://github.com/bmild/nerf/issues/24
34
- cent = center if center is not None else [W / 2, H / 2]
35
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
36
-
37
- return directions
38
-
39
- def load_K_Rt_from_P(filename, P=None):
40
- if P is None:
41
- lines = open(filename).read().splitlines()
42
- if len(lines) == 4:
43
- lines = lines[1:]
44
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
45
- P = np.asarray(lines).astype(np.float32).squeeze()
46
-
47
- out = cv2.decomposeProjectionMatrix(P)
48
- K = out[0]
49
- R = out[1]
50
- t = out[2]
51
-
52
- K = K / K[2, 2]
53
- intrinsics = np.eye(4)
54
- intrinsics[:3, :3] = K
55
-
56
- pose = np.eye(4, dtype=np.float32)
57
- pose[:3, :3] = R.transpose() # ? why need transpose here
58
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
59
-
60
- return intrinsics, pose # ! return cam2world matrix here
61
-
62
-
63
- # ! load one ref-image with multiple src-images in camera coordinate system
64
- class BlenderPerView(Dataset):
65
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
66
- split_filepath=None, pair_filepath=None,
67
- N_rays=512,
68
- vol_dims=[128, 128, 128], batch_size=1,
69
- clean_image=False, importance_sample=False, test_ref_views=[],
70
- specific_dataset_name = 'GSO'
71
- ):
72
-
73
- # print("root_dir: ", root_dir)
74
- self.root_dir = root_dir
75
- self.split = split
76
- # self.specific_dataset_name = 'Realfusion'
77
- # self.specific_dataset_name = 'GSO'
78
- # self.specific_dataset_name = 'Objaverse'
79
- # self.specific_dataset_name = 'Zero123'
80
-
81
- self.specific_dataset_name = specific_dataset_name
82
- self.n_views = n_views
83
- self.N_rays = N_rays
84
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
85
-
86
- self.clean_image = clean_image
87
- self.importance_sample = importance_sample
88
- self.test_ref_views = test_ref_views # used for testing
89
- self.scale_factor = 1.0
90
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
91
- assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh'
92
- # find all subfolders
93
- main_folder = os.path.join(root_dir, self.specific_dataset_name)
94
- self.shape_list = os.listdir(main_folder)
95
- self.shape_list.sort()
96
-
97
- # self.shape_list = ['barrel_render']
98
- # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
99
-
100
-
101
- self.lvis_paths = []
102
- for shape_name in self.shape_list:
103
- self.lvis_paths.append(os.path.join(main_folder, shape_name))
104
-
105
- # print("lvis_paths: ", self.lvis_paths)
106
-
107
- if img_wh is not None:
108
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
109
- 'img_wh must both be multiples of 32!'
110
-
111
-
112
- # * bounding box for rendering
113
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
114
- self.bbox_max = np.array([1.0, 1.0, 1.0])
115
-
116
- # - used for cost volume regularization
117
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
118
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
119
-
120
-
121
- def define_transforms(self):
122
- self.transform = T.Compose([T.ToTensor()])
123
-
124
-
125
-
126
- def load_cam_info(self):
127
- for vid, img_id in enumerate(self.img_ids):
128
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
129
- self.all_intrinsics.append(intrinsic)
130
- self.all_extrinsics.append(extrinsic)
131
- self.all_near_fars.append(near_far)
132
-
133
- def read_depth(self, filename):
134
- pass
135
-
136
- def read_mask(self, filename):
137
- mask_h = cv2.imread(filename, 0)
138
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
139
- interpolation=cv2.INTER_NEAREST)
140
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
141
- interpolation=cv2.INTER_NEAREST)
142
-
143
- mask[mask > 0] = 1 # the masks stored in png are not binary
144
- mask_h[mask_h > 0] = 1
145
-
146
- return mask, mask_h
147
-
148
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
149
-
150
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
151
-
152
- radius = radius * factor
153
- scale_mat = np.diag([radius, radius, radius, 1.0])
154
- scale_mat[:3, 3] = center.cpu().numpy()
155
- scale_mat = scale_mat.astype(np.float32)
156
-
157
- return scale_mat, 1. / radius.cpu().numpy()
158
-
159
- def __len__(self):
160
- # return 8*len(self.lvis_paths)
161
- return len(self.lvis_paths)
162
-
163
-
164
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
165
- pass
166
-
167
-
168
- def __getitem__(self, idx):
169
- sample = {}
170
- idx = idx * 8 # to be deleted
171
- origin_idx = idx
172
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
173
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views
174
-
175
- folder_path = self.lvis_paths[idx//8]
176
- idx = idx % 8 # [0, 7]
177
-
178
- # last subdir name
179
- shape_name = os.path.split(folder_path)[-1]
180
-
181
- pose_json_path = os.path.join(folder_path, "pose.json")
182
- with open(pose_json_path, 'r') as f:
183
- meta = json.load(f)
184
-
185
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
186
- self.img_wh = (256, 256)
187
- self.input_poses = np.array(list(meta["c2ws"].values()))
188
- intrinsic = np.eye(4)
189
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
190
- self.intrinsic = intrinsic
191
- self.near_far = np.array(meta["near_far"])
192
- self.near_far[1] = 1.8
193
- self.define_transforms()
194
- self.blender2opencv = np.array(
195
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
196
- )
197
-
198
- self.c2ws = []
199
- self.w2cs = []
200
- self.near_fars = []
201
- # self.root_dir = root_dir
202
- for image_dix, img_id in enumerate(self.img_ids):
203
- pose = self.input_poses[image_dix]
204
- c2w = pose @ self.blender2opencv
205
- self.c2ws.append(c2w)
206
- self.w2cs.append(np.linalg.inv(c2w))
207
- self.near_fars.append(self.near_far)
208
- self.c2ws = np.stack(self.c2ws, axis=0)
209
- self.w2cs = np.stack(self.w2cs, axis=0)
210
-
211
-
212
- self.all_intrinsics = [] # the cam info of the whole scene
213
- self.all_extrinsics = []
214
- self.all_near_fars = []
215
- self.load_cam_info()
216
-
217
-
218
- # target view
219
- c2w = self.c2ws[idx]
220
- w2c = np.linalg.inv(c2w)
221
- w2c_ref = w2c
222
- w2c_ref_inv = np.linalg.inv(w2c_ref)
223
-
224
- w2cs.append(w2c @ w2c_ref_inv)
225
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
226
-
227
- # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
228
- img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}')
229
-
230
- img = Image.open(img_filename)
231
- img = self.transform(img) # (4, h, w)
232
-
233
-
234
- if img.shape[0] == 4:
235
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
236
- imgs += [img]
237
-
238
-
239
- depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
240
- depth_h = depth_h.fill_(-1.0)
241
- mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
242
-
243
-
244
- depths_h.append(depth_h)
245
- masks_h.append(mask_h)
246
-
247
- intrinsic = self.intrinsic
248
- intrinsics.append(intrinsic)
249
-
250
-
251
- near_fars.append(self.near_fars[idx])
252
- image_perm = 0 # only supervised on reference view
253
-
254
- mask_dilated = None
255
-
256
-
257
- src_views = range(8)
258
-
259
-
260
- for vid in src_views:
261
-
262
- # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
263
- # img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}')
264
- img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[vid]}')
265
- img = Image.open(img_filename)
266
- img_wh = self.img_wh
267
-
268
- img = self.transform(img)
269
- if img.shape[0] == 4:
270
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
271
-
272
- imgs += [img]
273
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
274
- depths_h.append(depth_h)
275
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
276
-
277
- near_fars.append(self.all_near_fars[vid])
278
- intrinsics.append(self.all_intrinsics[vid])
279
-
280
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
281
-
282
-
283
- # ! estimate scale_mat
284
- scale_mat, scale_factor = self.cal_scale_mat(
285
- img_hw=[img_wh[1], img_wh[0]],
286
- intrinsics=intrinsics, extrinsics=w2cs,
287
- near_fars=near_fars, factor=1.1
288
- )
289
-
290
-
291
- new_near_fars = []
292
- new_w2cs = []
293
- new_c2ws = []
294
- new_affine_mats = []
295
- new_depths_h = []
296
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
297
-
298
- P = intrinsic @ extrinsic @ scale_mat
299
- P = P[:3, :4]
300
- # - should use load_K_Rt_from_P() to obtain c2w
301
- c2w = load_K_Rt_from_P(None, P)[1]
302
- w2c = np.linalg.inv(c2w)
303
- new_w2cs.append(w2c)
304
- new_c2ws.append(c2w)
305
- affine_mat = np.eye(4)
306
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
307
- new_affine_mats.append(affine_mat)
308
-
309
- camera_o = c2w[:3, 3]
310
- dist = np.sqrt(np.sum(camera_o ** 2))
311
- near = dist - 1
312
- far = dist + 1
313
-
314
- new_near_fars.append([0.95 * near, 1.05 * far])
315
- new_depths_h.append(depth * scale_factor)
316
-
317
- # print(new_near_fars)
318
- imgs = torch.stack(imgs).float()
319
- depths_h = np.stack(new_depths_h)
320
- masks_h = np.stack(masks_h)
321
-
322
- affine_mats = np.stack(new_affine_mats)
323
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
324
- new_near_fars)
325
-
326
- if self.split == 'train':
327
- start_idx = 0
328
- else:
329
- start_idx = 1
330
-
331
-
332
-
333
- target_w2cs = []
334
- target_intrinsics = []
335
- new_target_w2cs = []
336
- for i_idx in range(8):
337
- target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
338
- target_intrinsics.append(self.all_intrinsics[i_idx])
339
-
340
- for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
341
-
342
- P = intrinsic @ extrinsic @ scale_mat
343
- P = P[:3, :4]
344
- # - should use load_K_Rt_from_P() to obtain c2w
345
- c2w = load_K_Rt_from_P(None, P)[1]
346
- w2c = np.linalg.inv(c2w)
347
- new_target_w2cs.append(w2c)
348
- target_w2cs = np.stack(new_target_w2cs)
349
-
350
-
351
-
352
- view_ids = [idx] + list(src_views)
353
- sample['origin_idx'] = origin_idx
354
- sample['images'] = imgs # (V, 3, H, W)
355
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
356
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
357
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
358
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
359
- sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
360
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
361
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
362
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
363
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
364
-
365
- # sample['light_idx'] = torch.tensor(light_idx)
366
- sample['scan'] = shape_name
367
-
368
- sample['scale_factor'] = torch.tensor(scale_factor)
369
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
370
- sample['render_img_idx'] = torch.tensor(image_perm)
371
- sample['partial_vol_origin'] = self.partial_vol_origin
372
- sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
373
- # print("meta: ", sample['meta'])
374
-
375
- # - image to render
376
- sample['query_image'] = sample['images'][0]
377
- sample['query_c2w'] = sample['c2ws'][0]
378
- sample['query_w2c'] = sample['w2cs'][0]
379
- sample['query_intrinsic'] = sample['intrinsics'][0]
380
- sample['query_depth'] = sample['depths_h'][0]
381
- sample['query_mask'] = sample['masks_h'][0]
382
- sample['query_near_far'] = sample['near_fars'][0]
383
-
384
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
385
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
386
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
387
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
388
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
389
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
390
- sample['view_ids'] = sample['view_ids'][start_idx:]
391
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
392
-
393
- sample['scale_mat'] = torch.from_numpy(scale_mat)
394
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
395
-
396
- # - generate rays
397
- if ('val' in self.split) or ('test' in self.split):
398
- sample_rays = gen_rays_from_single_image(
399
- img_wh[1], img_wh[0],
400
- sample['query_image'],
401
- sample['query_intrinsic'],
402
- sample['query_c2w'],
403
- depth=sample['query_depth'],
404
- mask=sample['query_mask'] if self.clean_image else None)
405
- else:
406
- sample_rays = gen_random_rays_from_single_image(
407
- img_wh[1], img_wh[0],
408
- self.N_rays,
409
- sample['query_image'],
410
- sample['query_intrinsic'],
411
- sample['query_c2w'],
412
- depth=sample['query_depth'],
413
- mask=sample['query_mask'] if self.clean_image else None,
414
- dilated_mask=mask_dilated,
415
- importance_sample=self.importance_sample)
416
-
417
-
418
- sample['rays'] = sample_rays
419
-
420
- return sample
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_temp.py DELETED
@@ -1,417 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
-
18
-
19
- def get_ray_directions(H, W, focal, center=None):
20
- """
21
- Get ray directions for all pixels in camera coordinate.
22
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
23
- ray-tracing-generating-camera-rays/standard-coordinate-systems
24
- Inputs:
25
- H, W, focal: image height, width and focal length
26
- Outputs:
27
- directions: (H, W, 3), the direction of the rays in camera coordinate
28
- """
29
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
30
-
31
- i, j = grid.unbind(-1)
32
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
33
- # see https://github.com/bmild/nerf/issues/24
34
- cent = center if center is not None else [W / 2, H / 2]
35
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
36
-
37
- return directions
38
-
39
- def load_K_Rt_from_P(filename, P=None):
40
- if P is None:
41
- lines = open(filename).read().splitlines()
42
- if len(lines) == 4:
43
- lines = lines[1:]
44
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
45
- P = np.asarray(lines).astype(np.float32).squeeze()
46
-
47
- out = cv2.decomposeProjectionMatrix(P)
48
- K = out[0]
49
- R = out[1]
50
- t = out[2]
51
-
52
- K = K / K[2, 2]
53
- intrinsics = np.eye(4)
54
- intrinsics[:3, :3] = K
55
-
56
- pose = np.eye(4, dtype=np.float32)
57
- pose[:3, :3] = R.transpose() # ? why need transpose here
58
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
59
-
60
- return intrinsics, pose # ! return cam2world matrix here
61
-
62
-
63
- # ! load one ref-image with multiple src-images in camera coordinate system
64
- class BlenderPerView(Dataset):
65
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
66
- split_filepath=None, pair_filepath=None,
67
- N_rays=512,
68
- vol_dims=[128, 128, 128], batch_size=1,
69
- clean_image=False, importance_sample=False, test_ref_views=[],
70
- specific_dataset_name = 'GSO'
71
- ):
72
-
73
- # print("root_dir: ", root_dir)
74
- self.root_dir = root_dir
75
- self.split = split
76
- # self.specific_dataset_name = 'Realfusion'
77
- # self.specific_dataset_name = 'GSO'
78
- # self.specific_dataset_name = 'Objaverse'
79
- self.specific_dataset_name = 'Objaverse_archived'
80
-
81
- # self.specific_dataset_name = specific_dataset_name
82
- self.n_views = n_views
83
- self.N_rays = N_rays
84
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
85
-
86
- self.clean_image = clean_image
87
- self.importance_sample = importance_sample
88
- self.test_ref_views = test_ref_views # used for testing
89
- self.scale_factor = 1.0
90
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
91
- assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh'
92
- # find all subfolders
93
- main_folder = os.path.join(root_dir, self.specific_dataset_name)
94
- self.shape_list = os.listdir(main_folder)
95
- self.shape_list.sort()
96
-
97
- # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
98
-
99
-
100
- self.lvis_paths = []
101
- for shape_name in self.shape_list:
102
- self.lvis_paths.append(os.path.join(main_folder, shape_name))
103
-
104
- # print("lvis_paths: ", self.lvis_paths)
105
-
106
- if img_wh is not None:
107
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
108
- 'img_wh must both be multiples of 32!'
109
-
110
-
111
- # * bounding box for rendering
112
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
113
- self.bbox_max = np.array([1.0, 1.0, 1.0])
114
-
115
- # - used for cost volume regularization
116
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
117
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
118
-
119
-
120
- def define_transforms(self):
121
- self.transform = T.Compose([T.ToTensor()])
122
-
123
-
124
-
125
- def load_cam_info(self):
126
- for vid, img_id in enumerate(self.img_ids):
127
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
128
- self.all_intrinsics.append(intrinsic)
129
- self.all_extrinsics.append(extrinsic)
130
- self.all_near_fars.append(near_far)
131
-
132
- def read_depth(self, filename):
133
- pass
134
-
135
- def read_mask(self, filename):
136
- mask_h = cv2.imread(filename, 0)
137
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
138
- interpolation=cv2.INTER_NEAREST)
139
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
140
- interpolation=cv2.INTER_NEAREST)
141
-
142
- mask[mask > 0] = 1 # the masks stored in png are not binary
143
- mask_h[mask_h > 0] = 1
144
-
145
- return mask, mask_h
146
-
147
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
148
-
149
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
150
-
151
- radius = radius * factor
152
- scale_mat = np.diag([radius, radius, radius, 1.0])
153
- scale_mat[:3, 3] = center.cpu().numpy()
154
- scale_mat = scale_mat.astype(np.float32)
155
-
156
- return scale_mat, 1. / radius.cpu().numpy()
157
-
158
- def __len__(self):
159
- # return 8*len(self.lvis_paths)
160
- return len(self.lvis_paths)
161
-
162
-
163
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
164
- pass
165
-
166
-
167
- def __getitem__(self, idx):
168
- sample = {}
169
- idx = idx * 8 # to be deleted
170
- origin_idx = idx
171
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
172
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views
173
-
174
- folder_path = self.lvis_paths[idx//8]
175
- idx = idx % 8 # [0, 7]
176
-
177
- # last subdir name
178
- shape_name = os.path.split(folder_path)[-1]
179
-
180
- pose_json_path = os.path.join('/objaverse-processed/zero12345_img/zero12345_narrow_pose.json')
181
- with open(pose_json_path, 'r') as f:
182
- meta = json.load(f)
183
-
184
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
185
- self.img_wh = (256, 256)
186
- self.input_poses = np.array(list(meta["c2ws"].values()))
187
- intrinsic = np.eye(4)
188
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
189
- self.intrinsic = intrinsic
190
- self.near_far = np.array(meta["near_far"])
191
- self.near_far[1] = 1.8
192
- self.define_transforms()
193
- self.blender2opencv = np.array(
194
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
195
- )
196
-
197
- self.c2ws = []
198
- self.w2cs = []
199
- self.near_fars = []
200
- # self.root_dir = root_dir
201
- for image_dix, img_id in enumerate(self.img_ids):
202
- pose = self.input_poses[image_dix]
203
- c2w = pose @ self.blender2opencv
204
- self.c2ws.append(c2w)
205
- self.w2cs.append(np.linalg.inv(c2w))
206
- self.near_fars.append(self.near_far)
207
- self.c2ws = np.stack(self.c2ws, axis=0)
208
- self.w2cs = np.stack(self.w2cs, axis=0)
209
-
210
-
211
- self.all_intrinsics = [] # the cam info of the whole scene
212
- self.all_extrinsics = []
213
- self.all_near_fars = []
214
- self.load_cam_info()
215
-
216
-
217
- # target view
218
- c2w = self.c2ws[idx]
219
- w2c = np.linalg.inv(c2w)
220
- w2c_ref = w2c
221
- w2c_ref_inv = np.linalg.inv(w2c_ref)
222
-
223
- w2cs.append(w2c @ w2c_ref_inv)
224
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
225
-
226
- # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
227
- img_filename = os.path.join(folder_path, 'stage1_8', f'{idx}.png')
228
-
229
- img = Image.open(img_filename)
230
- img = self.transform(img) # (4, h, w)
231
-
232
-
233
- if img.shape[0] == 4:
234
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
235
- imgs += [img]
236
-
237
-
238
- depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
239
- depth_h = depth_h.fill_(-1.0)
240
- mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
241
-
242
-
243
- depths_h.append(depth_h)
244
- masks_h.append(mask_h)
245
-
246
- intrinsic = self.intrinsic
247
- intrinsics.append(intrinsic)
248
-
249
-
250
- near_fars.append(self.near_fars[idx])
251
- image_perm = 0 # only supervised on reference view
252
-
253
- mask_dilated = None
254
-
255
-
256
- src_views = range(8, 8 + 8 * 4)
257
-
258
- for vid in src_views:
259
-
260
- # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
261
- img_filename = os.path.join(folder_path, 'stage2_8', f'{(vid-8)//4}_{(vid-8)%4}.png')
262
- img = Image.open(img_filename)
263
- img_wh = self.img_wh
264
-
265
- img = self.transform(img)
266
- if img.shape[0] == 4:
267
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
268
-
269
- imgs += [img]
270
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
271
- depths_h.append(depth_h)
272
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
273
-
274
- near_fars.append(self.all_near_fars[vid])
275
- intrinsics.append(self.all_intrinsics[vid])
276
-
277
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
278
-
279
-
280
- # ! estimate scale_mat
281
- scale_mat, scale_factor = self.cal_scale_mat(
282
- img_hw=[img_wh[1], img_wh[0]],
283
- intrinsics=intrinsics, extrinsics=w2cs,
284
- near_fars=near_fars, factor=1.1
285
- )
286
-
287
-
288
- new_near_fars = []
289
- new_w2cs = []
290
- new_c2ws = []
291
- new_affine_mats = []
292
- new_depths_h = []
293
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
294
-
295
- P = intrinsic @ extrinsic @ scale_mat
296
- P = P[:3, :4]
297
- # - should use load_K_Rt_from_P() to obtain c2w
298
- c2w = load_K_Rt_from_P(None, P)[1]
299
- w2c = np.linalg.inv(c2w)
300
- new_w2cs.append(w2c)
301
- new_c2ws.append(c2w)
302
- affine_mat = np.eye(4)
303
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
304
- new_affine_mats.append(affine_mat)
305
-
306
- camera_o = c2w[:3, 3]
307
- dist = np.sqrt(np.sum(camera_o ** 2))
308
- near = dist - 1
309
- far = dist + 1
310
-
311
- new_near_fars.append([0.95 * near, 1.05 * far])
312
- new_depths_h.append(depth * scale_factor)
313
-
314
- # print(new_near_fars)
315
- imgs = torch.stack(imgs).float()
316
- depths_h = np.stack(new_depths_h)
317
- masks_h = np.stack(masks_h)
318
-
319
- affine_mats = np.stack(new_affine_mats)
320
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
321
- new_near_fars)
322
-
323
- if self.split == 'train':
324
- start_idx = 0
325
- else:
326
- start_idx = 1
327
-
328
-
329
-
330
- target_w2cs = []
331
- target_intrinsics = []
332
- new_target_w2cs = []
333
- for i_idx in range(8):
334
- target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
335
- target_intrinsics.append(self.all_intrinsics[i_idx])
336
-
337
- for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
338
-
339
- P = intrinsic @ extrinsic @ scale_mat
340
- P = P[:3, :4]
341
- # - should use load_K_Rt_from_P() to obtain c2w
342
- c2w = load_K_Rt_from_P(None, P)[1]
343
- w2c = np.linalg.inv(c2w)
344
- new_target_w2cs.append(w2c)
345
- target_w2cs = np.stack(new_target_w2cs)
346
-
347
-
348
-
349
- view_ids = [idx] + list(src_views)
350
- sample['origin_idx'] = origin_idx
351
- sample['images'] = imgs # (V, 3, H, W)
352
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
353
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
354
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
355
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
356
- sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
357
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
358
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
359
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
360
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
361
-
362
- # sample['light_idx'] = torch.tensor(light_idx)
363
- sample['scan'] = shape_name
364
-
365
- sample['scale_factor'] = torch.tensor(scale_factor)
366
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
367
- sample['render_img_idx'] = torch.tensor(image_perm)
368
- sample['partial_vol_origin'] = self.partial_vol_origin
369
- sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
370
- # print("meta: ", sample['meta'])
371
-
372
- # - image to render
373
- sample['query_image'] = sample['images'][0]
374
- sample['query_c2w'] = sample['c2ws'][0]
375
- sample['query_w2c'] = sample['w2cs'][0]
376
- sample['query_intrinsic'] = sample['intrinsics'][0]
377
- sample['query_depth'] = sample['depths_h'][0]
378
- sample['query_mask'] = sample['masks_h'][0]
379
- sample['query_near_far'] = sample['near_fars'][0]
380
-
381
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
382
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
383
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
384
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
385
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
386
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
387
- sample['view_ids'] = sample['view_ids'][start_idx:]
388
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
389
-
390
- sample['scale_mat'] = torch.from_numpy(scale_mat)
391
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
392
-
393
- # - generate rays
394
- if ('val' in self.split) or ('test' in self.split):
395
- sample_rays = gen_rays_from_single_image(
396
- img_wh[1], img_wh[0],
397
- sample['query_image'],
398
- sample['query_intrinsic'],
399
- sample['query_c2w'],
400
- depth=sample['query_depth'],
401
- mask=sample['query_mask'] if self.clean_image else None)
402
- else:
403
- sample_rays = gen_random_rays_from_single_image(
404
- img_wh[1], img_wh[0],
405
- self.N_rays,
406
- sample['query_image'],
407
- sample['query_intrinsic'],
408
- sample['query_c2w'],
409
- depth=sample['query_depth'],
410
- mask=sample['query_mask'] if self.clean_image else None,
411
- dilated_mask=mask_dilated,
412
- importance_sample=self.importance_sample)
413
-
414
-
415
- sample['rays'] = sample_rays
416
-
417
- return sample
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
SparseNeuS_demo_v1/data/blender_general_narrow_all_no_depth.py DELETED
@@ -1,388 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- # print("root_dir: ", root_dir)
70
- self.root_dir = root_dir
71
- self.split = split
72
-
73
- self.n_views = n_views
74
- self.N_rays = N_rays
75
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
76
-
77
- self.clean_image = clean_image
78
- self.importance_sample = importance_sample
79
- self.test_ref_views = test_ref_views # used for testing
80
- self.scale_factor = 1.0
81
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
-
83
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
- with open(lvis_json_path, 'r') as f:
85
- lvis_paths = json.load(f)
86
- if self.split == 'train':
87
- self.lvis_paths = lvis_paths['train']
88
- else:
89
- self.lvis_paths = lvis_paths['val']
90
- if img_wh is not None:
91
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
- 'img_wh must both be multiples of 32!'
93
-
94
-
95
- pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
- with open(pose_json_path, 'r') as f:
97
- meta = json.load(f)
98
-
99
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
- self.img_wh = (256, 256)
101
- self.input_poses = np.array(list(meta["c2ws"].values()))
102
- intrinsic = np.eye(4)
103
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
- self.intrinsic = intrinsic
105
- self.near_far = np.array(meta["near_far"])
106
- self.near_far[1] = 1.8
107
- self.define_transforms()
108
- self.blender2opencv = np.array(
109
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
- )
111
-
112
-
113
- self.c2ws = []
114
- self.w2cs = []
115
- self.near_fars = []
116
- # self.root_dir = root_dir
117
- for idx, img_id in enumerate(self.img_ids):
118
- pose = self.input_poses[idx]
119
- c2w = pose @ self.blender2opencv
120
- self.c2ws.append(c2w)
121
- self.w2cs.append(np.linalg.inv(c2w))
122
- self.near_fars.append(self.near_far)
123
- self.c2ws = np.stack(self.c2ws, axis=0)
124
- self.w2cs = np.stack(self.w2cs, axis=0)
125
-
126
-
127
- self.all_intrinsics = [] # the cam info of the whole scene
128
- self.all_extrinsics = []
129
- self.all_near_fars = []
130
- self.load_cam_info()
131
-
132
- # * bounding box for rendering
133
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
- self.bbox_max = np.array([1.0, 1.0, 1.0])
135
-
136
- # - used for cost volume regularization
137
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
-
140
-
141
- def define_transforms(self):
142
- self.transform = T.Compose([T.ToTensor()])
143
-
144
-
145
-
146
- def load_cam_info(self):
147
- for vid, img_id in enumerate(self.img_ids):
148
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
- self.all_intrinsics.append(intrinsic)
150
- self.all_extrinsics.append(extrinsic)
151
- self.all_near_fars.append(near_far)
152
-
153
- def read_depth(self, filename):
154
- pass
155
-
156
- def read_mask(self, filename):
157
- mask_h = cv2.imread(filename, 0)
158
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
- interpolation=cv2.INTER_NEAREST)
160
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
- interpolation=cv2.INTER_NEAREST)
162
-
163
- mask[mask > 0] = 1 # the masks stored in png are not binary
164
- mask_h[mask_h > 0] = 1
165
-
166
- return mask, mask_h
167
-
168
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
-
170
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
- # print("center", center)
172
- # print("radius", radius)
173
- # print("bounds", bounds)
174
- # import ipdb; ipdb.set_trace()
175
- radius = radius * factor
176
- scale_mat = np.diag([radius, radius, radius, 1.0])
177
- scale_mat[:3, 3] = center.cpu().numpy()
178
- scale_mat = scale_mat.astype(np.float32)
179
-
180
- return scale_mat, 1. / radius.cpu().numpy()
181
-
182
- def __len__(self):
183
- return 8*len(self.lvis_paths)
184
-
185
-
186
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
187
- pass
188
-
189
-
190
- def __getitem__(self, idx):
191
- sample = {}
192
- origin_idx = idx
193
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
194
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
195
-
196
-
197
- folder_uid_dict = self.lvis_paths[idx//8]
198
- idx = idx % 8 # [0, 7]
199
- folder_id = folder_uid_dict['folder_id']
200
- uid = folder_uid_dict['uid']
201
-
202
-
203
- # target view
204
- c2w = self.c2ws[idx]
205
- w2c = np.linalg.inv(c2w)
206
- w2c_ref = w2c
207
- w2c_ref_inv = np.linalg.inv(w2c_ref)
208
-
209
- w2cs.append(w2c @ w2c_ref_inv)
210
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
211
-
212
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
213
-
214
- depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
215
-
216
-
217
- img = Image.open(img_filename)
218
-
219
- img = self.transform(img) # (4, h, w)
220
-
221
-
222
- if img.shape[0] == 4:
223
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
224
- imgs += [img]
225
-
226
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
227
- mask_h = depth_h > 0
228
- # print("valid pixels", np.sum(mask_h))
229
- # directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
230
- # surface_points = directions * depth_h[..., None] # [H, W, 3]
231
- # distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
232
- # depth_h = distance
233
-
234
- depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
235
- depth_h = depth_h.fill_(-1.0)
236
-
237
- depths_h.append(depth_h)
238
- masks_h.append(mask_h)
239
-
240
- intrinsic = self.intrinsic
241
- intrinsics.append(intrinsic)
242
-
243
-
244
- near_fars.append(self.near_fars[idx])
245
- image_perm = 0 # only supervised on reference view
246
-
247
- mask_dilated = None
248
-
249
- # src_views = range(8+idx*4, 8+(idx+1)*4)
250
- src_views = range(8, 8 + 8 * 4)
251
-
252
- for vid in src_views:
253
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
254
-
255
- img = Image.open(img_filename)
256
- img_wh = self.img_wh
257
-
258
- img = self.transform(img)
259
- if img.shape[0] == 4:
260
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
261
-
262
- imgs += [img]
263
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
264
- depths_h.append(depth_h)
265
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
266
-
267
- near_fars.append(self.all_near_fars[vid])
268
- intrinsics.append(self.all_intrinsics[vid])
269
-
270
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
271
-
272
-
273
- # ! estimate scale_mat
274
- scale_mat, scale_factor = self.cal_scale_mat(
275
- img_hw=[img_wh[1], img_wh[0]],
276
- intrinsics=intrinsics, extrinsics=w2cs,
277
- near_fars=near_fars, factor=1.1
278
- )
279
-
280
-
281
- new_near_fars = []
282
- new_w2cs = []
283
- new_c2ws = []
284
- new_affine_mats = []
285
- new_depths_h = []
286
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
287
-
288
- P = intrinsic @ extrinsic @ scale_mat
289
- P = P[:3, :4]
290
- # - should use load_K_Rt_from_P() to obtain c2w
291
- c2w = load_K_Rt_from_P(None, P)[1]
292
- w2c = np.linalg.inv(c2w)
293
- new_w2cs.append(w2c)
294
- new_c2ws.append(c2w)
295
- affine_mat = np.eye(4)
296
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
297
- new_affine_mats.append(affine_mat)
298
-
299
- camera_o = c2w[:3, 3]
300
- dist = np.sqrt(np.sum(camera_o ** 2))
301
- near = dist - 1
302
- far = dist + 1
303
-
304
- new_near_fars.append([0.95 * near, 1.05 * far])
305
- new_depths_h.append(depth * scale_factor)
306
-
307
- # print(new_near_fars)
308
- imgs = torch.stack(imgs).float()
309
- depths_h = np.stack(new_depths_h)
310
- masks_h = np.stack(masks_h)
311
-
312
- affine_mats = np.stack(new_affine_mats)
313
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
314
- new_near_fars)
315
-
316
- if self.split == 'train':
317
- start_idx = 0
318
- else:
319
- start_idx = 1
320
-
321
- view_ids = [idx] + list(src_views)
322
- sample['origin_idx'] = origin_idx
323
- sample['images'] = imgs # (V, 3, H, W)
324
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
325
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
326
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
327
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
328
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
329
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
330
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
331
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
332
-
333
- # sample['light_idx'] = torch.tensor(light_idx)
334
- sample['scan'] = folder_id
335
-
336
- sample['scale_factor'] = torch.tensor(scale_factor)
337
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
338
- sample['render_img_idx'] = torch.tensor(image_perm)
339
- sample['partial_vol_origin'] = self.partial_vol_origin
340
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
341
-
342
-
343
- # - image to render
344
- sample['query_image'] = sample['images'][0]
345
- sample['query_c2w'] = sample['c2ws'][0]
346
- sample['query_w2c'] = sample['w2cs'][0]
347
- sample['query_intrinsic'] = sample['intrinsics'][0]
348
- sample['query_depth'] = sample['depths_h'][0]
349
- sample['query_mask'] = sample['masks_h'][0]
350
- sample['query_near_far'] = sample['near_fars'][0]
351
-
352
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
353
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
354
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
355
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
356
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
357
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
358
- sample['view_ids'] = sample['view_ids'][start_idx:]
359
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
360
-
361
- sample['scale_mat'] = torch.from_numpy(scale_mat)
362
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
363
-
364
- # - generate rays
365
- if ('val' in self.split) or ('test' in self.split):
366
- sample_rays = gen_rays_from_single_image(
367
- img_wh[1], img_wh[0],
368
- sample['query_image'],
369
- sample['query_intrinsic'],
370
- sample['query_c2w'],
371
- depth=sample['query_depth'],
372
- mask=sample['query_mask'] if self.clean_image else None)
373
- else:
374
- sample_rays = gen_random_rays_from_single_image(
375
- img_wh[1], img_wh[0],
376
- self.N_rays,
377
- sample['query_image'],
378
- sample['query_intrinsic'],
379
- sample['query_c2w'],
380
- depth=sample['query_depth'],
381
- mask=sample['query_mask'] if self.clean_image else None,
382
- dilated_mask=mask_dilated,
383
- importance_sample=self.importance_sample)
384
-
385
-
386
- sample['rays'] = sample_rays
387
-
388
- return sample
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4.py DELETED
@@ -1,389 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- # print("root_dir: ", root_dir)
70
- self.root_dir = root_dir
71
- self.split = split
72
-
73
- self.n_views = n_views
74
- self.N_rays = N_rays
75
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
76
-
77
- self.clean_image = clean_image
78
- self.importance_sample = importance_sample
79
- self.test_ref_views = test_ref_views # used for testing
80
- self.scale_factor = 1.0
81
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
-
83
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
- with open(lvis_json_path, 'r') as f:
85
- lvis_paths = json.load(f)
86
- if self.split == 'train':
87
- self.lvis_paths = lvis_paths['train']
88
- else:
89
- self.lvis_paths = lvis_paths['val']
90
- if img_wh is not None:
91
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
- 'img_wh must both be multiples of 32!'
93
-
94
-
95
- pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
- with open(pose_json_path, 'r') as f:
97
- meta = json.load(f)
98
-
99
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
- self.img_wh = (256, 256)
101
- self.input_poses = np.array(list(meta["c2ws"].values()))
102
- intrinsic = np.eye(4)
103
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
- self.intrinsic = intrinsic
105
- self.near_far = np.array(meta["near_far"])
106
- self.near_far[1] = 1.8
107
- self.define_transforms()
108
- self.blender2opencv = np.array(
109
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
- )
111
-
112
-
113
- self.c2ws = []
114
- self.w2cs = []
115
- self.near_fars = []
116
- # self.root_dir = root_dir
117
- for idx, img_id in enumerate(self.img_ids):
118
- pose = self.input_poses[idx]
119
- c2w = pose @ self.blender2opencv
120
- self.c2ws.append(c2w)
121
- self.w2cs.append(np.linalg.inv(c2w))
122
- self.near_fars.append(self.near_far)
123
- self.c2ws = np.stack(self.c2ws, axis=0)
124
- self.w2cs = np.stack(self.w2cs, axis=0)
125
-
126
-
127
- self.all_intrinsics = [] # the cam info of the whole scene
128
- self.all_extrinsics = []
129
- self.all_near_fars = []
130
- self.load_cam_info()
131
-
132
- # * bounding box for rendering
133
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
- self.bbox_max = np.array([1.0, 1.0, 1.0])
135
-
136
- # - used for cost volume regularization
137
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
-
140
-
141
- def define_transforms(self):
142
- self.transform = T.Compose([T.ToTensor()])
143
-
144
-
145
-
146
- def load_cam_info(self):
147
- for vid, img_id in enumerate(self.img_ids):
148
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
- self.all_intrinsics.append(intrinsic)
150
- self.all_extrinsics.append(extrinsic)
151
- self.all_near_fars.append(near_far)
152
-
153
- def read_depth(self, filename):
154
- pass
155
-
156
- def read_mask(self, filename):
157
- mask_h = cv2.imread(filename, 0)
158
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
- interpolation=cv2.INTER_NEAREST)
160
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
- interpolation=cv2.INTER_NEAREST)
162
-
163
- mask[mask > 0] = 1 # the masks stored in png are not binary
164
- mask_h[mask_h > 0] = 1
165
-
166
- return mask, mask_h
167
-
168
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
-
170
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
- # print("center", center)
172
- # print("radius", radius)
173
- # print("bounds", bounds)
174
- # import ipdb; ipdb.set_trace()
175
- radius = radius * factor
176
- scale_mat = np.diag([radius, radius, radius, 1.0])
177
- scale_mat[:3, 3] = center.cpu().numpy()
178
- scale_mat = scale_mat.astype(np.float32)
179
-
180
- return scale_mat, 1. / radius.cpu().numpy()
181
-
182
- def __len__(self):
183
- return 4*len(self.lvis_paths)
184
-
185
-
186
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
187
- pass
188
-
189
-
190
- def __getitem__(self, idx):
191
- idx = idx * 2
192
- sample = {}
193
- origin_idx = idx
194
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
195
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
196
-
197
-
198
- folder_uid_dict = self.lvis_paths[idx//8]
199
- idx = idx % 8 # [0, 7]
200
- folder_id = folder_uid_dict['folder_id']
201
- uid = folder_uid_dict['uid']
202
-
203
-
204
- # target view
205
- c2w = self.c2ws[idx]
206
- w2c = np.linalg.inv(c2w)
207
- w2c_ref = w2c
208
- w2c_ref_inv = np.linalg.inv(w2c_ref)
209
-
210
- w2cs.append(w2c @ w2c_ref_inv)
211
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
212
-
213
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
214
-
215
- depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
216
-
217
-
218
- img = Image.open(img_filename)
219
-
220
- img = self.transform(img) # (4, h, w)
221
-
222
-
223
- if img.shape[0] == 4:
224
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
225
- imgs += [img]
226
-
227
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
228
- mask_h = depth_h > 0
229
- # print("valid pixels", np.sum(mask_h))
230
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
231
- surface_points = directions * depth_h[..., None] # [H, W, 3]
232
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
233
- depth_h = distance
234
-
235
-
236
- depths_h.append(depth_h)
237
- masks_h.append(mask_h)
238
-
239
- intrinsic = self.intrinsic
240
- intrinsics.append(intrinsic)
241
-
242
-
243
- near_fars.append(self.near_fars[idx])
244
- image_perm = 0 # only supervised on reference view
245
-
246
- mask_dilated = None
247
-
248
- # src_views = range(8+idx*4, 8+(idx+1)*4)
249
- src_views = range(8, 8 + 8 * 4)
250
-
251
- for vid in src_views:
252
- if (vid // 4) % 2 != 0:
253
- continue
254
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
255
-
256
- img = Image.open(img_filename)
257
- img_wh = self.img_wh
258
-
259
- img = self.transform(img)
260
- if img.shape[0] == 4:
261
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
262
-
263
- imgs += [img]
264
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
265
- depths_h.append(depth_h)
266
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
267
-
268
- near_fars.append(self.all_near_fars[vid])
269
- intrinsics.append(self.all_intrinsics[vid])
270
-
271
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
272
-
273
- # print("len(imgs)", len(imgs))
274
- # ! estimate scale_mat
275
- scale_mat, scale_factor = self.cal_scale_mat(
276
- img_hw=[img_wh[1], img_wh[0]],
277
- intrinsics=intrinsics, extrinsics=w2cs,
278
- near_fars=near_fars, factor=1.1
279
- )
280
-
281
-
282
- new_near_fars = []
283
- new_w2cs = []
284
- new_c2ws = []
285
- new_affine_mats = []
286
- new_depths_h = []
287
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
288
-
289
- P = intrinsic @ extrinsic @ scale_mat
290
- P = P[:3, :4]
291
- # - should use load_K_Rt_from_P() to obtain c2w
292
- c2w = load_K_Rt_from_P(None, P)[1]
293
- w2c = np.linalg.inv(c2w)
294
- new_w2cs.append(w2c)
295
- new_c2ws.append(c2w)
296
- affine_mat = np.eye(4)
297
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
298
- new_affine_mats.append(affine_mat)
299
-
300
- camera_o = c2w[:3, 3]
301
- dist = np.sqrt(np.sum(camera_o ** 2))
302
- near = dist - 1
303
- far = dist + 1
304
-
305
- new_near_fars.append([0.95 * near, 1.05 * far])
306
- new_depths_h.append(depth * scale_factor)
307
-
308
- # print(new_near_fars)
309
- imgs = torch.stack(imgs).float()
310
- depths_h = np.stack(new_depths_h)
311
- masks_h = np.stack(masks_h)
312
-
313
- affine_mats = np.stack(new_affine_mats)
314
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
315
- new_near_fars)
316
-
317
- if self.split == 'train':
318
- start_idx = 0
319
- else:
320
- start_idx = 1
321
-
322
- view_ids = [idx] + list(src_views)
323
- sample['origin_idx'] = origin_idx
324
- sample['images'] = imgs # (V, 3, H, W)
325
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
326
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
327
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
328
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
329
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
330
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
331
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
332
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
333
-
334
- # sample['light_idx'] = torch.tensor(light_idx)
335
- sample['scan'] = folder_id
336
-
337
- sample['scale_factor'] = torch.tensor(scale_factor)
338
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
339
- sample['render_img_idx'] = torch.tensor(image_perm)
340
- sample['partial_vol_origin'] = self.partial_vol_origin
341
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
342
-
343
-
344
- # - image to render
345
- sample['query_image'] = sample['images'][0]
346
- sample['query_c2w'] = sample['c2ws'][0]
347
- sample['query_w2c'] = sample['w2cs'][0]
348
- sample['query_intrinsic'] = sample['intrinsics'][0]
349
- sample['query_depth'] = sample['depths_h'][0]
350
- sample['query_mask'] = sample['masks_h'][0]
351
- sample['query_near_far'] = sample['near_fars'][0]
352
-
353
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
354
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
355
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
356
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
357
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
358
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
359
- sample['view_ids'] = sample['view_ids'][start_idx:]
360
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
361
-
362
- sample['scale_mat'] = torch.from_numpy(scale_mat)
363
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
364
-
365
- # - generate rays
366
- if ('val' in self.split) or ('test' in self.split):
367
- sample_rays = gen_rays_from_single_image(
368
- img_wh[1], img_wh[0],
369
- sample['query_image'],
370
- sample['query_intrinsic'],
371
- sample['query_c2w'],
372
- depth=sample['query_depth'],
373
- mask=sample['query_mask'] if self.clean_image else None)
374
- else:
375
- sample_rays = gen_random_rays_from_single_image(
376
- img_wh[1], img_wh[0],
377
- self.N_rays,
378
- sample['query_image'],
379
- sample['query_intrinsic'],
380
- sample['query_c2w'],
381
- depth=sample['query_depth'],
382
- mask=sample['query_mask'] if self.clean_image else None,
383
- dilated_mask=mask_dilated,
384
- importance_sample=self.importance_sample)
385
-
386
-
387
- sample['rays'] = sample_rays
388
-
389
- return sample
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4_and_4.py DELETED
@@ -1,395 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- def load_K_Rt_from_P(filename, P=None):
38
- if P is None:
39
- lines = open(filename).read().splitlines()
40
- if len(lines) == 4:
41
- lines = lines[1:]
42
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
- P = np.asarray(lines).astype(np.float32).squeeze()
44
-
45
- out = cv2.decomposeProjectionMatrix(P)
46
- K = out[0]
47
- R = out[1]
48
- t = out[2]
49
-
50
- K = K / K[2, 2]
51
- intrinsics = np.eye(4)
52
- intrinsics[:3, :3] = K
53
-
54
- pose = np.eye(4, dtype=np.float32)
55
- pose[:3, :3] = R.transpose() # ? why need transpose here
56
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
-
58
- return intrinsics, pose # ! return cam2world matrix here
59
-
60
-
61
- # ! load one ref-image with multiple src-images in camera coordinate system
62
- class BlenderPerView(Dataset):
63
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
- split_filepath=None, pair_filepath=None,
65
- N_rays=512,
66
- vol_dims=[128, 128, 128], batch_size=1,
67
- clean_image=False, importance_sample=False, test_ref_views=[]):
68
-
69
- # print("root_dir: ", root_dir)
70
- self.root_dir = root_dir
71
- self.split = split
72
-
73
- self.n_views = n_views
74
- self.N_rays = N_rays
75
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
76
-
77
- self.clean_image = clean_image
78
- self.importance_sample = importance_sample
79
- self.test_ref_views = test_ref_views # used for testing
80
- self.scale_factor = 1.0
81
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
-
83
- lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
- with open(lvis_json_path, 'r') as f:
85
- lvis_paths = json.load(f)
86
- if self.split == 'train':
87
- self.lvis_paths = lvis_paths['train']
88
- else:
89
- self.lvis_paths = lvis_paths['val']
90
- if img_wh is not None:
91
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
- 'img_wh must both be multiples of 32!'
93
-
94
-
95
- pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
- with open(pose_json_path, 'r') as f:
97
- meta = json.load(f)
98
-
99
- self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
- self.img_wh = (256, 256)
101
- self.input_poses = np.array(list(meta["c2ws"].values()))
102
- intrinsic = np.eye(4)
103
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
- self.intrinsic = intrinsic
105
- self.near_far = np.array(meta["near_far"])
106
- self.near_far[1] = 1.8
107
- self.define_transforms()
108
- self.blender2opencv = np.array(
109
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
- )
111
-
112
-
113
- self.c2ws = []
114
- self.w2cs = []
115
- self.near_fars = []
116
- # self.root_dir = root_dir
117
- for idx, img_id in enumerate(self.img_ids):
118
- pose = self.input_poses[idx]
119
- c2w = pose @ self.blender2opencv
120
- self.c2ws.append(c2w)
121
- self.w2cs.append(np.linalg.inv(c2w))
122
- self.near_fars.append(self.near_far)
123
- self.c2ws = np.stack(self.c2ws, axis=0)
124
- self.w2cs = np.stack(self.w2cs, axis=0)
125
-
126
-
127
- self.all_intrinsics = [] # the cam info of the whole scene
128
- self.all_extrinsics = []
129
- self.all_near_fars = []
130
- self.load_cam_info()
131
-
132
- # * bounding box for rendering
133
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
- self.bbox_max = np.array([1.0, 1.0, 1.0])
135
-
136
- # - used for cost volume regularization
137
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
-
140
-
141
- def define_transforms(self):
142
- self.transform = T.Compose([T.ToTensor()])
143
-
144
-
145
-
146
- def load_cam_info(self):
147
- for vid, img_id in enumerate(self.img_ids):
148
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
- self.all_intrinsics.append(intrinsic)
150
- self.all_extrinsics.append(extrinsic)
151
- self.all_near_fars.append(near_far)
152
-
153
- def read_depth(self, filename):
154
- pass
155
-
156
- def read_mask(self, filename):
157
- mask_h = cv2.imread(filename, 0)
158
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
- interpolation=cv2.INTER_NEAREST)
160
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
- interpolation=cv2.INTER_NEAREST)
162
-
163
- mask[mask > 0] = 1 # the masks stored in png are not binary
164
- mask_h[mask_h > 0] = 1
165
-
166
- return mask, mask_h
167
-
168
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
-
170
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
- # print("center", center)
172
- # print("radius", radius)
173
- # print("bounds", bounds)
174
- # import ipdb; ipdb.set_trace()
175
- radius = radius * factor
176
- scale_mat = np.diag([radius, radius, radius, 1.0])
177
- scale_mat[:3, 3] = center.cpu().numpy()
178
- scale_mat = scale_mat.astype(np.float32)
179
-
180
- return scale_mat, 1. / radius.cpu().numpy()
181
-
182
- def __len__(self):
183
- return 8*len(self.lvis_paths)
184
-
185
-
186
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
187
- pass
188
-
189
-
190
- def __getitem__(self, idx):
191
- idx = idx
192
- sample = {}
193
- origin_idx = idx
194
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
195
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
196
-
197
-
198
- folder_uid_dict = self.lvis_paths[idx//8]
199
- idx = idx % 8 # [0, 7]
200
- folder_id = folder_uid_dict['folder_id']
201
- uid = folder_uid_dict['uid']
202
-
203
-
204
- # target view
205
- c2w = self.c2ws[idx]
206
- w2c = np.linalg.inv(c2w)
207
- w2c_ref = w2c
208
- w2c_ref_inv = np.linalg.inv(w2c_ref)
209
-
210
- w2cs.append(w2c @ w2c_ref_inv)
211
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
212
-
213
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
214
-
215
- depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
216
-
217
-
218
- img = Image.open(img_filename)
219
-
220
- img = self.transform(img) # (4, h, w)
221
-
222
-
223
- if img.shape[0] == 4:
224
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
225
- imgs += [img]
226
-
227
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
228
- mask_h = depth_h > 0
229
- # print("valid pixels", np.sum(mask_h))
230
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
231
- surface_points = directions * depth_h[..., None] # [H, W, 3]
232
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
233
- depth_h = distance
234
-
235
-
236
- depths_h.append(depth_h)
237
- masks_h.append(mask_h)
238
-
239
- intrinsic = self.intrinsic
240
- intrinsics.append(intrinsic)
241
-
242
-
243
- near_fars.append(self.near_fars[idx])
244
- image_perm = 0 # only supervised on reference view
245
-
246
- mask_dilated = None
247
-
248
- # src_views = range(8+idx*4, 8+(idx+1)*4)
249
-
250
- src_views = range(8, 8 + 8 * 4)
251
-
252
- vid_list = []
253
- for vid in src_views:
254
- if (vid // 4) % 2 != idx % 2:
255
- continue
256
- vid_list.append(vid)
257
- img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
258
-
259
- img = Image.open(img_filename)
260
- img_wh = self.img_wh
261
-
262
- img = self.transform(img)
263
- if img.shape[0] == 4:
264
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
265
-
266
- imgs += [img]
267
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
268
- depths_h.append(depth_h)
269
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
270
-
271
- near_fars.append(self.all_near_fars[vid])
272
- intrinsics.append(self.all_intrinsics[vid])
273
-
274
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
275
-
276
-
277
- # print("idx:", idx)
278
- # print("len(imgs)", len(imgs))
279
- # print("vid_list", vid_list)
280
- # ! estimate scale_mat
281
- scale_mat, scale_factor = self.cal_scale_mat(
282
- img_hw=[img_wh[1], img_wh[0]],
283
- intrinsics=intrinsics, extrinsics=w2cs,
284
- near_fars=near_fars, factor=1.1
285
- )
286
-
287
-
288
- new_near_fars = []
289
- new_w2cs = []
290
- new_c2ws = []
291
- new_affine_mats = []
292
- new_depths_h = []
293
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
294
-
295
- P = intrinsic @ extrinsic @ scale_mat
296
- P = P[:3, :4]
297
- # - should use load_K_Rt_from_P() to obtain c2w
298
- c2w = load_K_Rt_from_P(None, P)[1]
299
- w2c = np.linalg.inv(c2w)
300
- new_w2cs.append(w2c)
301
- new_c2ws.append(c2w)
302
- affine_mat = np.eye(4)
303
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
304
- new_affine_mats.append(affine_mat)
305
-
306
- camera_o = c2w[:3, 3]
307
- dist = np.sqrt(np.sum(camera_o ** 2))
308
- near = dist - 1
309
- far = dist + 1
310
-
311
- new_near_fars.append([0.95 * near, 1.05 * far])
312
- new_depths_h.append(depth * scale_factor)
313
-
314
- # print(new_near_fars)
315
- imgs = torch.stack(imgs).float()
316
- depths_h = np.stack(new_depths_h)
317
- masks_h = np.stack(masks_h)
318
-
319
- affine_mats = np.stack(new_affine_mats)
320
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
321
- new_near_fars)
322
-
323
- if self.split == 'train':
324
- start_idx = 0
325
- else:
326
- start_idx = 1
327
-
328
- view_ids = [idx] + list(src_views)
329
- sample['origin_idx'] = origin_idx
330
- sample['images'] = imgs # (V, 3, H, W)
331
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
332
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
333
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
334
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
335
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
336
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
337
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
338
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
339
-
340
- # sample['light_idx'] = torch.tensor(light_idx)
341
- sample['scan'] = folder_id
342
-
343
- sample['scale_factor'] = torch.tensor(scale_factor)
344
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
345
- sample['render_img_idx'] = torch.tensor(image_perm)
346
- sample['partial_vol_origin'] = self.partial_vol_origin
347
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
348
-
349
-
350
- # - image to render
351
- sample['query_image'] = sample['images'][0]
352
- sample['query_c2w'] = sample['c2ws'][0]
353
- sample['query_w2c'] = sample['w2cs'][0]
354
- sample['query_intrinsic'] = sample['intrinsics'][0]
355
- sample['query_depth'] = sample['depths_h'][0]
356
- sample['query_mask'] = sample['masks_h'][0]
357
- sample['query_near_far'] = sample['near_fars'][0]
358
-
359
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
360
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
361
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
362
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
363
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
364
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
365
- sample['view_ids'] = sample['view_ids'][start_idx:]
366
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
367
-
368
- sample['scale_mat'] = torch.from_numpy(scale_mat)
369
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
370
-
371
- # - generate rays
372
- if ('val' in self.split) or ('test' in self.split):
373
- sample_rays = gen_rays_from_single_image(
374
- img_wh[1], img_wh[0],
375
- sample['query_image'],
376
- sample['query_intrinsic'],
377
- sample['query_c2w'],
378
- depth=sample['query_depth'],
379
- mask=sample['query_mask'] if self.clean_image else None)
380
- else:
381
- sample_rays = gen_random_rays_from_single_image(
382
- img_wh[1], img_wh[0],
383
- self.N_rays,
384
- sample['query_image'],
385
- sample['query_intrinsic'],
386
- sample['query_c2w'],
387
- depth=sample['query_depth'],
388
- mask=sample['query_mask'] if self.clean_image else None,
389
- dilated_mask=mask_dilated,
390
- importance_sample=self.importance_sample)
391
-
392
-
393
- sample['rays'] = sample_rays
394
-
395
- return sample
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
SparseNeuS_demo_v1/data/blender_gt_32.py DELETED
@@ -1,419 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
- import json
13
- from termcolor import colored
14
- import imageio
15
- from kornia import create_meshgrid
16
- import open3d as o3d
17
- def get_ray_directions(H, W, focal, center=None):
18
- """
19
- Get ray directions for all pixels in camera coordinate.
20
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
- ray-tracing-generating-camera-rays/standard-coordinate-systems
22
- Inputs:
23
- H, W, focal: image height, width and focal length
24
- Outputs:
25
- directions: (H, W, 3), the direction of the rays in camera coordinate
26
- """
27
- grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
-
29
- i, j = grid.unbind(-1)
30
- # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
- # see https://github.com/bmild/nerf/issues/24
32
- cent = center if center is not None else [W / 2, H / 2]
33
- directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
-
35
- return directions
36
-
37
- import os, json
38
- import numpy as np
39
- def calc_pose(phis, thetas, size, radius = 1.2):
40
- import torch
41
- def normalize(vectors):
42
- return vectors / (torch.norm(vectors, dim=-1, keepdim=True) + 1e-10)
43
- # device = torch.device('cuda')
44
- thetas = torch.FloatTensor(thetas)
45
- phis = torch.FloatTensor(phis)
46
-
47
- centers = torch.stack([
48
- radius * torch.sin(thetas) * torch.sin(phis),
49
- -radius * torch.cos(thetas) * torch.sin(phis),
50
- radius * torch.cos(phis),
51
- ], dim=-1) # [B, 3]
52
-
53
- # lookat
54
- forward_vector = normalize(centers).squeeze(0)
55
- up_vector = torch.FloatTensor([0, 0, 1]).unsqueeze(0).repeat(size, 1)
56
- right_vector = normalize(torch.cross(up_vector, forward_vector, dim=-1))
57
- if right_vector.pow(2).sum() < 0.01:
58
- right_vector = torch.FloatTensor([0, 1, 0]).unsqueeze(0).repeat(size, 1)
59
- up_vector = normalize(torch.cross(forward_vector, right_vector, dim=-1))
60
-
61
- poses = torch.eye(4, dtype=torch.float)[:3].unsqueeze(0).repeat(size, 1, 1)
62
- poses[:, :3, :3] = torch.stack((right_vector, up_vector, forward_vector), dim=-1)
63
- poses[:, :3, 3] = centers
64
- return poses
65
-
66
- def load_K_Rt_from_P(filename, P=None):
67
- if P is None:
68
- lines = open(filename).read().splitlines()
69
- if len(lines) == 4:
70
- lines = lines[1:]
71
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
72
- P = np.asarray(lines).astype(np.float32).squeeze()
73
-
74
- out = cv2.decomposeProjectionMatrix(P)
75
- K = out[0]
76
- R = out[1]
77
- t = out[2]
78
-
79
- K = K / K[2, 2]
80
- intrinsics = np.eye(4)
81
- intrinsics[:3, :3] = K
82
-
83
- pose = np.eye(4, dtype=np.float32)
84
- pose[:3, :3] = R.transpose() # ? why need transpose here
85
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
86
-
87
- return intrinsics, pose # ! return cam2world matrix here
88
-
89
-
90
- # ! load one ref-image with multiple src-images in camera coordinate system
91
- class BlenderPerView(Dataset):
92
- def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
93
- split_filepath=None, pair_filepath=None,
94
- N_rays=512,
95
- vol_dims=[128, 128, 128], batch_size=1,
96
- clean_image=False, importance_sample=False, test_ref_views=[]):
97
-
98
- # print("root_dir: ", root_dir)
99
- self.root_dir = root_dir
100
- self.split = split
101
-
102
- self.n_views = n_views
103
- self.N_rays = N_rays
104
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
105
-
106
- self.clean_image = clean_image
107
- self.importance_sample = importance_sample
108
- self.test_ref_views = test_ref_views # used for testing
109
- self.scale_factor = 1.0
110
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
111
-
112
- lvis_json_path = '/objaverse-processed/zero12345_img/random32_split.json' # folder_id and uid
113
- with open(lvis_json_path, 'r') as f:
114
- lvis_paths = json.load(f)
115
- if self.split == 'train':
116
- self.lvis_paths = lvis_paths['train']
117
- else:
118
- self.lvis_paths = lvis_paths['val']
119
- if img_wh is not None:
120
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
121
- 'img_wh must both be multiples of 32!'
122
-
123
- pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
124
-
125
- with open(pose_json_path, 'r') as f:
126
- meta = json.load(f)
127
- intrinsic = np.eye(4)
128
- intrinsic[:3, :3] = np.array(meta["intrinsics"])
129
- self.intrinsic = intrinsic
130
- self.near_far = np.array(meta["near_far"])
131
- self.near_far[1] = 1.8
132
-
133
- # * bounding box for rendering
134
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
135
- self.bbox_max = np.array([1.0, 1.0, 1.0])
136
-
137
- # - used for cost volume regularization
138
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
139
- self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
140
-
141
-
142
- def define_transforms(self):
143
- self.transform = T.Compose([T.ToTensor()])
144
-
145
-
146
-
147
- def load_cam_info(self):
148
- for vid in range(self.input_poses.shape[0]):
149
- intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
150
- self.all_intrinsics.append(intrinsic)
151
- self.all_extrinsics.append(extrinsic)
152
- self.all_near_fars.append(near_far)
153
-
154
- def read_depth(self, filename):
155
- pass
156
-
157
- def read_mask(self, filename):
158
- mask_h = cv2.imread(filename, 0)
159
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
160
- interpolation=cv2.INTER_NEAREST)
161
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
162
- interpolation=cv2.INTER_NEAREST)
163
-
164
- mask[mask > 0] = 1 # the masks stored in png are not binary
165
- mask_h[mask_h > 0] = 1
166
-
167
- return mask, mask_h
168
-
169
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
170
-
171
- center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
172
- # print("center", center)
173
- # print("radius", radius)
174
- # print("bounds", bounds)
175
- # import ipdb; ipdb.set_trace()
176
- radius = radius * factor
177
- scale_mat = np.diag([radius, radius, radius, 1.0])
178
- scale_mat[:3, 3] = center.cpu().numpy()
179
- scale_mat = scale_mat.astype(np.float32)
180
-
181
- return scale_mat, 1. / radius.cpu().numpy()
182
-
183
- def __len__(self):
184
- return 32*len(self.lvis_paths)
185
-
186
-
187
- def read_depth(self, filename, near_bound, noisy_factor=1.0):
188
- pass
189
-
190
-
191
- def __getitem__(self, idx):
192
- sample = {}
193
- origin_idx = idx
194
- imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
195
- intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
196
-
197
-
198
- folder_uid_dict = self.lvis_paths[idx//32]
199
- idx = idx % 32 # [0, 7]
200
- folder_id = folder_uid_dict['folder_id']
201
- uid = folder_uid_dict['uid']
202
-
203
- pose_file = os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, 'views.npz')
204
- pose_array = np.load(pose_file)
205
- pose = calc_pose(pose_array['elevations'], pose_array['azimuths'], 32) # [32, 3, 4] c2ws
206
-
207
- self.img_wh = (256, 256)
208
- self.input_poses = np.array(pose)
209
- self.input_poses = np.concatenate([self.input_poses, np.tile(np.array([0, 0, 0, 1], dtype=np.float32)[None, None, :], [self.input_poses.shape[0], 1, 1])], axis=1)
210
- self.define_transforms()
211
- self.blender2opencv = np.array(
212
- [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
213
- )
214
-
215
- self.c2ws = []
216
- self.w2cs = []
217
- self.near_fars = []
218
- # self.root_dir = root_dir
219
- for image_dix in range(pose.shape[0]):
220
- pose = self.input_poses[image_dix]
221
- c2w = pose @ self.blender2opencv
222
- self.c2ws.append(c2w)
223
- self.w2cs.append(np.linalg.inv(c2w))
224
- self.near_fars.append(self.near_far)
225
- self.c2ws = np.stack(self.c2ws, axis=0)
226
- self.w2cs = np.stack(self.w2cs, axis=0)
227
-
228
-
229
- self.all_intrinsics = [] # the cam info of the whole scene
230
- self.all_extrinsics = []
231
- self.all_near_fars = []
232
- self.load_cam_info()
233
-
234
-
235
-
236
- # target view
237
- c2w = self.c2ws[idx]
238
- w2c = np.linalg.inv(c2w)
239
- w2c_ref = w2c
240
- w2c_ref_inv = np.linalg.inv(w2c_ref)
241
-
242
- w2cs.append(w2c @ w2c_ref_inv)
243
- c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
244
-
245
- img_filename = os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, f'{idx}.png')
246
-
247
- depth_filename = os.path.join(os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, f'{idx}_depth_mm.png'))
248
-
249
-
250
- img = Image.open(img_filename)
251
-
252
- img = self.transform(img) # (4, h, w)
253
-
254
-
255
- if img.shape[0] == 4:
256
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
257
- imgs += [img]
258
-
259
- depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
260
- mask_h = depth_h > 0
261
-
262
- directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
263
- surface_points = directions * depth_h[..., None] # [H, W, 3]
264
- distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
265
- depth_h = distance
266
-
267
-
268
- depths_h.append(depth_h)
269
- masks_h.append(mask_h)
270
-
271
- intrinsic = self.intrinsic
272
- intrinsics.append(intrinsic)
273
-
274
-
275
- near_fars.append(self.near_fars[idx])
276
- image_perm = 0 # only supervised on reference view
277
-
278
- mask_dilated = None
279
-
280
- # src_views = range(8+idx*4, 8+(idx+1)*4)
281
- src_views = range(0, 8 * 4)
282
-
283
- for vid in src_views:
284
- img_filename = os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, f'{vid}.png')
285
-
286
- img = Image.open(img_filename)
287
- img_wh = self.img_wh
288
-
289
- img = self.transform(img)
290
- if img.shape[0] == 4:
291
- img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
292
-
293
- imgs += [img]
294
- depth_h = np.ones(img.shape[1:], dtype=np.float32)
295
- depths_h.append(depth_h)
296
- masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
297
-
298
- near_fars.append(self.all_near_fars[vid])
299
- intrinsics.append(self.all_intrinsics[vid])
300
-
301
- w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
302
-
303
-
304
- # ! estimate scale_mat
305
- scale_mat, scale_factor = self.cal_scale_mat(
306
- img_hw=[img_wh[1], img_wh[0]],
307
- intrinsics=intrinsics, extrinsics=w2cs,
308
- near_fars=near_fars, factor=1.1
309
- )
310
-
311
-
312
- new_near_fars = []
313
- new_w2cs = []
314
- new_c2ws = []
315
- new_affine_mats = []
316
- new_depths_h = []
317
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
318
-
319
- P = intrinsic @ extrinsic @ scale_mat
320
- P = P[:3, :4]
321
- # - should use load_K_Rt_from_P() to obtain c2w
322
- c2w = load_K_Rt_from_P(None, P)[1]
323
- w2c = np.linalg.inv(c2w)
324
- new_w2cs.append(w2c)
325
- new_c2ws.append(c2w)
326
- affine_mat = np.eye(4)
327
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
328
- new_affine_mats.append(affine_mat)
329
-
330
- camera_o = c2w[:3, 3]
331
- dist = np.sqrt(np.sum(camera_o ** 2))
332
- near = dist - 1
333
- far = dist + 1
334
-
335
- new_near_fars.append([0.95 * near, 1.05 * far])
336
- new_depths_h.append(depth * scale_factor)
337
-
338
- # print(new_near_fars)
339
- imgs = torch.stack(imgs).float()
340
- depths_h = np.stack(new_depths_h)
341
- masks_h = np.stack(masks_h)
342
-
343
- affine_mats = np.stack(new_affine_mats)
344
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
345
- new_near_fars)
346
-
347
- if self.split == 'train':
348
- start_idx = 0
349
- else:
350
- start_idx = 1
351
-
352
- view_ids = [idx] + list(src_views)
353
- sample['origin_idx'] = origin_idx
354
- sample['images'] = imgs # (V, 3, H, W)
355
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
356
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
357
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
358
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
359
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
360
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
361
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
362
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
363
-
364
- # sample['light_idx'] = torch.tensor(light_idx)
365
- sample['scan'] = folder_id
366
-
367
- sample['scale_factor'] = torch.tensor(scale_factor)
368
- sample['img_wh'] = torch.from_numpy(np.array(img_wh))
369
- sample['render_img_idx'] = torch.tensor(image_perm)
370
- sample['partial_vol_origin'] = self.partial_vol_origin
371
- sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
372
-
373
-
374
- # - image to render
375
- sample['query_image'] = sample['images'][0]
376
- sample['query_c2w'] = sample['c2ws'][0]
377
- sample['query_w2c'] = sample['w2cs'][0]
378
- sample['query_intrinsic'] = sample['intrinsics'][0]
379
- sample['query_depth'] = sample['depths_h'][0]
380
- sample['query_mask'] = sample['masks_h'][0]
381
- sample['query_near_far'] = sample['near_fars'][0]
382
-
383
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
384
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
385
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
386
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
387
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
388
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
389
- sample['view_ids'] = sample['view_ids'][start_idx:]
390
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
391
-
392
- sample['scale_mat'] = torch.from_numpy(scale_mat)
393
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
394
-
395
- # - generate rays
396
- if ('val' in self.split) or ('test' in self.split):
397
- sample_rays = gen_rays_from_single_image(
398
- img_wh[1], img_wh[0],
399
- sample['query_image'],
400
- sample['query_intrinsic'],
401
- sample['query_c2w'],
402
- depth=sample['query_depth'],
403
- mask=sample['query_mask'] if self.clean_image else None)
404
- else:
405
- sample_rays = gen_random_rays_from_single_image(
406
- img_wh[1], img_wh[0],
407
- self.N_rays,
408
- sample['query_image'],
409
- sample['query_intrinsic'],
410
- sample['query_c2w'],
411
- depth=sample['query_depth'],
412
- mask=sample['query_mask'] if self.clean_image else None,
413
- dilated_mask=mask_dilated,
414
- importance_sample=self.importance_sample)
415
-
416
-
417
- sample['rays'] = sample_rays
418
-
419
- return sample
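
A note on the depth handling in the `__getitem__` above: the loader converts each z-buffer depth map into per-pixel distances along the camera rays before scaling it into the normalized scene. A minimal NumPy sketch of that conversion, assuming a pinhole intrinsic (fx, fy, cx, cy) and an H x W depth map; the repo's `get_ray_directions` helper is replaced here by an explicit pixel-grid computation, and the half-pixel offset is an assumption of this sketch:

    import numpy as np

    def zdepth_to_ray_distance(depth, fx, fy, cx, cy):
        """Convert a z-depth map (H, W) into Euclidean distances along camera rays."""
        h, w = depth.shape
        # pixel centres; the half-pixel offset is an assumption, the repo helper may differ
        xs, ys = np.meshgrid(np.arange(w) + 0.5, np.arange(h) + 0.5)
        # camera-space ray directions with unit z component
        dirs = np.stack([(xs - cx) / fx, (ys - cy) / fy, np.ones_like(xs)], axis=-1)
        surface_points = dirs * depth[..., None]        # back-projected 3D points, (H, W, 3)
        return np.linalg.norm(surface_points, axis=-1)  # distance from the camera centre, (H, W)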
 
 
 
 
SparseNeuS_demo_v1/data/dtu/dtu_pairs.txt DELETED
@@ -1,93 +0,0 @@
1
- 46
2
- 0
3
- 10 10 2346.410000 1 2036.530000 9 1243.890000 12 1052.870000 11 1000.840000 13 703.583000 2 604.456000 8 439.759000 14 327.419000 27 249.278000
4
- 1
5
- 10 9 2850.870000 10 2583.940000 2 2105.590000 0 2052.840000 8 1868.240000 13 1184.230000 14 1017.510000 12 961.966000 7 670.208000 15 657.218000
6
- 2
7
- 10 8 2501.240000 1 2106.880000 7 1856.500000 9 1782.340000 3 1141.770000 15 1061.760000 14 815.457000 16 762.153000 6 709.789000 10 699.921000
8
- 3
9
- 10 7 1294.390000 6 1159.130000 2 1134.270000 4 905.717000 8 687.320000 5 600.015000 17 496.958000 16 481.969000 1 379.011000 15 307.450000
10
- 4
11
- 10 5 1333.740000 6 1145.150000 3 895.254000 7 486.504000 18 446.420000 2 418.517000 17 326.528000 8 161.115000 16 149.154000 1 103.626000
12
- 5
13
- 10 6 1676.060000 18 1555.060000 4 1335.550000 17 868.416000 3 593.755000 7 467.816000 20 440.579000 19 428.255000 16 242.327000 21 210.253000
14
- 6
15
- 10 17 2332.350000 7 1848.240000 18 1812.740000 5 1696.070000 16 1273.000000 3 1157.990000 4 1155.410000 20 771.624000 21 744.945000 2 700.368000
16
- 7
17
- 10 16 2709.460000 8 2439.700000 15 2078.210000 6 1864.160000 2 1846.600000 17 1791.710000 3 1296.860000 22 957.793000 9 879.088000 21 782.277000
18
- 8
19
- 10 15 3124.010000 9 3099.920000 14 2756.290000 2 2501.220000 7 2449.320000 1 1875.940000 16 1726.040000 13 1325.760000 23 1177.090000 24 1108.820000
20
- 9
21
- 10 13 3355.620000 14 3226.070000 8 3098.800000 10 3097.070000 1 2861.420000 12 1873.630000 2 1785.980000 15 1753.320000 25 1365.450000 0 1261.590000
22
- 10
23
- 10 12 3750.700000 9 3085.870000 13 3028.390000 1 2590.550000 0 2369.790000 11 2266.670000 14 1524.160000 26 1448.150000 27 1293.600000 8 1041.840000
24
- 11
25
- 10 12 3543.760000 27 3056.050000 10 2248.070000 26 1524.280000 28 1273.330000 13 1265.900000 29 1129.550000 0 998.164000 9 591.176000 30 572.919000
26
- 12
27
- 10 27 3889.870000 10 3754.540000 13 3745.210000 11 3584.260000 26 3574.560000 25 1877.110000 9 1866.340000 29 1482.720000 30 1418.510000 14 1341.860000
28
- 13
29
- 10 12 3773.140000 26 3699.280000 25 3657.170000 14 3652.040000 9 3356.290000 10 3049.270000 24 2098.910000 27 1900.960000 31 1460.960000 30 1349.620000
30
- 14
31
- 10 13 3663.520000 24 3610.690000 9 3232.550000 25 3216.400000 15 3128.840000 8 2758.040000 23 2219.910000 26 1567.450000 10 1536.600000 32 1419.330000
32
- 15
33
- 10 23 3194.920000 14 3126.000000 8 3120.430000 16 2897.020000 24 2562.490000 7 2084.050000 22 2041.630000 9 1752.080000 33 1232.290000 13 1137.550000
34
- 16
35
- 10 15 2884.140000 7 2713.880000 22 2708.570000 17 2448.500000 21 2173.300000 23 1908.030000 8 1718.790000 6 1281.960000 35 1047.380000 34 980.064000
36
- 17
37
- 10 21 2632.480000 16 2428.000000 6 2343.570000 18 2250.230000 20 2149.750000 7 1779.420000 22 1380.250000 36 957.046000 5 878.398000 15 789.068000
38
- 18
39
- 9 17 2219.150000 20 2173.020000 6 1802.390000 19 1575.770000 5 1564.810000 21 1160.130000 16 660.317000 7 589.484000 36 559.983000
40
- 19
41
- 7 20 1828.970000 18 1564.630000 17 685.249000 36 613.420000 21 572.770000 5 427.597000 6 368.651000
42
- 20
43
- 8 21 2569.790000 36 2258.330000 18 2186.710000 17 2130.670000 19 1865.060000 35 996.122000 16 799.808000 40 778.721000
44
- 21
45
- 9 36 2704.590000 35 2639.690000 17 2638.190000 20 2605.430000 22 2604.260000 16 2158.250000 34 1239.250000 18 1178.240000 40 1128.570000
46
- 22
47
- 10 23 3232.680000 34 3175.150000 35 2831.090000 16 2712.510000 21 2632.190000 15 2033.390000 33 1712.670000 17 1393.860000 36 1290.960000 24 1195.330000
48
- 23
49
- 10 24 3710.900000 33 3603.070000 22 3244.200000 15 3190.620000 34 3086.490000 14 2220.110000 32 2100.000000 16 1917.100000 35 1359.790000 25 1356.710000
50
- 24
51
- 10 25 3844.600000 32 3750.750000 23 3710.600000 14 3609.090000 33 3091.040000 15 2559.240000 31 2423.710000 13 2109.360000 26 1440.580000 34 1410.030000
52
- 25
53
- 10 26 3951.740000 31 3888.570000 24 3833.070000 13 3667.350000 14 3208.210000 32 2993.460000 30 2681.520000 12 1900.230000 45 1484.030000 27 1462.880000
54
- 26
55
- 10 30 4033.350000 27 3970.470000 25 3925.250000 13 3686.340000 12 3595.590000 29 2943.870000 31 2917.000000 14 1556.340000 11 1554.750000 46 1503.840000
56
- 27
57
- 10 29 4027.840000 26 3929.940000 12 3875.580000 11 3085.030000 28 2908.600000 30 2792.670000 13 1878.420000 25 1438.550000 47 1425.200000 10 1290.250000
58
- 28
59
- 10 29 3687.020000 48 3209.130000 27 2872.860000 47 2014.530000 30 1361.950000 11 1273.600000 26 1062.850000 12 840.841000 46 672.985000 31 271.952000
60
- 29
61
- 10 27 4029.430000 30 3909.550000 28 3739.930000 47 3695.230000 48 3135.870000 26 2910.970000 46 2229.550000 12 1479.160000 31 1430.260000 11 1144.560000
62
- 30
63
- 10 26 4029.860000 29 3953.720000 31 3811.120000 46 3630.460000 47 3105.960000 27 2824.430000 25 2657.890000 45 2347.750000 32 1459.110000 12 1429.620000
64
- 31
65
- 10 25 3882.210000 30 3841.880000 32 3808.500000 45 3649.820000 46 3000.670000 26 2939.940000 24 2409.930000 44 2381.300000 13 1467.590000 29 1459.560000
66
- 32
67
- 10 31 3826.500000 24 3744.140000 33 3613.240000 44 3552.040000 25 3004.600000 45 2884.590000 43 2393.340000 23 2095.270000 30 1478.600000 14 1420.780000
68
- 33
69
- 10 32 3618.110000 23 3598.100000 34 3530.530000 43 3462.370000 24 3091.530000 44 2608.080000 42 2426.000000 22 1717.940000 31 1407.650000 25 1324.780000
70
- 34
71
- 10 33 3523.370000 42 3356.550000 35 3210.340000 22 3178.850000 23 3079.030000 43 2396.450000 41 2386.860000 24 1408.020000 32 1301.340000 21 1256.450000
72
- 35
73
- 10 34 3187.880000 41 3106.440000 36 2866.040000 22 2817.740000 21 2654.870000 40 2416.980000 42 2137.810000 23 1346.860000 33 1150.330000 16 1044.660000
74
- 36
75
- 8 40 2910.700000 35 2832.660000 21 2689.960000 20 2280.460000 41 1787.970000 22 1268.490000 34 981.636000 17 954.229000
76
- 40
77
- 7 36 2918.140000 41 2852.620000 35 2392.960000 21 1124.300000 42 1056.480000 34 877.946000 20 788.701000
78
- 41
79
- 9 35 3111.050000 42 3049.710000 40 2885.360000 34 2371.020000 36 1813.690000 43 1164.710000 22 1126.900000 21 906.536000 33 903.238000
80
- 42
81
- 10 34 3356.980000 43 3183.000000 41 3070.540000 33 2421.770000 35 2155.080000 44 1278.410000 23 1183.520000 22 1147.070000 40 1077.080000 32 899.646000
82
- 43
83
- 10 33 3461.240000 44 3380.740000 42 3188.700000 34 2400.600000 32 2399.090000 45 1359.370000 23 1314.080000 41 1176.120000 24 1159.620000 31 901.556000
84
- 44
85
- 10 32 3550.810000 45 3510.160000 43 3373.110000 33 2602.330000 31 2395.930000 24 1410.430000 46 1386.310000 42 1279.000000 25 1095.240000 34 968.440000
86
- 45
87
- 10 31 3650.090000 46 3555.090000 44 3491.150000 32 2868.390000 30 2373.590000 25 1485.370000 47 1405.280000 43 1349.540000 33 1104.770000 26 1046.810000
88
- 46
89
- 10 30 3635.640000 47 3562.170000 45 3524.170000 31 2976.820000 29 2264.040000 26 1508.870000 44 1367.410000 48 1352.100000 32 1211.240000 25 1102.170000
90
- 47
91
- 10 29 3705.310000 46 3519.760000 48 3450.480000 30 3074.770000 28 2054.630000 27 1434.570000 45 1377.340000 31 1268.230000 26 1223.830000 25 471.111000
92
- 48
93
- 10 47 3401.950000 28 3224.840000 29 3101.160000 46 1317.100000 30 1306.700000 27 1235.070000 26 537.731000 31 291.919000 45 276.869000 11 258.856000
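
The pair file deleted above follows the common MVSNet layout: the first line stores the number of reference views (46 here), and each reference view then occupies two lines, its id followed by a line that starts with the source-view count and alternates source ids with matching scores. A small parsing sketch, consistent with how `build_metas` in `dtu_general.py` further below reads the same file (the function name here is illustrative):

    def read_pair_file(path):
        """Parse a DTU/MVSNet pair file into {ref_view: [(src_view, score), ...]}."""
        pairs = {}
        with open(path) as f:
            num_viewpoints = int(f.readline())
            for _ in range(num_viewpoints):
                ref_view = int(f.readline().rstrip())
                tokens = f.readline().rstrip().split()
                n_src = int(tokens[0])  # number of (source id, score) pairs that follow
                pairs[ref_view] = [(int(tokens[1 + 2 * i]), float(tokens[2 + 2 * i]))
                                   for i in range(n_src)]
        return pairs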
 
 
 
SparseNeuS_demo_v1/data/dtu/lists/test.txt DELETED
@@ -1,15 +0,0 @@
1
- scan24
2
- scan37
3
- scan40
4
- scan55
5
- scan63
6
- scan65
7
- scan69
8
- scan83
9
- scan97
10
- scan105
11
- scan106
12
- scan110
13
- scan114
14
- scan118
15
- scan122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
SparseNeuS_demo_v1/data/dtu/lists/train.txt DELETED
@@ -1,75 +0,0 @@
1
- scan1
2
- scan4
3
- scan5
4
- scan6
5
- scan8
6
- scan9
7
- scan10
8
- scan11
9
- scan12
10
- scan13
11
- scan14
12
- scan15
13
- scan16
14
- scan17
15
- scan18
16
- scan19
17
- scan20
18
- scan21
19
- scan22
20
- scan23
21
- scan28
22
- scan29
23
- scan30
24
- scan31
25
- scan32
26
- scan33
27
- scan34
28
- scan35
29
- scan36
30
- scan38
31
- scan39
32
- scan41
33
- scan42
34
- scan43
35
- scan44
36
- scan45
37
- scan46
38
- scan47
39
- scan48
40
- scan49
41
- scan50
42
- scan51
43
- scan52
44
- scan59
45
- scan60
46
- scan61
47
- scan62
48
- scan64
49
- scan74
50
- scan75
51
- scan76
52
- scan77
53
- scan84
54
- scan85
55
- scan86
56
- scan87
57
- scan88
58
- scan89
59
- scan90
60
- scan91
61
- scan92
62
- scan93
63
- scan94
64
- scan95
65
- scan96
66
- scan98
67
- scan99
68
- scan100
69
- scan101
70
- scan102
71
- scan103
72
- scan104
73
- scan126
74
- scan127
75
- scan128
 
 
 
 
 
SparseNeuS_demo_v1/data/dtu_fit.py DELETED
@@ -1,278 +0,0 @@
1
- import torch
2
- import torch.nn as nn
3
- import cv2 as cv
4
- import numpy as np
5
- import re
6
- import os
7
- import logging
8
- from glob import glob
9
-
10
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
11
-
12
- from data.scene import get_boundingbox
13
-
14
-
15
- def load_K_Rt_from_P(filename, P=None):
16
- if P is None:
17
- lines = open(filename).read().splitlines()
18
- if len(lines) == 4:
19
- lines = lines[1:]
20
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
21
- P = np.asarray(lines).astype(np.float32).squeeze()
22
-
23
- out = cv.decomposeProjectionMatrix(P)
24
- K = out[0]
25
- R = out[1]
26
- t = out[2]
27
-
28
- K = K / K[2, 2]
29
- intrinsics = np.eye(4)
30
- intrinsics[:3, :3] = K
31
-
32
- pose = np.eye(4, dtype=np.float32)
33
- pose[:3, :3] = R.transpose() # ? why need transpose here
34
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
35
-
36
- return intrinsics, pose # ! return cam2world matrix here
37
-
38
-
39
- class DtuFit:
40
- def __init__(self, root_dir, split, scan_id, n_views, train_img_idx=[], test_img_idx=[],
41
- img_wh=[800, 600], clip_wh=[0, 0], original_img_wh=[1600, 1200],
42
- N_rays=512, h_patch_size=5, near=425, far=900):
43
- super(DtuFit, self).__init__()
44
- logging.info('Load data: Begin')
45
-
46
- self.root_dir = root_dir
47
- self.split = split
48
- self.scan_id = scan_id
49
- self.n_views = n_views
50
-
51
- self.near = near
52
- self.far = far
53
-
54
- if self.scan_id is not None:
55
- self.data_dir = os.path.join(self.root_dir, self.scan_id)
56
- else:
57
- self.data_dir = self.root_dir
58
-
59
- self.img_wh = img_wh
60
- self.clip_wh = clip_wh
61
-
62
- if len(self.clip_wh) == 2:
63
- self.clip_wh = self.clip_wh + self.clip_wh
64
-
65
- self.original_img_wh = original_img_wh
66
- self.N_rays = N_rays
67
- self.h_patch_size = h_patch_size # used to extract patch for supervision
68
- self.train_img_idx = train_img_idx
69
- self.test_img_idx = test_img_idx
70
-
71
- camera_dict = np.load(os.path.join(self.data_dir, 'cameras.npz'), allow_pickle=True)
72
- self.images_list = sorted(glob(os.path.join(self.data_dir, "image/*.png")))
73
- # world_mat: projection matrix: world to image
74
- self.world_mats_np = [camera_dict['world_mat_%d' % idx].astype(np.float32) for idx in
75
- range(len(self.images_list))]
76
-
77
- self.raw_near_fars = np.stack([np.array([self.near, self.far]) for i in range(len(self.images_list))])
78
-
79
- # - reference image; transform the world system to the ref-camera system
80
- self.ref_img_idx = self.train_img_idx[0]
81
- ref_world_mat = self.world_mats_np[self.ref_img_idx]
82
- self.ref_w2c = np.linalg.inv(load_K_Rt_from_P(None, ref_world_mat[:3, :4])[1])
83
-
84
- self.all_images = []
85
- self.all_intrinsics = []
86
- self.all_w2cs = []
87
-
88
- self.load_scene() # load the scene
89
-
90
- # ! estimate scale_mat
91
- self.scale_mat, self.scale_factor = self.cal_scale_mat(
92
- img_hw=[self.img_wh[1], self.img_wh[0]],
93
- intrinsics=self.all_intrinsics[self.train_img_idx],
94
- extrinsics=self.all_w2cs[self.train_img_idx],
95
- near_fars=self.raw_near_fars[self.train_img_idx],
96
- factor=1.1)
97
-
98
- # * after scaling and translation, unit bounding box
99
- self.scaled_intrinsics, self.scaled_w2cs, self.scaled_c2ws, \
100
- self.scaled_affine_mats, self.scaled_near_fars = self.scale_cam_info()
101
- # import ipdb; ipdb.set_trace()
102
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
103
- self.bbox_max = np.array([1.0, 1.0, 1.0])
104
- self.partial_vol_origin = torch.Tensor([-1., -1., -1.])
105
-
106
- logging.info('Load data: End')
107
-
108
- def load_scene(self):
109
-
110
- scale_x = self.img_wh[0] / self.original_img_wh[0]
111
- scale_y = self.img_wh[1] / self.original_img_wh[1]
112
-
113
- for idx in range(len(self.images_list)):
114
- image = cv.imread(self.images_list[idx])
115
- image = cv.resize(image, (self.img_wh[0], self.img_wh[1])) / 255.
116
-
117
- image = image[self.clip_wh[1]:self.img_wh[1] - self.clip_wh[3],
118
- self.clip_wh[0]:self.img_wh[0] - self.clip_wh[2]]
119
- self.all_images.append(np.transpose(image[:, :, ::-1], (2, 0, 1))) # append [3,]
120
-
121
- P = self.world_mats_np[idx]
122
- P = P[:3, :4]
123
- intrinsics, c2w = load_K_Rt_from_P(None, P)
124
- w2c = np.linalg.inv(c2w)
125
-
126
- intrinsics[:1] *= scale_x
127
- intrinsics[1:2] *= scale_y
128
-
129
- intrinsics[0, 2] -= self.clip_wh[0]
130
- intrinsics[1, 2] -= self.clip_wh[1]
131
-
132
- self.all_intrinsics.append(intrinsics)
133
- # - transform from world system to ref-camera system
134
- self.all_w2cs.append(w2c @ np.linalg.inv(self.ref_w2c))
135
-
136
-
137
- self.all_images = torch.from_numpy(np.stack(self.all_images)).to(torch.float32)
138
- self.all_intrinsics = torch.from_numpy(np.stack(self.all_intrinsics)).to(torch.float32)
139
- self.all_w2cs = torch.from_numpy(np.stack(self.all_w2cs)).to(torch.float32)
140
- self.img_wh = [self.img_wh[0] - self.clip_wh[0] - self.clip_wh[2],
141
- self.img_wh[1] - self.clip_wh[1] - self.clip_wh[3]]
142
-
143
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
144
- center, radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
145
- radius = radius * factor
146
- scale_mat = np.diag([radius, radius, radius, 1.0])
147
- scale_mat[:3, 3] = center.cpu().numpy()
148
- scale_mat = scale_mat.astype(np.float32)
149
-
150
- return scale_mat, 1. / radius.cpu().numpy()
151
-
152
- def scale_cam_info(self):
153
- new_intrinsics = []
154
- new_near_fars = []
155
- new_w2cs = []
156
- new_c2ws = []
157
- new_affine_mats = []
158
- for idx in range(len(self.all_images)):
159
- intrinsics = self.all_intrinsics[idx]
160
- P = intrinsics @ self.all_w2cs[idx] @ self.scale_mat
161
- P = P.cpu().numpy()[:3, :4]
162
-
163
- # - should use load_K_Rt_from_P() to obtain c2w
164
- c2w = load_K_Rt_from_P(None, P)[1]
165
- w2c = np.linalg.inv(c2w)
166
- new_w2cs.append(w2c)
167
- new_c2ws.append(c2w)
168
- new_intrinsics.append(intrinsics)
169
- affine_mat = np.eye(4)
170
- affine_mat[:3, :4] = intrinsics[:3, :3] @ w2c[:3, :4]
171
- new_affine_mats.append(affine_mat)
172
-
173
- camera_o = c2w[:3, 3]
174
- dist = np.sqrt(np.sum(camera_o ** 2))
175
- near = dist - 1
176
- far = dist + 1
177
-
178
- new_near_fars.append([0.95 * near, 1.05 * far])
179
-
180
- new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars = \
181
- np.stack(new_intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), \
182
- np.stack(new_affine_mats), np.stack(new_near_fars)
183
-
184
- new_intrinsics = torch.from_numpy(np.float32(new_intrinsics))
185
- new_w2cs = torch.from_numpy(np.float32(new_w2cs))
186
- new_c2ws = torch.from_numpy(np.float32(new_c2ws))
187
- new_affine_mats = torch.from_numpy(np.float32(new_affine_mats))
188
- new_near_fars = torch.from_numpy(np.float32(new_near_fars))
189
-
190
- return new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars
191
-
192
-
193
- def get_conditional_sample(self):
194
- sample = {}
195
- support_idxs = self.train_img_idx
196
-
197
- sample['images'] = self.all_images[support_idxs] # (V, 3, H, W)
198
- sample['w2cs'] = self.scaled_w2cs[self.train_img_idx] # (V, 4, 4)
199
- sample['c2ws'] = self.scaled_c2ws[self.train_img_idx] # (V, 4, 4)
200
- sample['near_fars'] = self.scaled_near_fars[self.train_img_idx] # (V, 2)
201
- sample['intrinsics'] = self.scaled_intrinsics[self.train_img_idx][:, :3, :3] # (V, 3, 3)
202
- sample['affine_mats'] = self.scaled_affine_mats[self.train_img_idx] # ! in world space
203
-
204
- sample['scan'] = self.scan_id
205
- sample['scale_factor'] = torch.tensor(self.scale_factor)
206
- sample['scale_mat'] = torch.from_numpy(self.scale_mat)
207
- sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c))
208
- sample['img_wh'] = torch.from_numpy(np.array(self.img_wh))
209
- sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32)
210
-
211
- return sample
212
-
213
- def __len__(self):
214
- if self.split == 'train':
215
- return self.n_views * 1000
216
- else:
217
- return len(self.test_img_idx) * 1000
218
-
219
- def __getitem__(self, idx):
220
- sample = {}
221
-
222
- if self.split == 'train':
223
- render_idx = self.train_img_idx[idx % self.n_views]
224
- support_idxs = [idx for idx in self.train_img_idx if idx != render_idx]
225
- else:
226
- # render_idx = idx % self.n_test_images + self.n_train_images
227
- render_idx = self.test_img_idx[idx % len(self.test_img_idx)]
228
- support_idxs = [render_idx]
229
-
230
- sample['images'] = self.all_images[support_idxs] # (V, 3, H, W)
231
- sample['w2cs'] = self.scaled_w2cs[support_idxs] # (V, 4, 4)
232
- sample['c2ws'] = self.scaled_c2ws[support_idxs] # (V, 4, 4)
233
- sample['intrinsics'] = self.scaled_intrinsics[support_idxs][:, :3, :3] # (V, 3, 3)
234
- sample['affine_mats'] = self.scaled_affine_mats[support_idxs] # ! in world space
235
- sample['scan'] = self.scan_id
236
- sample['scale_factor'] = torch.tensor(self.scale_factor)
237
- sample['img_wh'] = torch.from_numpy(np.array(self.img_wh))
238
- sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32)
239
- sample['img_index'] = torch.tensor(render_idx)
240
-
241
- # - query image
242
- sample['query_image'] = self.all_images[render_idx]
243
- sample['query_c2w'] = self.scaled_c2ws[render_idx]
244
- sample['query_w2c'] = self.scaled_w2cs[render_idx]
245
- sample['query_intrinsic'] = self.scaled_intrinsics[render_idx]
246
- sample['query_near_far'] = self.scaled_near_fars[render_idx]
247
- sample['meta'] = str(self.scan_id) + "_" + os.path.basename(self.images_list[render_idx])
248
- sample['scale_mat'] = torch.from_numpy(self.scale_mat)
249
- sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c))
250
- sample['rendering_c2ws'] = self.scaled_c2ws[self.test_img_idx]
251
- sample['rendering_imgs_idx'] = torch.Tensor(np.array(self.test_img_idx).astype(np.int32))
252
-
253
- # - generate rays
254
- if self.split == 'val' or self.split == 'test':
255
- sample_rays = gen_rays_from_single_image(
256
- self.img_wh[1], self.img_wh[0],
257
- sample['query_image'],
258
- sample['query_intrinsic'],
259
- sample['query_c2w'],
260
- depth=None,
261
- mask=None)
262
- else:
263
- sample_rays = gen_random_rays_from_single_image(
264
- self.img_wh[1], self.img_wh[0],
265
- self.N_rays,
266
- sample['query_image'],
267
- sample['query_intrinsic'],
268
- sample['query_c2w'],
269
- depth=None,
270
- mask=None,
271
- dilated_mask=None,
272
- importance_sample=False,
273
- h_patch_size=self.h_patch_size
274
- )
275
-
276
- sample['rays'] = sample_rays
277
-
278
- return sample
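
`load_K_Rt_from_P` above (and its copies in the other loaders) answers its own `# ? why need transpose here` comment: `cv2.decomposeProjectionMatrix` returns the intrinsics, the world-to-camera rotation R, and the camera centre in homogeneous coordinates, so transposing R and dehomogenising the centre yields the camera-to-world pose. A standalone sketch of the same decomposition, with an illustrative function name:

    import cv2
    import numpy as np

    def decompose_projection(P):
        """Split a 3x4 projection matrix P = K [R | t] into intrinsics and a cam2world pose."""
        K, R, c_h = cv2.decomposeProjectionMatrix(P)[:3]
        K = K / K[2, 2]                         # normalise so that K[2, 2] == 1
        pose = np.eye(4, dtype=np.float32)
        pose[:3, :3] = R.transpose()            # R maps world->camera, so R^T is camera->world
        pose[:3, 3] = (c_h[:3] / c_h[3])[:, 0]  # homogeneous camera centre -> world position
        return K, pose

Applying this to `intrinsic @ w2c @ scale_mat`, as `scale_cam_info` does above, re-expresses every camera in the unit-sphere-normalised world frame.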
 
 
 
 
 
SparseNeuS_demo_v1/data/dtu_general.py DELETED
@@ -1,376 +0,0 @@
1
- from torch.utils.data import Dataset
2
- from utils.misc_utils import read_pfm
3
- import os
4
- import numpy as np
5
- import cv2
6
- from PIL import Image
7
- import torch
8
- from torchvision import transforms as T
9
- from data.scene import get_boundingbox
10
-
11
- from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
-
13
- from termcolor import colored
14
- import pdb
15
- import random
16
-
17
-
18
- def load_K_Rt_from_P(filename, P=None):
19
- if P is None:
20
- lines = open(filename).read().splitlines()
21
- if len(lines) == 4:
22
- lines = lines[1:]
23
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
24
- P = np.asarray(lines).astype(np.float32).squeeze()
25
-
26
- out = cv2.decomposeProjectionMatrix(P)
27
- K = out[0]
28
- R = out[1]
29
- t = out[2]
30
-
31
- K = K / K[2, 2]
32
- intrinsics = np.eye(4)
33
- intrinsics[:3, :3] = K
34
-
35
- pose = np.eye(4, dtype=np.float32)
36
- pose[:3, :3] = R.transpose() # ? why need transpose here
37
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
38
-
39
- return intrinsics, pose # ! return cam2world matrix here
40
-
41
-
42
- # ! load one ref-image with multiple src-images in camera coordinate system
43
- class MVSDatasetDtuPerView(Dataset):
44
- def __init__(self, root_dir, split, n_views=3, img_wh=(640, 512), downSample=1.0,
45
- split_filepath=None, pair_filepath=None,
46
- N_rays=512,
47
- vol_dims=[128, 128, 128], batch_size=1,
48
- clean_image=False, importance_sample=False, test_ref_views=[]):
49
-
50
- self.root_dir = root_dir
51
- self.split = split
52
-
53
- self.img_wh = img_wh
54
- self.downSample = downSample
55
- self.num_all_imgs = 49 # this preprocessed DTU dataset has 49 images
56
- self.n_views = n_views
57
- self.N_rays = N_rays
58
- self.batch_size = batch_size # - used for construct new metas for gru fusion training
59
-
60
- self.clean_image = clean_image
61
- self.importance_sample = importance_sample
62
- self.test_ref_views = test_ref_views # used for testing
63
- self.scale_factor = 1.0
64
- self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
65
-
66
- if img_wh is not None:
67
- assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
68
- 'img_wh must both be multiples of 32!'
69
-
70
- self.split_filepath = f'data/dtu/lists/{self.split}.txt' if split_filepath is None else split_filepath
71
- self.pair_filepath = f'data/dtu/dtu_pairs.txt' if pair_filepath is None else pair_filepath
72
-
73
- print(colored("loading all scenes together", 'red'))
74
- with open(self.split_filepath) as f:
75
- self.scans = [line.rstrip() for line in f.readlines()]
76
-
77
- self.all_intrinsics = [] # the cam info of the whole scene
78
- self.all_extrinsics = []
79
- self.all_near_fars = []
80
-
81
- self.metas, self.ref_src_pairs = self.build_metas() # load ref-srcs view pairs info of the scene
82
-
83
- self.allview_ids = [i for i in range(self.num_all_imgs)]
84
-
85
- self.load_cam_info() # load camera info of DTU, and estimate scale_mat
86
-
87
- self.build_remap()
88
- self.define_transforms()
89
- print(f'==> image down scale: {self.downSample}')
90
-
91
- # * bounding box for rendering
92
- self.bbox_min = np.array([-1.0, -1.0, -1.0])
93
- self.bbox_max = np.array([1.0, 1.0, 1.0])
94
-
95
- # - used for cost volume regularization
96
- self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
97
- self.partial_vol_origin = torch.Tensor([-1., -1., -1.])
98
-
99
- def build_remap(self):
100
- self.remap = np.zeros(np.max(self.allview_ids) + 1).astype('int')
101
- for i, item in enumerate(self.allview_ids):
102
- self.remap[item] = i
103
-
104
- def define_transforms(self):
105
- self.transform = T.Compose([T.ToTensor()])
106
-
107
- def build_metas(self):
108
- metas = []
109
- ref_src_pairs = {}
110
- # light conditions 0-6 for training
111
- # light condition 3 for testing (the brightest?)
112
- light_idxs = [3] if 'train' not in self.split else range(7)
113
-
114
- with open(self.pair_filepath) as f:
115
- num_viewpoint = int(f.readline())
116
- # viewpoints (49)
117
- for _ in range(num_viewpoint):
118
- ref_view = int(f.readline().rstrip())
119
- src_views = [int(x) for x in f.readline().rstrip().split()[1::2]]
120
-
121
- ref_src_pairs[ref_view] = src_views
122
-
123
- for light_idx in light_idxs:
124
- for scan in self.scans:
125
- with open(self.pair_filepath) as f:
126
- num_viewpoint = int(f.readline())
127
- # viewpoints (49)
128
- for _ in range(num_viewpoint):
129
- ref_view = int(f.readline().rstrip())
130
- src_views = [int(x) for x in f.readline().rstrip().split()[1::2]]
131
-
132
- # ! only for validation
133
- if len(self.test_ref_views) > 0 and ref_view not in self.test_ref_views:
134
- continue
135
-
136
- metas += [(scan, light_idx, ref_view, src_views)]
137
-
138
- return metas, ref_src_pairs
139
-
140
- def read_cam_file(self, filename):
141
- with open(filename) as f:
142
- lines = [line.rstrip() for line in f.readlines()]
143
- # extrinsics: line [1,5), 4x4 matrix
144
- extrinsics = np.fromstring(' '.join(lines[1:5]), dtype=np.float32, sep=' ')
145
- extrinsics = extrinsics.reshape((4, 4))
146
- # intrinsics: line [7-10), 3x3 matrix
147
- intrinsics = np.fromstring(' '.join(lines[7:10]), dtype=np.float32, sep=' ')
148
- intrinsics = intrinsics.reshape((3, 3))
149
- # depth_min & depth_interval: line 11
150
- depth_min = float(lines[11].split()[0])
151
- depth_max = depth_min + float(lines[11].split()[1]) * 192
152
- self.depth_interval = float(lines[11].split()[1])
153
- intrinsics_ = np.float32(np.diag([1, 1, 1, 1]))
154
- intrinsics_[:3, :3] = intrinsics
155
- return intrinsics_, extrinsics, [depth_min, depth_max]
156
-
157
- def load_cam_info(self):
158
- for vid in range(self.num_all_imgs):
159
- proj_mat_filename = os.path.join(self.root_dir,
160
- f'Cameras/train/{vid:08d}_cam.txt')
161
- intrinsic, extrinsic, near_far = self.read_cam_file(proj_mat_filename)
162
- intrinsic[:2] *= 4 # * the provided intrinsics is 4x downsampled, now keep the same scale with image
163
- self.all_intrinsics.append(intrinsic)
164
- self.all_extrinsics.append(extrinsic)
165
- self.all_near_fars.append(near_far)
166
-
167
- def read_depth(self, filename):
168
- # import ipdb; ipdb.set_trace()
169
- depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600)
170
- depth_h = np.ones((1200, 1600))
171
- # print(depth_h.shape)
172
- depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5,
173
- interpolation=cv2.INTER_NEAREST) # (600, 800)
174
- depth_h = depth_h[44:556, 80:720] # (512, 640)
175
- # print(depth_h.shape)
176
- # import ipdb; ipdb.set_trace()
177
- depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample,
178
- interpolation=cv2.INTER_NEAREST)
179
- depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4,
180
- interpolation=cv2.INTER_NEAREST)
181
-
182
- return depth, depth_h
183
-
184
- def read_mask(self, filename):
185
- mask_h = cv2.imread(filename, 0)
186
- mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
187
- interpolation=cv2.INTER_NEAREST)
188
- mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
189
- interpolation=cv2.INTER_NEAREST)
190
-
191
- mask[mask > 0] = 1 # the masks stored in png are not binary
192
- mask_h[mask_h > 0] = 1
193
-
194
- return mask, mask_h
195
-
196
- def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
197
- center, radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
198
- radius = radius * factor
199
- scale_mat = np.diag([radius, radius, radius, 1.0])
200
- scale_mat[:3, 3] = center.cpu().numpy()
201
- scale_mat = scale_mat.astype(np.float32)
202
-
203
- return scale_mat, 1. / radius.cpu().numpy()
204
-
205
- def __len__(self):
206
- return len(self.metas)
207
-
208
- def __getitem__(self, idx):
209
- sample = {}
210
- scan, light_idx, ref_view, src_views = self.metas[idx % len(self.metas)]
211
-
212
- # generalized, load some images at once
213
- view_ids = [ref_view] + src_views[:self.n_views]
214
- # * transform from world system to camera system
215
- w2c_ref = self.all_extrinsics[self.remap[ref_view]]
216
- w2c_ref_inv = np.linalg.inv(w2c_ref)
217
-
218
- image_perm = 0 # only supervised on reference view
219
-
220
- imgs, depths_h, masks_h = [], [], [] # full size (640, 512)
221
- intrinsics, w2cs, near_fars = [], [], [] # record proj mats between views
222
- mask_dilated = None
223
- for i, vid in enumerate(view_ids):
224
- # NOTE that the id in image file names is from 1 to 49 (not 0~48)
225
- img_filename = os.path.join(self.root_dir,
226
- f'Rectified/{scan}_train/rect_{vid + 1:03d}_{light_idx}_r5000.png')
227
- depth_filename = os.path.join(self.root_dir,
228
- f'Depths/{scan}_train/depth_map_{vid:04d}.pfm')
229
- # print(depth_filename)
230
- mask_filename = os.path.join(self.root_dir,
231
- f'Masks_clean_dilated/{scan}_train/mask_{vid:04d}.png')
232
-
233
- img = Image.open(img_filename)
234
- img_wh = np.round(np.array(img.size) * self.downSample).astype('int')
235
- img = img.resize(img_wh, Image.BILINEAR)
236
-
237
- if os.path.exists(mask_filename) and self.clean_image:
238
- mask_l, mask_h = self.read_mask(mask_filename)
239
- else:
240
- # print(self.split, "don't find mask file", mask_filename)
241
- mask_h = np.ones([img_wh[1], img_wh[0]])
242
- masks_h.append(mask_h)
243
-
244
- if i == 0:
245
- kernel_size = 101 # default 101
246
- kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
247
- mask_dilated = np.float32(cv2.dilate(np.uint8(mask_h * 255), kernel, iterations=1) > 128)
248
-
249
- if self.clean_image:
250
- img = np.array(img)
251
- img[mask_h < 0.5] = 0.0
252
-
253
- img = self.transform(img)
254
-
255
- imgs += [img]
256
-
257
- index_mat = self.remap[vid]
258
- near_fars.append(self.all_near_fars[index_mat])
259
- intrinsics.append(self.all_intrinsics[index_mat])
260
-
261
- w2cs.append(self.all_extrinsics[index_mat] @ w2c_ref_inv)
262
-
263
- # print(depth_filename)
264
- if os.path.exists(depth_filename): # and i == 0
265
- # print("file exists")
266
- depth_l, depth_h = self.read_depth(depth_filename)
267
- depths_h.append(depth_h)
268
- # ! estimate scale_mat
269
- scale_mat, scale_factor = self.cal_scale_mat(img_hw=[img_wh[1], img_wh[0]],
270
- intrinsics=intrinsics, extrinsics=w2cs,
271
- near_fars=near_fars, factor=1.1)
272
-
273
- # ! calculate the new w2cs after scaling
274
- new_near_fars = []
275
- new_w2cs = []
276
- new_c2ws = []
277
- new_affine_mats = []
278
- new_depths_h = []
279
- for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
280
- P = intrinsic @ extrinsic @ scale_mat
281
- P = P[:3, :4]
282
- # - should use load_K_Rt_from_P() to obtain c2w
283
- c2w = load_K_Rt_from_P(None, P)[1]
284
- w2c = np.linalg.inv(c2w)
285
- new_w2cs.append(w2c)
286
- new_c2ws.append(c2w)
287
- affine_mat = np.eye(4)
288
- affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
289
- new_affine_mats.append(affine_mat)
290
-
291
- camera_o = c2w[:3, 3]
292
- dist = np.sqrt(np.sum(camera_o ** 2))
293
- near = dist - 1
294
- far = dist + 1
295
-
296
- new_near_fars.append([0.95 * near, 1.05 * far])
297
- new_depths_h.append(depth * scale_factor)
298
-
299
- imgs = torch.stack(imgs).float()
300
- print(new_near_fars)
301
- depths_h = np.stack(new_depths_h)
302
- masks_h = np.stack(masks_h)
303
-
304
- affine_mats = np.stack(new_affine_mats)
305
- intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
306
- new_near_fars)
307
-
308
- if 'train' in self.split:
309
- start_idx = 0
310
- else:
311
- start_idx = 1
312
-
313
- sample['images'] = imgs # (V, 3, H, W)
314
- sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
315
- sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
316
- sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
317
- sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
318
- sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
319
- sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
320
- sample['view_ids'] = torch.from_numpy(np.array(view_ids))
321
- sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
322
-
323
- sample['light_idx'] = torch.tensor(light_idx)
324
- sample['scan'] = scan
325
-
326
- sample['scale_factor'] = torch.tensor(scale_factor)
327
- sample['img_wh'] = torch.from_numpy(img_wh)
328
- sample['render_img_idx'] = torch.tensor(image_perm)
329
- sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32)
330
- sample['meta'] = str(scan) + "_light" + str(light_idx) + "_refview" + str(ref_view)
331
-
332
- # - image to render
333
- sample['query_image'] = sample['images'][0]
334
- sample['query_c2w'] = sample['c2ws'][0]
335
- sample['query_w2c'] = sample['w2cs'][0]
336
- sample['query_intrinsic'] = sample['intrinsics'][0]
337
- sample['query_depth'] = sample['depths_h'][0]
338
- sample['query_mask'] = sample['masks_h'][0]
339
- sample['query_near_far'] = sample['near_fars'][0]
340
-
341
- sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
342
- sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
343
- sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
344
- sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
345
- sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
346
- sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
347
- sample['view_ids'] = sample['view_ids'][start_idx:]
348
- sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
349
-
350
- sample['scale_mat'] = torch.from_numpy(scale_mat)
351
- sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
352
-
353
- # - generate rays
354
- if ('val' in self.split) or ('test' in self.split):
355
- sample_rays = gen_rays_from_single_image(
356
- img_wh[1], img_wh[0],
357
- sample['query_image'],
358
- sample['query_intrinsic'],
359
- sample['query_c2w'],
360
- depth=sample['query_depth'],
361
- mask=sample['query_mask'] if self.clean_image else None)
362
- else:
363
- sample_rays = gen_random_rays_from_single_image(
364
- img_wh[1], img_wh[0],
365
- self.N_rays,
366
- sample['query_image'],
367
- sample['query_intrinsic'],
368
- sample['query_c2w'],
369
- depth=sample['query_depth'],
370
- mask=sample['query_mask'] if self.clean_image else None,
371
- dilated_mask=mask_dilated,
372
- importance_sample=self.importance_sample)
373
-
374
- sample['rays'] = sample_rays
375
-
376
- return sample
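
Both DTU loaders above recompute per-view near/far planes after rescaling the scene with `scale_mat`: once the object fits inside (roughly) a unit sphere at the origin, each camera's depth range can be bounded by its distance to the origin plus or minus the sphere radius, padded by a small safety margin. A minimal sketch of that bound, assuming a camera-to-world matrix already expressed in the normalised frame:

    import numpy as np

    def near_far_from_unit_sphere(c2w, margin=0.05):
        """Near/far planes for a camera looking at a scene scaled into the unit sphere."""
        dist = np.linalg.norm(c2w[:3, 3])   # distance from camera centre to the scene origin
        near, far = dist - 1.0, dist + 1.0  # the surface lies within radius 1 of the origin
        return (1.0 - margin) * near, (1.0 + margin) * far  # matches the 0.95 / 1.05 padding above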
 
 
 
 
 
SparseNeuS_demo_v1/data/scene.py CHANGED
@@ -1,6 +1,5 @@
1
  import numpy as np
2
  import torch
3
- import pdb
4
 
5
 
6
  def rigid_transform(xyz, transform):
 
1
  import numpy as np
2
  import torch
 
3
 
4
 
5
  def rigid_transform(xyz, transform):
SparseNeuS_demo_v1/evaluation/__init__.py DELETED
File without changes
SparseNeuS_demo_v1/evaluation/clean_mesh.py DELETED
@@ -1,283 +0,0 @@
1
- import numpy as np
2
- import cv2 as cv
3
- import os
4
- from glob import glob
5
- from scipy.io import loadmat
6
- import trimesh
7
- import open3d as o3d
8
- import torch
9
- from tqdm import tqdm
10
-
11
- import sys
12
-
13
- sys.path.append("../")
14
-
15
-
16
- def gen_rays_from_single_image(H, W, image, intrinsic, c2w, depth=None, mask=None):
17
- """
18
- generate rays in world space, for image image
19
- :param H:
20
- :param W:
21
- :param intrinsics: [3,3]
22
- :param c2ws: [4,4]
23
- :return:
24
- """
25
- device = image.device
26
- ys, xs = torch.meshgrid(torch.linspace(0, H - 1, H),
27
- torch.linspace(0, W - 1, W)) # pytorch's meshgrid has indexing='ij'
28
- p = torch.stack([xs, ys, torch.ones_like(ys)], dim=-1) # H, W, 3
29
-
30
- # normalized ndc uv coordinates, (-1, 1)
31
- ndc_u = 2 * xs / (W - 1) - 1
32
- ndc_v = 2 * ys / (H - 1) - 1
33
- rays_ndc_uv = torch.stack([ndc_u, ndc_v], dim=-1).view(-1, 2).float().to(device)
34
-
35
- intrinsic_inv = torch.inverse(intrinsic)
36
-
37
- p = p.view(-1, 3).float().to(device) # N_rays, 3
38
- p = torch.matmul(intrinsic_inv[None, :3, :3], p[:, :, None]).squeeze() # N_rays, 3
39
- rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True) # N_rays, 3
40
- rays_v = torch.matmul(c2w[None, :3, :3], rays_v[:, :, None]).squeeze() # N_rays, 3
41
- rays_o = c2w[None, :3, 3].expand(rays_v.shape) # N_rays, 3
42
-
43
- image = image.permute(1, 2, 0)
44
- color = image.view(-1, 3)
45
- depth = depth.view(-1, 1) if depth is not None else None
46
- mask = mask.view(-1, 1) if mask is not None else torch.ones([H * W, 1]).to(device)
47
- sample = {
48
- 'rays_o': rays_o,
49
- 'rays_v': rays_v,
50
- 'rays_ndc_uv': rays_ndc_uv,
51
- 'rays_color': color,
52
- # 'rays_depth': depth,
53
- 'rays_mask': mask,
54
- 'rays_norm_XYZ_cam': p # - XYZ_cam, before multiply depth
55
- }
56
- if depth is not None:
57
- sample['rays_depth'] = depth
58
-
59
- return sample
60
-
61
-
62
- def load_K_Rt_from_P(filename, P=None):
63
- if P is None:
64
- lines = open(filename).read().splitlines()
65
- if len(lines) == 4:
66
- lines = lines[1:]
67
- lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
68
- P = np.asarray(lines).astype(np.float32).squeeze()
69
-
70
- out = cv.decomposeProjectionMatrix(P)
71
- K = out[0]
72
- R = out[1]
73
- t = out[2]
74
-
75
- K = K / K[2, 2]
76
- intrinsics = np.eye(4)
77
- intrinsics[:3, :3] = K
78
-
79
- pose = np.eye(4, dtype=np.float32)
80
- pose[:3, :3] = R.transpose() # ? why need transpose here
81
- pose[:3, 3] = (t[:3] / t[3])[:, 0]
82
-
83
- return intrinsics, pose # ! return cam2world matrix here
84
-
85
-
86
- def clean_points_by_mask(points, scan, imgs_idx=None, minimal_vis=0, mask_dilated_size=11):
87
- cameras = np.load('{}/scan{}/cameras.npz'.format(DTU_DIR, scan))
88
- mask_lis = sorted(glob('{}/scan{}/mask/*.png'.format(DTU_DIR, scan)))
89
- n_images = 49 if scan < 83 else 64
90
- inside_mask = np.zeros(len(points))
91
-
92
- if imgs_idx is None:
93
- imgs_idx = [i for i in range(n_images)]
94
-
95
- # imgs_idx = [i for i in range(n_images)]
96
- for i in imgs_idx:
97
- P = cameras['world_mat_{}'.format(i)]
98
- pts_image = np.matmul(P[None, :3, :3], points[:, :, None]).squeeze() + P[None, :3, 3]
99
- pts_image = pts_image / pts_image[:, 2:]
100
- pts_image = np.round(pts_image).astype(np.int32) + 1
101
-
102
- mask_image = cv.imread(mask_lis[i])
103
- kernel_size = mask_dilated_size # default 101
104
- kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (kernel_size, kernel_size))
105
- mask_image = cv.dilate(mask_image, kernel, iterations=1)
106
- mask_image = (mask_image[:, :, 0] > 128)
107
-
108
- mask_image = np.concatenate([np.ones([1, 1600]), mask_image, np.ones([1, 1600])], axis=0)
109
- mask_image = np.concatenate([np.ones([1202, 1]), mask_image, np.ones([1202, 1])], axis=1)
110
-
111
- in_mask = (pts_image[:, 0] >= 0) * (pts_image[:, 0] <= 1600) * (pts_image[:, 1] >= 0) * (
112
- pts_image[:, 1] <= 1200) > 0
113
- curr_mask = mask_image[(pts_image[:, 1].clip(0, 1201), pts_image[:, 0].clip(0, 1601))]
114
-
115
- curr_mask = curr_mask.astype(np.float32) * in_mask
116
-
117
- inside_mask += curr_mask
118
-
119
- return inside_mask > minimal_vis
120
-
121
-
122
- def clean_mesh_faces_by_mask(mesh_file, new_mesh_file, scan, imgs_idx, minimal_vis=0, mask_dilated_size=11):
123
- old_mesh = trimesh.load(mesh_file)
124
- old_vertices = old_mesh.vertices[:]
125
- old_faces = old_mesh.faces[:]
126
- mask = clean_points_by_mask(old_vertices, scan, imgs_idx, minimal_vis, mask_dilated_size)
127
- indexes = np.ones(len(old_vertices)) * -1
128
- indexes = indexes.astype(np.long)
129
- indexes[np.where(mask)] = np.arange(len(np.where(mask)[0]))
130
-
131
- faces_mask = mask[old_faces[:, 0]] & mask[old_faces[:, 1]] & mask[old_faces[:, 2]]
132
- new_faces = old_faces[np.where(faces_mask)]
133
- new_faces[:, 0] = indexes[new_faces[:, 0]]
134
- new_faces[:, 1] = indexes[new_faces[:, 1]]
135
- new_faces[:, 2] = indexes[new_faces[:, 2]]
136
- new_vertices = old_vertices[np.where(mask)]
137
-
138
- new_mesh = trimesh.Trimesh(new_vertices, new_faces)
139
-
140
- new_mesh.export(new_mesh_file)
141
-
142
-
143
- def clean_mesh_by_faces_num(mesh, faces_num=500):
144
- old_vertices = mesh.vertices[:]
145
- old_faces = mesh.faces[:]
146
-
147
- cc = trimesh.graph.connected_components(mesh.face_adjacency, min_len=faces_num)
148
- mask = np.zeros(len(mesh.faces), dtype=np.bool)
149
- mask[np.concatenate(cc)] = True
150
-
151
- indexes = np.ones(len(old_vertices)) * -1
152
- indexes = indexes.astype(np.long)
153
- indexes[np.where(mask)] = np.arange(len(np.where(mask)[0]))
154
-
155
- faces_mask = mask[old_faces[:, 0]] & mask[old_faces[:, 1]] & mask[old_faces[:, 2]]
156
- new_faces = old_faces[np.where(faces_mask)]
157
- new_faces[:, 0] = indexes[new_faces[:, 0]]
158
- new_faces[:, 1] = indexes[new_faces[:, 1]]
159
- new_faces[:, 2] = indexes[new_faces[:, 2]]
160
- new_vertices = old_vertices[np.where(mask)]
161
-
162
- new_mesh = trimesh.Trimesh(new_vertices, new_faces)
163
-
164
- return new_mesh
165
-
166
-
167
- def clean_mesh_faces_outside_frustum(old_mesh_file, new_mesh_file, imgs_idx, H=1200, W=1600, mask_dilated_size=11,
168
- isolated_face_num=500, keep_largest=True):
169
- '''Remove faces of mesh which cannot be orserved by all cameras
170
- '''
171
- # if path_mask_npz:
172
- # path_save_clean = IOUtils.add_file_name_suffix(path_save_clean, '_mask')
173
-
174
- cameras = np.load('{}/scan{}/cameras.npz'.format(DTU_DIR, scan))
175
- mask_lis = sorted(glob('{}/scan{}/mask/*.png'.format(DTU_DIR, scan)))
176
-
177
- mesh = trimesh.load(old_mesh_file)
178
- intersector = trimesh.ray.ray_pyembree.RayMeshIntersector(mesh)
179
-
180
- all_indices = []
181
- chunk_size = 5120
182
- for i in imgs_idx:
183
- mask_image = cv.imread(mask_lis[i])
184
- kernel_size = mask_dilated_size # default 101
185
- kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (kernel_size, kernel_size))
186
- mask_image = cv.dilate(mask_image, kernel, iterations=1)
187
-
188
- P = cameras['world_mat_{}'.format(i)]
189
-
190
- intrinsic, pose = load_K_Rt_from_P(None, P[:3, :])
191
-
192
- rays = gen_rays_from_single_image(H, W, torch.from_numpy(mask_image).permute(2, 0, 1).float(),
193
- torch.from_numpy(intrinsic)[:3, :3].float(),
194
- torch.from_numpy(pose).float())
195
- rays_o = rays['rays_o']
196
- rays_d = rays['rays_v']
197
- rays_mask = rays['rays_color']
198
-
199
- rays_o = rays_o.split(chunk_size)
200
- rays_d = rays_d.split(chunk_size)
201
- rays_mask = rays_mask.split(chunk_size)
202
-
203
- for rays_o_batch, rays_d_batch, rays_mask_batch in tqdm(zip(rays_o, rays_d, rays_mask)):
204
- rays_mask_batch = rays_mask_batch[:, 0] > 128
205
- rays_o_batch = rays_o_batch[rays_mask_batch]
206
- rays_d_batch = rays_d_batch[rays_mask_batch]
207
-
208
- idx_faces_hits = intersector.intersects_first(rays_o_batch.cpu().numpy(), rays_d_batch.cpu().numpy())
209
- all_indices.append(idx_faces_hits)
210
-
211
- values = np.unique(np.concatenate(all_indices, axis=0))
212
- mask_faces = np.ones(len(mesh.faces))
213
- mask_faces[values[1:]] = 0
214
- print(f'Surfaces/Kept: {len(mesh.faces)}/{len(values)}')
215
-
216
- mesh_o3d = o3d.io.read_triangle_mesh(old_mesh_file)
217
- print("removing triangles by mask")
218
- mesh_o3d.remove_triangles_by_mask(mask_faces)
219
-
220
- o3d.io.write_triangle_mesh(new_mesh_file, mesh_o3d)
221
-
222
- # # clean meshes
223
- new_mesh = trimesh.load(new_mesh_file)
224
- cc = trimesh.graph.connected_components(new_mesh.face_adjacency, min_len=500)
225
- mask = np.zeros(len(new_mesh.faces), dtype=np.bool)
226
- mask[np.concatenate(cc)] = True
227
- new_mesh.update_faces(mask)
228
- new_mesh.remove_unreferenced_vertices()
229
- new_mesh.export(new_mesh_file)
230
-
231
- # meshes = new_mesh.split(only_watertight=False)
232
- #
233
- # if not keep_largest:
234
- # meshes = [mesh for mesh in meshes if len(mesh.faces) > isolated_face_num]
235
- # # new_mesh = meshes[np.argmax([len(mesh.faces) for mesh in meshes])]
236
- # merged_mesh = trimesh.util.concatenate(meshes)
237
- # merged_mesh.export(new_mesh_file)
238
- # else:
239
- # new_mesh = meshes[np.argmax([len(mesh.faces) for mesh in meshes])]
240
- # new_mesh.export(new_mesh_file)
241
-
242
- o3d.io.write_triangle_mesh(new_mesh_file.replace(".ply", "_raw.ply"), mesh_o3d)
243
- print("finishing removing triangles")
244
-
245
-
246
- def clean_outliers(old_mesh_file, new_mesh_file):
247
- new_mesh = trimesh.load(old_mesh_file)
248
-
249
- meshes = new_mesh.split(only_watertight=False)
250
- new_mesh = meshes[np.argmax([len(mesh.faces) for mesh in meshes])]
251
-
252
- new_mesh.export(new_mesh_file)
253
-
254
-
255
- if __name__ == "__main__":
256
-
257
- scans = [24, 37, 40, 55, 63, 65, 69, 83, 97, 105, 106, 110, 114, 118, 122]
258
-
259
- mask_kernel_size = 11
260
-
261
- imgs_idx = [0, 1, 2]
262
- # imgs_idx = [42, 43, 44]
263
- # imgs_idx = [1, 8, 9]
264
-
265
- DTU_DIR = "/home/xiaoxiao/dataset/DTU_IDR/DTU"
266
- # DTU_DIR = "/userhome/cs/xxlong/dataset/DTU_IDR/DTU"
267
-
268
- base_path = "/home/xiaoxiao/Workplace/nerf_reconstruction/Volume_NeuS/neus_camsys/exp/dtu/evaluation_23_24_33_new/volsdf"
269
-
270
- for scan in scans:
271
- print("processing scan%d" % scan)
272
- dir_path = os.path.join(base_path, "scan%d" % scan)
273
-
274
- old_mesh_file = glob(os.path.join(dir_path, "*.ply"))[0]
275
-
276
- clean_mesh_file = os.path.join(dir_path, "clean_%03d.ply" % scan)
277
- final_mesh_file = os.path.join(dir_path, "final_%03d.ply" % scan)
278
-
279
- clean_mesh_faces_by_mask(old_mesh_file, clean_mesh_file, scan, imgs_idx, minimal_vis=1,
280
- mask_dilated_size=mask_kernel_size)
281
- clean_mesh_faces_outside_frustum(clean_mesh_file, final_mesh_file, imgs_idx, mask_dilated_size=mask_kernel_size)
282
-
283
- print("finish processing scan%d" % scan)
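
The mesh-cleaning utilities above finish by discarding small disconnected fragments via trimesh's face-adjacency graph (note that the `np.bool` and `np.long` aliases they use are removed in recent NumPy releases). The same idea in isolation, a sketch mirroring `clean_mesh_by_faces_num` but returning a cleaned copy instead of rebuilding the mesh from masked arrays:

    import numpy as np
    import trimesh

    def keep_large_components(mesh, min_faces=500):
        """Return a copy of `mesh` with connected components smaller than `min_faces` removed."""
        components = trimesh.graph.connected_components(mesh.face_adjacency, min_len=min_faces)
        keep = np.zeros(len(mesh.faces), dtype=bool)
        if len(components) > 0:
            keep[np.concatenate(components)] = True
        cleaned = mesh.copy()
        cleaned.update_faces(keep)              # drop faces outside the kept components
        cleaned.remove_unreferenced_vertices()  # drop vertices no remaining face uses
        return cleaned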
 
 
 
 
 
SparseNeuS_demo_v1/evaluation/eval_dtu_python.py DELETED
@@ -1,369 +0,0 @@
1
- import numpy as np
2
- import open3d as o3d
3
- import sklearn.neighbors as skln
4
- from tqdm import tqdm
5
- from scipy.io import loadmat
6
- import multiprocessing as mp
7
- import argparse, os, sys
8
- import cv2 as cv
9
-
10
- from pathlib import Path
11
-
12
-
13
- def get_path_components(path):
14
- path = Path(path)
15
- ppath = str(path.parent)
16
- stem = str(path.stem)
17
- ext = str(path.suffix)
18
- return ppath, stem, ext
19
-
20
-
21
- def sample_single_tri(input_):
22
- n1, n2, v1, v2, tri_vert = input_
23
- c = np.mgrid[:n1 + 1, :n2 + 1]
24
- c += 0.5
25
- c[0] /= max(n1, 1e-7)
26
- c[1] /= max(n2, 1e-7)
27
- c = np.transpose(c, (1, 2, 0))
28
- k = c[c.sum(axis=-1) < 1] # m2
29
- q = v1 * k[:, :1] + v2 * k[:, 1:] + tri_vert
30
- return q
31
-
32
-
33
- def write_vis_pcd(file, points, colors):
34
- pcd = o3d.geometry.PointCloud()
35
- pcd.points = o3d.utility.Vector3dVector(points)
36
- pcd.colors = o3d.utility.Vector3dVector(colors)
37
- o3d.io.write_point_cloud(file, pcd)
38
-
39
-
40
- def eval_cloud(args, num_cpu_cores=-1):
41
- mp.freeze_support()
42
- os.makedirs(args.vis_out_dir, exist_ok=True)
43
-
44
- thresh = args.downsample_density
45
- if args.mode == 'mesh':
46
- pbar = tqdm(total=9)
47
- pbar.set_description('read data mesh')
48
- data_mesh = o3d.io.read_triangle_mesh(args.data)
49
-
50
- vertices = np.asarray(data_mesh.vertices)
51
- triangles = np.asarray(data_mesh.triangles)
52
- tri_vert = vertices[triangles]
53
-
54
- pbar.update(1)
55
- pbar.set_description('sample pcd from mesh')
56
- v1 = tri_vert[:, 1] - tri_vert[:, 0]
57
- v2 = tri_vert[:, 2] - tri_vert[:, 0]
58
- l1 = np.linalg.norm(v1, axis=-1, keepdims=True)
59
- l2 = np.linalg.norm(v2, axis=-1, keepdims=True)
60
- area2 = np.linalg.norm(np.cross(v1, v2), axis=-1, keepdims=True)
61
- non_zero_area = (area2 > 0)[:, 0]
62
- l1, l2, area2, v1, v2, tri_vert = [
63
- arr[non_zero_area] for arr in [l1, l2, area2, v1, v2, tri_vert]
64
- ]
65
- thr = thresh * np.sqrt(l1 * l2 / area2)
66
- n1 = np.floor(l1 / thr)
67
- n2 = np.floor(l2 / thr)
68
-
69
- with mp.Pool() as mp_pool:
70
- new_pts = mp_pool.map(sample_single_tri,
71
- ((n1[i, 0], n2[i, 0], v1[i:i + 1], v2[i:i + 1], tri_vert[i:i + 1, 0]) for i in
72
- range(len(n1))), chunksize=1024)
73
-
74
- new_pts = np.concatenate(new_pts, axis=0)
75
- data_pcd = np.concatenate([vertices, new_pts], axis=0)
76
-
77
- elif args.mode == 'pcd':
78
- pbar = tqdm(total=8)
79
- pbar.set_description('read data pcd')
80
- data_pcd_o3d = o3d.io.read_point_cloud(args.data)
81
- data_pcd = np.asarray(data_pcd_o3d.points)
82
-
83
- pbar.update(1)
84
- pbar.set_description('random shuffle pcd index')
85
- shuffle_rng = np.random.default_rng()
86
- shuffle_rng.shuffle(data_pcd, axis=0)
87
-
88
- pbar.update(1)
89
- pbar.set_description('downsample pcd')
90
- nn_engine = skln.NearestNeighbors(n_neighbors=1, radius=thresh, algorithm='kd_tree', n_jobs=num_cpu_cores)
91
- nn_engine.fit(data_pcd)
92
- rnn_idxs = nn_engine.radius_neighbors(data_pcd, radius=thresh, return_distance=False)
93
- mask = np.ones(data_pcd.shape[0], dtype=np.bool_)
94
- for curr, idxs in enumerate(rnn_idxs):
95
- if mask[curr]:
96
- mask[idxs] = 0
97
- mask[curr] = 1
98
- data_down = data_pcd[mask]
99
-
100
- pbar.update(1)
101
- pbar.set_description('masking data pcd')
102
- obs_mask_file = loadmat(f'{args.dataset_dir}/ObsMask/ObsMask{args.scan}_10.mat')
103
- ObsMask, BB, Res = [obs_mask_file[attr] for attr in ['ObsMask', 'BB', 'Res']]
104
- BB = BB.astype(np.float32)
105
-
106
- patch = args.patch_size
107
- inbound = ((data_down >= BB[:1] - patch) & (data_down < BB[1:] + patch * 2)).sum(axis=-1) == 3
108
- data_in = data_down[inbound]
109
-
110
- data_grid = np.around((data_in - BB[:1]) / Res).astype(np.int32)
111
- grid_inbound = ((data_grid >= 0) & (data_grid < np.expand_dims(ObsMask.shape, 0))).sum(axis=-1) == 3
112
- data_grid_in = data_grid[grid_inbound]
113
- in_obs = ObsMask[data_grid_in[:, 0], data_grid_in[:, 1], data_grid_in[:, 2]].astype(np.bool_)
114
- data_in_obs = data_in[grid_inbound][in_obs]
115
-
116
- pbar.update(1)
117
- pbar.set_description('read STL pcd')
118
- stl_pcd = o3d.io.read_point_cloud(args.gt)
119
- stl = np.asarray(stl_pcd.points)
120
-
121
- pbar.update(1)
122
- pbar.set_description('compute data2stl')
123
- nn_engine.fit(stl)
124
- dist_d2s, idx_d2s = nn_engine.kneighbors(data_in_obs, n_neighbors=1, return_distance=True)
125
- max_dist = args.max_dist
126
- mean_d2s = dist_d2s[dist_d2s < max_dist].mean()
127
-
128
- pbar.update(1)
129
- pbar.set_description('compute stl2data')
130
- ground_plane = loadmat(f'{args.dataset_dir}/ObsMask/Plane{args.scan}.mat')['P']
131
-
132
- stl_hom = np.concatenate([stl, np.ones_like(stl[:, :1])], -1)
133
- above = (ground_plane.reshape((1, 4)) * stl_hom).sum(-1) > 0
134
- stl_above = stl[above]
135
-
136
- nn_engine.fit(data_in)
137
- dist_s2d, idx_s2d = nn_engine.kneighbors(stl_above, n_neighbors=1, return_distance=True)
138
- mean_s2d = dist_s2d[dist_s2d < max_dist].mean()
139
-
140
- pbar.update(1)
141
- pbar.set_description('visualize error')
142
- vis_dist = args.visualize_threshold
143
- R = np.array([[1, 0, 0]], dtype=np.float64)
144
- G = np.array([[0, 1, 0]], dtype=np.float64)
145
- B = np.array([[0, 0, 1]], dtype=np.float64)
146
- W = np.array([[1, 1, 1]], dtype=np.float64)
147
- data_color = np.tile(B, (data_down.shape[0], 1))
148
- data_alpha = dist_d2s.clip(max=vis_dist) / vis_dist
149
- data_color[np.where(inbound)[0][grid_inbound][in_obs]] = R * data_alpha + W * (1 - data_alpha)
150
- data_color[np.where(inbound)[0][grid_inbound][in_obs][dist_d2s[:, 0] >= max_dist]] = G
151
- write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_d2gt.ply', data_down, data_color)
152
- stl_color = np.tile(B, (stl.shape[0], 1))
153
- stl_alpha = dist_s2d.clip(max=vis_dist) / vis_dist
154
- stl_color[np.where(above)[0]] = R * stl_alpha + W * (1 - stl_alpha)
155
- stl_color[np.where(above)[0][dist_s2d[:, 0] >= max_dist]] = G
156
- write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_gt2d.ply', stl, stl_color)
157
-
158
- pbar.update(1)
159
- pbar.set_description('done')
160
- pbar.close()
161
- over_all = (mean_d2s + mean_s2d) / 2
162
- print(f'mean_d2gt: {mean_d2s}; mean_gt2d: {mean_s2d}; over_all: {over_all}.')
163
-
164
- pparent, stem, ext = get_path_components(args.data)
165
- if args.log is None:
166
- path_log = os.path.join(pparent, 'eval_result.txt')
167
- else:
168
- path_log = args.log
169
- with open(path_log, 'a+') as fLog:
170
- fLog.write(f'mean_d2gt {np.round(mean_d2s, 3)} '
171
- f'mean_gt2d {np.round(mean_s2d, 3)} '
172
- f'Over_all {np.round(over_all, 3)} '
173
- f'[{stem}] \n')
174
-
175
- return over_all, mean_d2s, mean_s2d
176
-
177
-
178
- if __name__ == '__main__':
179
- from glob import glob
180
-
181
- mp.freeze_support()
182
-
183
- parser = argparse.ArgumentParser()
184
- parser.add_argument('--data', type=str, default='data_in.ply')
185
- parser.add_argument('--gt', type=str, help='ground truth')
186
- parser.add_argument('--scan', type=int, default=1)
187
- parser.add_argument('--mode', type=str, default='mesh', choices=['mesh', 'pcd'])
188
- parser.add_argument('--dataset_dir', type=str, default='/dataset/dtu_official/SampleSet/MVS_Data')
189
- parser.add_argument('--vis_out_dir', type=str, default='.')
190
- parser.add_argument('--downsample_density', type=float, default=0.2)
191
- parser.add_argument('--patch_size', type=float, default=60)
192
- parser.add_argument('--max_dist', type=float, default=20)
193
- parser.add_argument('--visualize_threshold', type=float, default=10)
194
- parser.add_argument('--log', type=str, default=None)
195
- args = parser.parse_args()
196
-
197
- base_dir = "./exp"
198
-
199
- GT_DIR = "./gt_pcd"
200
-
201
- scans = [24, 37, 40, 55, 63, 65, 69, 83, 97, 105, 106, 110, 114, 118, 122]
202
-
203
- for scan in scans:
204
-
205
- print("processing scan%d" % scan)
206
-
207
- args.data = os.path.join(base_dir, "scan{}".format(scan), "final_%03d.ply" % scan)
208
-
209
- if not os.path.exists(args.data):
210
- continue
211
-
212
- args.gt = os.path.join(GT_DIR, "stl%03d_total.ply" % scan)
213
- args.vis_out_dir = os.path.join(base_dir, "scan{}".format(scan))
214
- args.scan = scan
215
- os.makedirs(args.vis_out_dir, exist_ok=True)
216
-
217
- dist_thred1 = 1
218
- dist_thred2 = 2
219
-
220
- thresh = args.downsample_density
221
-
222
- if args.mode == 'mesh':
223
- pbar = tqdm(total=9)
224
- pbar.set_description('read data mesh')
225
- data_mesh = o3d.io.read_triangle_mesh(args.data)
226
-
227
- vertices = np.asarray(data_mesh.vertices)
228
- triangles = np.asarray(data_mesh.triangles)
229
- tri_vert = vertices[triangles]
230
-
231
- pbar.update(1)
232
- pbar.set_description('sample pcd from mesh')
233
- v1 = tri_vert[:, 1] - tri_vert[:, 0]
234
- v2 = tri_vert[:, 2] - tri_vert[:, 0]
235
- l1 = np.linalg.norm(v1, axis=-1, keepdims=True)
236
- l2 = np.linalg.norm(v2, axis=-1, keepdims=True)
237
- area2 = np.linalg.norm(np.cross(v1, v2), axis=-1, keepdims=True)
238
- non_zero_area = (area2 > 0)[:, 0]
239
- l1, l2, area2, v1, v2, tri_vert = [
240
- arr[non_zero_area] for arr in [l1, l2, area2, v1, v2, tri_vert]
241
- ]
242
- thr = thresh * np.sqrt(l1 * l2 / area2)
243
- n1 = np.floor(l1 / thr)
244
- n2 = np.floor(l2 / thr)
245
-
246
- with mp.Pool() as mp_pool:
247
- new_pts = mp_pool.map(sample_single_tri,
248
- ((n1[i, 0], n2[i, 0], v1[i:i + 1], v2[i:i + 1], tri_vert[i:i + 1, 0]) for i in
249
- range(len(n1))), chunksize=1024)
250
-
251
- new_pts = np.concatenate(new_pts, axis=0)
252
- data_pcd = np.concatenate([vertices, new_pts], axis=0)
253
-
254
- elif args.mode == 'pcd':
255
- pbar = tqdm(total=8)
256
- pbar.set_description('read data pcd')
257
- data_pcd_o3d = o3d.io.read_point_cloud(args.data)
258
- data_pcd = np.asarray(data_pcd_o3d.points)
259
-
260
- pbar.update(1)
261
- pbar.set_description('random shuffle pcd index')
262
- shuffle_rng = np.random.default_rng()
263
- shuffle_rng.shuffle(data_pcd, axis=0)
264
-
265
- pbar.update(1)
266
- pbar.set_description('downsample pcd')
267
- nn_engine = skln.NearestNeighbors(n_neighbors=1, radius=thresh, algorithm='kd_tree', n_jobs=-1)
268
- nn_engine.fit(data_pcd)
269
- rnn_idxs = nn_engine.radius_neighbors(data_pcd, radius=thresh, return_distance=False)
270
- mask = np.ones(data_pcd.shape[0], dtype=np.bool_)
271
- for curr, idxs in enumerate(rnn_idxs):
272
- if mask[curr]:
273
- mask[idxs] = 0
274
- mask[curr] = 1
275
- data_down = data_pcd[mask]
276
-
277
- pbar.update(1)
278
- pbar.set_description('masking data pcd')
279
- obs_mask_file = loadmat(f'{args.dataset_dir}/ObsMask/ObsMask{args.scan}_10.mat')
280
- ObsMask, BB, Res = [obs_mask_file[attr] for attr in ['ObsMask', 'BB', 'Res']]
281
- BB = BB.astype(np.float32)
282
-
283
- patch = args.patch_size
284
- inbound = ((data_down >= BB[:1] - patch) & (data_down < BB[1:] + patch * 2)).sum(axis=-1) == 3
285
- data_in = data_down[inbound]
286
-
287
- data_grid = np.around((data_in - BB[:1]) / Res).astype(np.int32)
288
- grid_inbound = ((data_grid >= 0) & (data_grid < np.expand_dims(ObsMask.shape, 0))).sum(axis=-1) == 3
289
- data_grid_in = data_grid[grid_inbound]
290
- in_obs = ObsMask[data_grid_in[:, 0], data_grid_in[:, 1], data_grid_in[:, 2]].astype(np.bool_)
291
- data_in_obs = data_in[grid_inbound][in_obs]
292
-
293
- pbar.update(1)
294
- pbar.set_description('read STL pcd')
295
- stl_pcd = o3d.io.read_point_cloud(args.gt)
296
- stl = np.asarray(stl_pcd.points)
297
-
298
- pbar.update(1)
299
- pbar.set_description('compute data2stl')
300
- nn_engine.fit(stl)
301
- dist_d2s, idx_d2s = nn_engine.kneighbors(data_in_obs, n_neighbors=1, return_distance=True)
302
- max_dist = args.max_dist
303
- mean_d2s = dist_d2s[dist_d2s < max_dist].mean()
304
-
305
- precision_1 = len(dist_d2s[dist_d2s < dist_thred1]) / len(dist_d2s)
306
- precision_2 = len(dist_d2s[dist_d2s < dist_thred2]) / len(dist_d2s)
307
-
308
- pbar.update(1)
309
- pbar.set_description('compute stl2data')
310
- ground_plane = loadmat(f'{args.dataset_dir}/ObsMask/Plane{args.scan}.mat')['P']
311
-
312
- stl_hom = np.concatenate([stl, np.ones_like(stl[:, :1])], -1)
313
- above = (ground_plane.reshape((1, 4)) * stl_hom).sum(-1) > 0
314
-
315
- stl_above = stl[above]
316
-
317
- nn_engine.fit(data_in)
318
- dist_s2d, idx_s2d = nn_engine.kneighbors(stl_above, n_neighbors=1, return_distance=True)
319
- mean_s2d = dist_s2d[dist_s2d < max_dist].mean()
320
-
321
- recall_1 = len(dist_s2d[dist_s2d < dist_thred1]) / len(dist_s2d)
322
- recall_2 = len(dist_s2d[dist_s2d < dist_thred2]) / len(dist_s2d)
323
-
324
- pbar.update(1)
325
- pbar.set_description('visualize error')
326
- vis_dist = args.visualize_threshold
327
- R = np.array([[1, 0, 0]], dtype=np.float64)
328
- G = np.array([[0, 1, 0]], dtype=np.float64)
329
- B = np.array([[0, 0, 1]], dtype=np.float64)
330
- W = np.array([[1, 1, 1]], dtype=np.float64)
331
- data_color = np.tile(B, (data_down.shape[0], 1))
332
- data_alpha = dist_d2s.clip(max=vis_dist) / vis_dist
333
- data_color[np.where(inbound)[0][grid_inbound][in_obs]] = R * data_alpha + W * (1 - data_alpha)
334
- data_color[np.where(inbound)[0][grid_inbound][in_obs][dist_d2s[:, 0] >= max_dist]] = G
335
- write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_d2gt.ply', data_down, data_color)
336
- stl_color = np.tile(B, (stl.shape[0], 1))
337
- stl_alpha = dist_s2d.clip(max=vis_dist) / vis_dist
338
- stl_color[np.where(above)[0]] = R * stl_alpha + W * (1 - stl_alpha)
339
- stl_color[np.where(above)[0][dist_s2d[:, 0] >= max_dist]] = G
340
- write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_gt2d.ply', stl, stl_color)
341
-
342
- pbar.update(1)
343
- pbar.set_description('done')
344
- pbar.close()
345
- over_all = (mean_d2s + mean_s2d) / 2
346
-
347
- fscore_1 = 2 * precision_1 * recall_1 / (precision_1 + recall_1 + 1e-6)
348
- fscore_2 = 2 * precision_2 * recall_2 / (precision_2 + recall_2 + 1e-6)
349
-
350
- print(f'over_all: {over_all}; mean_d2gt: {mean_d2s}; mean_gt2d: {mean_s2d}.')
351
- print(f'precision_1mm: {precision_1}; recall_1mm: {recall_1}; fscore_1mm: {fscore_1}')
352
- print(f'precision_2mm: {precision_2}; recall_2mm: {recall_2}; fscore_2mm: {fscore_2}')
353
-
354
- pparent, stem, ext = get_path_components(args.data)
355
- if args.log is None:
356
- path_log = os.path.join(pparent, 'eval_result.txt')
357
- else:
358
- path_log = args.log
359
- with open(path_log, 'w+') as fLog:
360
- fLog.write(f'over_all {np.round(over_all, 3)} '
361
- f'mean_d2gt {np.round(mean_d2s, 3)} '
362
- f'mean_gt2d {np.round(mean_s2d, 3)} \n'
363
- f'precision_1mm {np.round(precision_1, 3)} '
364
- f'recall_1mm {np.round(recall_1, 3)} '
365
- f'fscore_1mm {np.round(fscore_1, 3)} \n'
366
- f'precision_2mm {np.round(precision_2, 3)} '
367
- f'recall_2mm {np.round(recall_2, 3)} '
368
- f'fscore_2mm {np.round(fscore_2, 3)} \n'
369
- f'[{stem}] \n')
 
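The deleted eval_dtu_python.py above implemented the usual DTU protocol: radius-based downsampling of the prediction, nearest-neighbour distances in both directions against the ground-truth STL points, and precision/recall/f-score at 1 mm and 2 mm. A condensed sketch of that core metric (the ObsMask and ground-plane filtering of the full script are omitted; function name is illustrative):

import numpy as np
import sklearn.neighbors as skln

def chamfer_and_fscore(pred_pts, gt_pts, max_dist=20.0, thresholds=(1.0, 2.0)):
    """Bidirectional nearest-neighbour distances between (N, 3) point sets."""
    nn = skln.NearestNeighbors(n_neighbors=1, algorithm='kd_tree', n_jobs=-1)

    nn.fit(gt_pts)
    d2s, _ = nn.kneighbors(pred_pts, n_neighbors=1, return_distance=True)  # data -> GT
    nn.fit(pred_pts)
    s2d, _ = nn.kneighbors(gt_pts, n_neighbors=1, return_distance=True)    # GT -> data

    # chamfer-style means, ignoring outliers beyond max_dist
    mean_d2s = d2s[d2s < max_dist].mean()
    mean_s2d = s2d[s2d < max_dist].mean()
    over_all = (mean_d2s + mean_s2d) / 2

    # precision / recall / f-score at the requested distance thresholds
    scores = {}
    for t in thresholds:
        precision = float((d2s < t).mean())
        recall = float((s2d < t).mean())
        scores[t] = 2 * precision * recall / (precision + recall + 1e-6)
    return over_all, mean_d2s, mean_s2d, scores
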
SparseNeuS_demo_v1/exp_runner_generic_blender_val.py CHANGED
@@ -1,15 +1,12 @@
1
- import torch
2
- import torch.nn.functional as F
3
- from torch.utils.data import DataLoader
4
- import argparse
5
  import os
6
  import logging
 
7
  import numpy as np
8
- import cv2 as cv
9
- import trimesh
10
  from shutil import copyfile
 
 
11
  from torch.utils.tensorboard import SummaryWriter
12
- from icecream import ic
13
  from tqdm import tqdm
14
  from pyhocon import ConfigFactory
15
 
@@ -17,23 +14,12 @@ import sys
17
  sys.path.append(os.path.dirname(__file__))
18
 
19
  from models.fields import SingleVarianceNetwork
20
-
21
  from models.featurenet import FeatureNet
22
-
23
  from models.trainer_generic import GenericTrainer
24
-
25
  from models.sparse_sdf_network import SparseSdfNetwork
26
-
27
  from models.rendering_network import GeneralRenderingNetwork
28
-
29
- from datetime import datetime
30
-
31
- from data.dtu_general import MVSDatasetDtuPerView
32
-
33
- from utils.training_utils import tocuda
34
  from data.blender_general_narrow_all_eval_new_data import BlenderPerView
35
 
36
- from termcolor import colored
37
 
38
  from datetime import datetime
39
 
@@ -45,14 +31,14 @@ class Runner:
45
  self.device = torch.device('cuda:%d' % local_rank)
46
  # self.device = torch.device('cuda')
47
  self.num_devices = torch.cuda.device_count()
48
- self.is_continue = is_continue
49
  self.is_restore = is_restore
50
  self.restore_lod0 = restore_lod0
51
  self.mode = mode
52
  self.model_list = []
53
  self.logger = logging.getLogger('exp_logger')
54
 
55
- print(colored("detected %d GPUs" % self.num_devices, "red"))
56
 
57
  self.conf_path = conf_path
58
  self.conf = ConfigFactory.parse_file(conf_path)
@@ -63,7 +49,7 @@ class Runner:
63
  else:
64
  self.base_exp_dir = self.conf['general.base_exp_dir']
65
  self.conf['general.base_exp_dir'] = self.base_exp_dir # jha use this when testing
66
- print(colored("base_exp_dir: " + self.base_exp_dir, 'yellow'))
67
  os.makedirs(self.base_exp_dir, exist_ok=True)
68
  self.iter_step = 0
69
  self.val_step = 0
@@ -121,12 +107,11 @@ class Runner:
121
  **self.conf['model.rendering_network_lod1']).to(self.device)
122
  if self.mode == 'export_mesh' or self.mode == 'val':
123
  # base_exp_dir_to_store = os.path.join(self.base_exp_dir, '{:%Y_%m_%d_%H_%M_%S}'.format(datetime.now()))
124
- print("save mesh to:", os.path.join("../", args.specific_dataset_name))
125
  base_exp_dir_to_store = os.path.join("../", args.specific_dataset_name) #"../gradio_tmp" # MODIFIED
126
  else:
127
  base_exp_dir_to_store = self.base_exp_dir
128
 
129
- print(colored(f"Store in: {base_exp_dir_to_store}", "blue"))
130
  # Renderer model
131
  self.trainer = GenericTrainer(
132
  self.rendering_network_outside,
@@ -149,7 +134,7 @@ class Runner:
149
 
150
  # Load checkpoint
151
  latest_model_name = None
152
- if is_continue:
153
  model_list_raw = os.listdir(os.path.join(self.base_exp_dir, 'checkpoints'))
154
  model_list = []
155
  for model_name in model_list_raw:
@@ -238,7 +223,7 @@ class Runner:
238
  epochs = int(1 + res_step // len(dataloader))
239
 
240
  self.adjust_learning_rate()
241
- print(colored("starting training learning rate: {:.5f}".format(self.optimizer.param_groups[0]['lr']), "yellow"))
242
 
243
  background_rgb = None
244
  if self.use_white_bkgd:
@@ -247,7 +232,7 @@ class Runner:
247
 
248
  for epoch_i in range(epochs):
249
 
250
- print(colored("current epoch %d" % epoch_i, 'red'))
251
  dataloader = tqdm(dataloader)
252
 
253
  for batch in dataloader:
@@ -365,8 +350,8 @@ class Runner:
365
  losses_lod1['color_fine_loss'].mean() if losses_lod1 is not None else 0,
366
  self.optimizer.param_groups[0]['lr']))
367
 
368
- print(colored('alpha_inter_ratio_lod0 = {:.4f} alpha_inter_ratio_lod1 = {:.4f}\n'.format(
369
- alpha_inter_ratio_lod0, alpha_inter_ratio_lod1), 'green'))
370
 
371
  if losses_lod0 is not None:
372
  # print("[TEST]: weights_sum in print", losses_lod0['weights_sum'].mean())
@@ -469,7 +454,7 @@ class Runner:
469
  # 3. load the new state dict
470
  network.load_state_dict(pretrained_dict)
471
  except:
472
- print(colored(comment + " load fails", 'yellow'))
473
 
474
  checkpoint = torch.load(os.path.join(self.base_exp_dir, 'checkpoints', checkpoint_name),
475
  map_location=self.device)
@@ -497,7 +482,7 @@ class Runner:
497
  try:
498
  self.optimizer.load_state_dict(checkpoint['optimizer'])
499
  except:
500
- print(colored("load optimizer fails", "yellow"))
501
  self.iter_step = checkpoint['iter_step']
502
  self.val_step = checkpoint['val_step'] if 'val_step' in checkpoint.keys() else 0
503
 
@@ -532,15 +517,10 @@ class Runner:
532
  torch.save(checkpoint,
533
  os.path.join(self.base_exp_dir, 'checkpoints', 'ckpt_{:0>6d}.pth'.format(self.iter_step)))
534
 
535
- def validate(self, idx=-1, resolution_level=-1):
536
  # validate image
537
-
538
- ic(self.iter_step, idx)
539
  self.logger.info('Validate begin')
540
-
541
- if idx < 0:
542
- idx = self.val_step
543
- # idx = np.random.randint(len(self.val_dataset))
544
  self.val_step += 1
545
 
546
  try:
@@ -576,16 +556,9 @@ class Runner:
576
  )
577
 
578
 
579
- def export_mesh(self, idx=-1, resolution_level=-1):
580
- # validate image
581
-
582
- ic(self.iter_step, idx)
583
  self.logger.info('Validate begin')
584
- import time
585
- start1 = time.time()
586
- if idx < 0:
587
- idx = self.val_step
588
- # idx = np.random.randint(len(self.val_dataset))
589
  self.val_step += 1
590
 
591
  try:
@@ -598,7 +571,6 @@ class Runner:
598
 
599
  background_rgb = None
600
  if self.use_white_bkgd:
601
- # background_rgb = torch.ones([1, 3]).to(self.device)
602
  background_rgb = 1.0
603
 
604
  batch['batch_idx'] = torch.tensor([x for x in range(self.batch_size)])
@@ -609,8 +581,6 @@ class Runner:
609
  else:
610
  alpha_inter_ratio_lod0 = 1.
611
  alpha_inter_ratio_lod1 = self.get_alpha_inter_ratio(self.anneal_start_lod1, self.anneal_end_lod1)
612
- end1 = time.time()
613
- print("time for getting data", end1 - start1)
614
  self.trainer(
615
  batch,
616
  background_rgb=background_rgb,
 
 
 
 
 
1
  import os
2
  import logging
3
+ import argparse
4
  import numpy as np
 
 
5
  from shutil import copyfile
6
+ import torch
7
+ from torch.utils.data import DataLoader
8
  from torch.utils.tensorboard import SummaryWriter
9
+ from rich import print
10
  from tqdm import tqdm
11
  from pyhocon import ConfigFactory
12
 
 
14
  sys.path.append(os.path.dirname(__file__))
15
 
16
  from models.fields import SingleVarianceNetwork
 
17
  from models.featurenet import FeatureNet
 
18
  from models.trainer_generic import GenericTrainer
 
19
  from models.sparse_sdf_network import SparseSdfNetwork
 
20
  from models.rendering_network import GeneralRenderingNetwork
 
 
 
 
 
 
21
  from data.blender_general_narrow_all_eval_new_data import BlenderPerView
22
 
 
23
 
24
  from datetime import datetime
25
 
 
31
  self.device = torch.device('cuda:%d' % local_rank)
32
  # self.device = torch.device('cuda')
33
  self.num_devices = torch.cuda.device_count()
34
+ self.is_continue = is_continue or (mode == "export_mesh")
35
  self.is_restore = is_restore
36
  self.restore_lod0 = restore_lod0
37
  self.mode = mode
38
  self.model_list = []
39
  self.logger = logging.getLogger('exp_logger')
40
 
41
+ print("detected %d GPUs" % self.num_devices)
42
 
43
  self.conf_path = conf_path
44
  self.conf = ConfigFactory.parse_file(conf_path)
 
49
  else:
50
  self.base_exp_dir = self.conf['general.base_exp_dir']
51
  self.conf['general.base_exp_dir'] = self.base_exp_dir # jha use this when testing
52
+ print("base_exp_dir: " + self.base_exp_dir)
53
  os.makedirs(self.base_exp_dir, exist_ok=True)
54
  self.iter_step = 0
55
  self.val_step = 0
 
107
  **self.conf['model.rendering_network_lod1']).to(self.device)
108
  if self.mode == 'export_mesh' or self.mode == 'val':
109
  # base_exp_dir_to_store = os.path.join(self.base_exp_dir, '{:%Y_%m_%d_%H_%M_%S}'.format(datetime.now()))
 
110
  base_exp_dir_to_store = os.path.join("../", args.specific_dataset_name) #"../gradio_tmp" # MODIFIED
111
  else:
112
  base_exp_dir_to_store = self.base_exp_dir
113
 
114
+ print(f"Store in: {base_exp_dir_to_store}")
115
  # Renderer model
116
  self.trainer = GenericTrainer(
117
  self.rendering_network_outside,
 
134
 
135
  # Load checkpoint
136
  latest_model_name = None
137
+ if self.is_continue:
138
  model_list_raw = os.listdir(os.path.join(self.base_exp_dir, 'checkpoints'))
139
  model_list = []
140
  for model_name in model_list_raw:
 
223
  epochs = int(1 + res_step // len(dataloader))
224
 
225
  self.adjust_learning_rate()
226
+ print("starting training learning rate: {:.5f}".format(self.optimizer.param_groups[0]['lr']))
227
 
228
  background_rgb = None
229
  if self.use_white_bkgd:
 
232
 
233
  for epoch_i in range(epochs):
234
 
235
+ print("current epoch %d" % epoch_i)
236
  dataloader = tqdm(dataloader)
237
 
238
  for batch in dataloader:
 
350
  losses_lod1['color_fine_loss'].mean() if losses_lod1 is not None else 0,
351
  self.optimizer.param_groups[0]['lr']))
352
 
353
+ print('alpha_inter_ratio_lod0 = {:.4f} alpha_inter_ratio_lod1 = {:.4f}\n'.format(
354
+ alpha_inter_ratio_lod0, alpha_inter_ratio_lod1))
355
 
356
  if losses_lod0 is not None:
357
  # print("[TEST]: weights_sum in print", losses_lod0['weights_sum'].mean())
 
454
  # 3. load the new state dict
455
  network.load_state_dict(pretrained_dict)
456
  except:
457
+ print(comment + " load fails")
458
 
459
  checkpoint = torch.load(os.path.join(self.base_exp_dir, 'checkpoints', checkpoint_name),
460
  map_location=self.device)
 
482
  try:
483
  self.optimizer.load_state_dict(checkpoint['optimizer'])
484
  except:
485
+ print("load optimizer fails")
486
  self.iter_step = checkpoint['iter_step']
487
  self.val_step = checkpoint['val_step'] if 'val_step' in checkpoint.keys() else 0
488
 
 
517
  torch.save(checkpoint,
518
  os.path.join(self.base_exp_dir, 'checkpoints', 'ckpt_{:0>6d}.pth'.format(self.iter_step)))
519
 
520
+ def validate(self, resolution_level=-1):
521
  # validate image
522
+ print("iter_step: ", self.iter_step)
 
523
  self.logger.info('Validate begin')
 
 
 
 
524
  self.val_step += 1
525
 
526
  try:
 
556
  )
557
 
558
 
559
+ def export_mesh(self, resolution_level=-1):
560
+ print("iter_step: ", self.iter_step)
 
 
561
  self.logger.info('Validate begin')
 
 
 
 
 
562
  self.val_step += 1
563
 
564
  try:
 
571
 
572
  background_rgb = None
573
  if self.use_white_bkgd:
 
574
  background_rgb = 1.0
575
 
576
  batch['batch_idx'] = torch.tensor([x for x in range(self.batch_size)])
 
581
  else:
582
  alpha_inter_ratio_lod0 = 1.
583
  alpha_inter_ratio_lod1 = self.get_alpha_inter_ratio(self.anneal_start_lod1, self.anneal_end_lod1)
 
 
584
  self.trainer(
585
  batch,
586
  background_rgb=background_rgb,
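
One behavioural change worth noting in the runner above: is_continue is now forced on when mode == "export_mesh", so mesh extraction always reloads the newest checkpoint instead of silently running with random weights. A minimal sketch of that latest-checkpoint lookup, assuming the ckpt_{:0>6d}.pth naming used by the save path (the helper name is illustrative):

import os

def find_latest_checkpoint(base_exp_dir):
    """Return the newest ckpt_XXXXXX.pth in <base_exp_dir>/checkpoints, or None."""
    ckpt_dir = os.path.join(base_exp_dir, 'checkpoints')
    names = [n for n in os.listdir(ckpt_dir)
             if n.startswith('ckpt_') and n.endswith('.pth')]
    if not names:
        return None
    # the zero-padded iteration number makes lexicographic order match training order
    return sorted(names)[-1]
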
SparseNeuS_demo_v1/models/patch_projector.py CHANGED
@@ -208,4 +208,4 @@ def normalize(flow, h, w, clamp=None):
208
 
209
  def build_patch_offset(h_patch_size):
210
  offsets = torch.arange(-h_patch_size, h_patch_size + 1)
211
- return torch.stack(torch.meshgrid(offsets, offsets)[::-1], dim=-1).view(1, -1, 2) # nb_pixels_patch * 2
 
208
 
209
  def build_patch_offset(h_patch_size):
210
  offsets = torch.arange(-h_patch_size, h_patch_size + 1)
211
+ return torch.stack(torch.meshgrid(offsets, offsets, indexing="ij")[::-1], dim=-1).view(1, -1, 2) # nb_pixels_patch * 2
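
The patch_projector change only adds an explicit indexing="ij" to torch.meshgrid, which matches the pre-1.10 implicit default, so the generated patch offsets are numerically identical while the deprecation warning on newer PyTorch goes away. A quick standalone check of the output (shape and first few offsets shown as comments):

import torch

def build_patch_offset(h_patch_size):
    offsets = torch.arange(-h_patch_size, h_patch_size + 1)
    # indexing="ij" reproduces the old default; [::-1] puts the x offset before y
    return torch.stack(torch.meshgrid(offsets, offsets, indexing="ij")[::-1], dim=-1).view(1, -1, 2)

patch = build_patch_offset(1)
print(patch.shape)   # torch.Size([1, 9, 2])
print(patch[0, :3])  # tensor([[-1, -1], [ 0, -1], [ 1, -1]])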