Chao Xu committed
Commit 854f0d0
1 Parent(s): 1fae98d

sparseneus and elev est

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitignore +2 -1
  2. SparseNeuS_demo_v1/confs/blender_general_lod1_val_new.conf +137 -0
  3. SparseNeuS_demo_v1/confs/one2345_lod0_val_demo.conf +137 -0
  4. SparseNeuS_demo_v1/data/__init__.py +0 -0
  5. SparseNeuS_demo_v1/data/blender.py +340 -0
  6. SparseNeuS_demo_v1/data/blender_general.py +432 -0
  7. SparseNeuS_demo_v1/data/blender_general_12_narrow.py +427 -0
  8. SparseNeuS_demo_v1/data/blender_general_12_narrow_8.py +427 -0
  9. SparseNeuS_demo_v1/data/blender_general_360.py +412 -0
  10. SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_3.py +406 -0
  11. SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_4.py +411 -0
  12. SparseNeuS_demo_v1/data/blender_general_4_narrow_and_4_2_stage_mix.py +480 -0
  13. SparseNeuS_demo_v1/data/blender_general_4_narrow_and_6_2_stage_mix.py +476 -0
  14. SparseNeuS_demo_v1/data/blender_general_6_narrow_and_6_2_stage_blend_mix.py +449 -0
  15. SparseNeuS_demo_v1/data/blender_general_8_2_stage.py +396 -0
  16. SparseNeuS_demo_v1/data/blender_general_8_4_gt.py +396 -0
  17. SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_3_views.py +446 -0
  18. SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_mix.py +439 -0
  19. SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_mix.py +470 -0
  20. SparseNeuS_demo_v1/data/blender_general_8_wide_from_2_stage.py +395 -0
  21. SparseNeuS_demo_v1/data/blender_general_narrow_4_1_eval_new_data.py +418 -0
  22. SparseNeuS_demo_v1/data/blender_general_narrow_6.py +399 -0
  23. SparseNeuS_demo_v1/data/blender_general_narrow_8_3_fixed.py +393 -0
  24. SparseNeuS_demo_v1/data/blender_general_narrow_8_3_random.py +395 -0
  25. SparseNeuS_demo_v1/data/blender_general_narrow_8_4_random_shading.py +432 -0
  26. SparseNeuS_demo_v1/data/blender_general_narrow_all.py +386 -0
  27. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage.py +410 -0
  28. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage_temp.py +411 -0
  29. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data.py +418 -0
  30. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data3_1.py +414 -0
  31. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_32_wide.py +465 -0
  32. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_4_4.py +419 -0
  33. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_6_4.py +420 -0
  34. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_3.py +428 -0
  35. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_wide.py +420 -0
  36. SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_temp.py +417 -0
  37. SparseNeuS_demo_v1/data/blender_general_narrow_all_no_depth.py +388 -0
  38. SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4.py +389 -0
  39. SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4_and_4.py +395 -0
  40. SparseNeuS_demo_v1/data/blender_gt_32.py +419 -0
  41. SparseNeuS_demo_v1/data/dtu/dtu_pairs.txt +93 -0
  42. SparseNeuS_demo_v1/data/dtu/lists/test.txt +15 -0
  43. SparseNeuS_demo_v1/data/dtu/lists/train.txt +75 -0
  44. SparseNeuS_demo_v1/data/dtu_fit.py +278 -0
  45. SparseNeuS_demo_v1/data/dtu_general.py +376 -0
  46. SparseNeuS_demo_v1/data/scene.py +102 -0
  47. SparseNeuS_demo_v1/evaluation/__init__.py +0 -0
  48. SparseNeuS_demo_v1/evaluation/clean_mesh.py +283 -0
  49. SparseNeuS_demo_v1/evaluation/eval_dtu_python.py +369 -0
  50. SparseNeuS_demo_v1/exp/lod0/checkpoint_trash/ckpt_285000.pth +3 -0
.gitignore CHANGED
@@ -1 +1,2 @@
- __pycache__/
+ __pycache__/
+ *.DS_Store
SparseNeuS_demo_v1/confs/blender_general_lod1_val_new.conf ADDED
@@ -0,0 +1,137 @@
+ # - for the lod1 geometry network, using adaptive cost for sparse cost regularization network
+ #- for lod1 rendering network, using depth-adaptive render
+
+ general {
+ base_exp_dir = ./exp/val/1_4_only_narrow_lod1
+
+ recording = [
+ ./,
+ ./data
+ ./ops
+ ./models
+ ./loss
+ ]
+ }
+
+ dataset {
+ # local path
+ trainpath = /objaverse-processed/zero12345_img/eval_selected
+ valpath = /objaverse-processed/zero12345_img/eval_selected
+ testpath = /objaverse-processed/zero12345_img/eval_selected
+ # trainpath = /objaverse-processed/zero12345_img/zero12345_2stage_5pred_sample/
+ # valpath = /objaverse-processed/zero12345_img/zero12345_2stage_5pred_sample/
+ # testpath = /objaverse-processed/zero12345_img/zero12345_2stage_5pred_sample/
+ imgScale_train = 1.0
+ imgScale_test = 1.0
+ nviews = 5
+ clean_image = True
+ importance_sample = True
+ test_ref_views = [23]
+
+ # test dataset
+ test_n_views = 2
+ test_img_wh = [256, 256]
+ test_clip_wh = [0, 0]
+ test_scan_id = scan110
+ train_img_idx = [49, 50, 52, 53, 54, 56, 58] #[21, 22, 23, 24, 25] #
+ test_img_idx = [51, 55, 57] #[32, 33, 34] #
+
+ test_dir_comment = train
+ }
+
+ train {
+ learning_rate = 2e-4
+ learning_rate_milestone = [100000, 150000, 200000]
+ learning_rate_factor = 0.5
+ end_iter = 200000
+ save_freq = 5000
+ val_freq = 1
+ val_mesh_freq = 1
+ report_freq = 100
+
+ N_rays = 512
+
+ validate_resolution_level = 4
+ anneal_start = 0
+ anneal_end = 25000
+ anneal_start_lod1 = 0
+ anneal_end_lod1 = 15000
+
+ use_white_bkgd = True
+
+ # Loss
+ # ! for training the lod1 network, don't use this regularization in first 10k steps; then use the regularization
+ sdf_igr_weight = 0.1
+ sdf_sparse_weight = 0.02 # 0.002 for lod1 network; 0.02 for lod0 network
+ sdf_decay_param = 100 # cannot be too large, which decide the tsdf range
+ fg_bg_weight = 0.01 # first 0.01
+ bg_ratio = 0.3
+
+ if_fix_lod0_networks = True
+ }
+
+ model {
+ num_lods = 2
+
+ sdf_network_lod0 {
+ lod = 0,
+ ch_in = 56, # the channel num of fused pyramid features
+ voxel_size = 0.02105263, # 0.02083333, should be 2/95
+ vol_dims = [96, 96, 96],
+ hidden_dim = 128,
+ cost_type = variance_mean
+ d_pyramid_feature_compress = 16,
+ regnet_d_out = 16,
+ num_sdf_layers = 4,
+ # position embedding
+ multires = 6
+ }
+
+
+ sdf_network_lod1 {
+ lod = 1,
+ ch_in = 56, # the channel num of fused pyramid features
+ voxel_size = 0.0104712, #0.01041667, should be 2/191
+ vol_dims = [192, 192, 192],
+ hidden_dim = 128,
+ cost_type = variance_mean
+ d_pyramid_feature_compress = 8,
+ regnet_d_out = 8,
+ num_sdf_layers = 4,
+ # position embedding
+ multires = 6
+ }
+
+
+ variance_network {
+ init_val = 0.2
+ }
+
+ variance_network_lod1 {
+ init_val = 0.2
+ }
+
+ rendering_network {
+ in_geometry_feat_ch = 16
+ in_rendering_feat_ch = 56
+ anti_alias_pooling = True
+ }
+
+ rendering_network_lod1 {
+ in_geometry_feat_ch = 8
+ in_rendering_feat_ch = 56
+ anti_alias_pooling = True
+
+ }
+
+
+ trainer {
+ n_samples_lod0 = 64
+ n_importance_lod0 = 64
+ n_samples_lod1 = 64
+ n_importance_lod1 = 64
+ n_outside = 0 # 128 if render_outside_uniform_sampling
+ perturb = 1.0
+ alpha_type = div
+ }
+ }
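Note: the .conf files added in this commit use HOCON syntax. As a minimal illustrative sketch (assuming the training/validation scripts parse them with pyhocon, as NeuS/SparseNeuS-style codebases typically do; this snippet is not part of the commit), the values above can be read like this:

from pyhocon import ConfigFactory

conf = ConfigFactory.parse_file("SparseNeuS_demo_v1/confs/blender_general_lod1_val_new.conf")
lr = conf.get_float("train.learning_rate")                   # 2e-4
vol_dims = conf.get_list("model.sdf_network_lod0.vol_dims")  # [96, 96, 96]
base_exp_dir = conf.get_string("general.base_exp_dir")       # ./exp/val/1_4_only_narrow_lod1
print(lr, vol_dims, base_exp_dir)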
SparseNeuS_demo_v1/confs/one2345_lod0_val_demo.conf ADDED
@@ -0,0 +1,137 @@
+ # - for the lod1 geometry network, using adaptive cost for sparse cost regularization network
+ #- for lod1 rendering network, using depth-adaptive render
+
+ general {
+
+ base_exp_dir = exp/lod0 # !!! where you store the results and checkpoints to be used
+ recording = [
+ ./,
+ ./data
+ ./ops
+ ./models
+ ./loss
+ ]
+ }
+
+ dataset {
+ trainpath = ../
+ valpath = ../ # !!! where you store the validation data
+ testpath = ../
+
+
+
+ imgScale_train = 1.0
+ imgScale_test = 1.0
+ nviews = 5
+ clean_image = True
+ importance_sample = True
+ test_ref_views = [23]
+
+ # test dataset
+ test_n_views = 2
+ test_img_wh = [256, 256]
+ test_clip_wh = [0, 0]
+ test_scan_id = scan110
+ train_img_idx = [49, 50, 52, 53, 54, 56, 58] #[21, 22, 23, 24, 25] #
+ test_img_idx = [51, 55, 57] #[32, 33, 34] #
+
+ test_dir_comment = train
+ }
+
+ train {
+ learning_rate = 2e-4
+ learning_rate_milestone = [100000, 150000, 200000]
+ learning_rate_factor = 0.5
+ end_iter = 200000
+ save_freq = 5000
+ val_freq = 1
+ val_mesh_freq = 1
+ report_freq = 100
+
+ N_rays = 512
+
+ validate_resolution_level = 4
+ anneal_start = 0
+ anneal_end = 25000
+ anneal_start_lod1 = 0
+ anneal_end_lod1 = 15000
+
+ use_white_bkgd = True
+
+ # Loss
+ # ! for training the lod1 network, don't use this regularization in first 10k steps; then use the regularization
+ sdf_igr_weight = 0.1
+ sdf_sparse_weight = 0.02 # 0.002 for lod1 network; 0.02 for lod0 network
+ sdf_decay_param = 100 # cannot be too large, which decide the tsdf range
+ fg_bg_weight = 0.01 # first 0.01
+ bg_ratio = 0.3
+
+ if_fix_lod0_networks = False
+ }
+
+ model {
+ num_lods = 1
+
+ sdf_network_lod0 {
+ lod = 0,
+ ch_in = 56, # the channel num of fused pyramid features
+ voxel_size = 0.02105263, # 0.02083333, should be 2/95
+ vol_dims = [96, 96, 96],
+ hidden_dim = 128,
+ cost_type = variance_mean
+ d_pyramid_feature_compress = 16,
+ regnet_d_out = 16,
+ num_sdf_layers = 4,
+ # position embedding
+ multires = 6
+ }
+
+
+ sdf_network_lod1 {
+ lod = 1,
+ ch_in = 56, # the channel num of fused pyramid features
+ voxel_size = 0.0104712, #0.01041667, should be 2/191
+ vol_dims = [192, 192, 192],
+ hidden_dim = 128,
+ cost_type = variance_mean
+ d_pyramid_feature_compress = 8,
+ regnet_d_out = 16,
+ num_sdf_layers = 4,
+
+ # position embedding
+ multires = 6
+ }
+
+
+ variance_network {
+ init_val = 0.2
+ }
+
+ variance_network_lod1 {
+ init_val = 0.2
+ }
+
+ rendering_network {
+ in_geometry_feat_ch = 16
+ in_rendering_feat_ch = 56
+ anti_alias_pooling = True
+ }
+
+ rendering_network_lod1 {
+ in_geometry_feat_ch = 16 # default 8
+ in_rendering_feat_ch = 56
+ anti_alias_pooling = True
+
+ }
+
+
+ trainer {
+ n_samples_lod0 = 64
+ n_importance_lod0 = 64
+ n_samples_lod1 = 64
+ n_importance_lod1 = 64
+ n_outside = 0 # 128 if render_outside_uniform_sampling
+ perturb = 1.0
+ alpha_type = div
+ }
+ }
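The voxel_size comments in both configs ("should be 2/95", "should be 2/191") follow from splitting the normalized [-1, 1] bounding cube into vol_dims - 1 intervals per axis. A quick arithmetic check (illustration only, not part of the commit):

# spacing of a [-1, 1] axis sampled at vol_dims grid points
for vol_dim in (96, 192):
    print(vol_dim, 2.0 / (vol_dim - 1))
# 96  -> 0.0210526...  (matches voxel_size = 0.02105263 for vol_dims = [96, 96, 96])
# 192 -> 0.0104712...  (matches voxel_size = 0.0104712 for vol_dims = [192, 192, 192])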
SparseNeuS_demo_v1/data/__init__.py ADDED
File without changes
SparseNeuS_demo_v1/data/blender.py ADDED
@@ -0,0 +1,340 @@
1
+ import torch
2
+ from torch.utils.data import Dataset
3
+ import json
4
+ import numpy as np
5
+ import os
6
+ from PIL import Image
7
+ from torchvision import transforms as T
8
+ from kornia import create_meshgrid
9
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
10
+ import cv2 as cv
11
+ from data.scene import get_boundingbox
12
+
13
+
14
+ def get_ray_directions(H, W, focal, center=None):
15
+ """
16
+ Get ray directions for all pixels in camera coordinate.
17
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
18
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
19
+ Inputs:
20
+ H, W, focal: image height, width and focal length
21
+ Outputs:
22
+ directions: (H, W, 3), the direction of the rays in camera coordinate
23
+ """
24
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0]
25
+ i, j = grid.unbind(-1)
26
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
27
+ # see https://github.com/bmild/nerf/issues/24
28
+ cent = center if center is not None else [W / 2, H / 2]
29
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
30
+
31
+ return directions
32
+
33
+ def get_rays(directions, c2w):
34
+ """
35
+ Get ray origin and normalized directions in world coordinate for all pixels in one image.
36
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
37
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
38
+ Inputs:
39
+ directions: (H, W, 3) precomputed ray directions in camera coordinate
40
+ c2w: (3, 4) transformation matrix from camera coordinate to world coordinate
41
+ Outputs:
42
+ rays_o: (H*W, 3), the origin of the rays in world coordinate
43
+ rays_d: (H*W, 3), the normalized direction of the rays in world coordinate
44
+ """
45
+ # Rotate ray directions from camera coordinate to the world coordinate
46
+ rays_d = directions @ c2w[:3, :3].T # (H, W, 3)
47
+ # rays_d = rays_d / torch.norm(rays_d, dim=-1, keepdim=True)
48
+ # The origin of all rays is the camera origin in world coordinate
49
+ rays_o = c2w[:3, 3].expand(rays_d.shape) # (H, W, 3)
50
+
51
+ rays_d = rays_d.view(-1, 3)
52
+ rays_o = rays_o.view(-1, 3)
53
+
54
+ return rays_o, rays_d
55
+
56
+
57
+ def load_K_Rt_from_P(filename, P=None):
58
+ if P is None:
59
+ lines = open(filename).read().splitlines()
60
+ if len(lines) == 4:
61
+ lines = lines[1:]
62
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
63
+ P = np.asarray(lines).astype(np.float32).squeeze()
64
+
65
+ out = cv.decomposeProjectionMatrix(P)
66
+ K = out[0]
67
+ R = out[1]
68
+ t = out[2]
69
+
70
+ K = K / K[2, 2]
71
+ intrinsics = np.eye(4)
72
+ intrinsics[:3, :3] = K
73
+
74
+ pose = np.eye(4, dtype=np.float32)
75
+ pose[:3, :3] = R.transpose() # R from decomposeProjectionMatrix is world-to-camera; its transpose gives the camera-to-world rotation
76
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
77
+
78
+ return intrinsics, pose # ! return cam2world matrix here
79
+
80
+
81
+ class BlenderDataset(Dataset):
82
+ def __init__(self, root_dir, split, scan_id, n_views, train_img_idx=[], test_img_idx=[],
83
+ img_wh=[800, 800], clip_wh=[0, 0], original_img_wh=[800, 800],
84
+ N_rays=512, h_patch_size=5, near=2.0, far=6.0):
85
+ self.root_dir = root_dir
86
+ self.split = split
87
+ self.img_wh = img_wh
88
+ self.clip_wh = clip_wh
89
+ self.define_transforms()
90
+ self.train_img_idx = train_img_idx
91
+ self.test_img_idx = test_img_idx
92
+ self.N_rays = N_rays
93
+ self.h_patch_size = h_patch_size # used to extract patch for supervision
94
+ self.n_views = n_views
95
+ self.near, self.far = near, far
96
+ self.blender2opencv = np.array([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
97
+
98
+ with open(os.path.join(self.root_dir, f"transforms_{self.split}.json"), 'r') as f:
99
+ self.meta = json.load(f)
100
+
101
+
102
+ self.read_meta(near, far)
103
+ # import ipdb; ipdb.set_trace()
104
+ self.raw_near_fars = np.stack([np.array([self.near, self.far]) for i in range(len(self.meta['frames']))])
105
+
106
+
107
+ # ! estimate scale_mat
108
+ self.scale_mat, self.scale_factor = self.cal_scale_mat(
109
+ img_hw=[self.img_wh[1], self.img_wh[0]],
110
+ intrinsics=self.all_intrinsics[self.train_img_idx],
111
+ extrinsics=self.all_w2cs[self.train_img_idx],
112
+ near_fars=self.raw_near_fars[self.train_img_idx],
113
+ factor=1.1)
114
+ # self.scale_mat = np.eye(4)
115
+ # self.scale_factor = 1.0
116
+ # import ipdb; ipdb.set_trace()
117
+ # * after scaling and translation, unit bounding box
118
+ self.scaled_intrinsics, self.scaled_w2cs, self.scaled_c2ws, \
119
+ self.scaled_affine_mats, self.scaled_near_fars = self.scale_cam_info()
120
+
121
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
122
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
123
+ self.partial_vol_origin = torch.Tensor([-1., -1., -1.])
124
+ self.white_back = True
125
+
126
+ def read_meta(self, near=2.0, far=6.0):
127
+
128
+
129
+ self.ref_img_idx = self.train_img_idx[0]
130
+ ref_c2w = np.array(self.meta['frames'][self.ref_img_idx]['transform_matrix']) @ self.blender2opencv
131
+ # ref_c2w = torch.FloatTensor(ref_c2w)
132
+ self.ref_c2w = ref_c2w
133
+ self.ref_w2c = np.linalg.inv(ref_c2w)
134
+
135
+
136
+ w, h = self.img_wh
137
+ self.focal = 0.5 * 800 / np.tan(0.5 * self.meta['camera_angle_x']) # original focal length
138
+ self.focal *= self.img_wh[0] / 800 # modify focal length to match size self.img_wh
139
+
140
+ # bounds, common for all scenes
141
+ self.near = near
142
+ self.far = far
143
+ self.bounds = np.array([self.near, self.far])
144
+
145
+ # ray directions for all pixels, same for all images (same H, W, focal)
146
+ self.directions = get_ray_directions(h, w, [self.focal,self.focal]) # (h, w, 3)
147
+ intrinsics = np.eye(4)
148
+ intrinsics[:3, :3] = np.array([[self.focal,0,w/2],[0,self.focal,h/2],[0,0,1]]).astype(np.float32)
149
+ self.intrinsics = intrinsics
150
+
151
+ self.image_paths = []
152
+ self.poses = []
153
+ self.all_rays = []
154
+ self.all_images = []
155
+ self.all_masks = []
156
+ self.all_w2cs = []
157
+ self.all_intrinsics = []
158
+ for frame in self.meta['frames']:
159
+ pose = np.array(frame['transform_matrix']) @ self.blender2opencv
160
+ self.poses += [pose]
161
+ c2w = torch.FloatTensor(pose)
162
+ w2c = np.linalg.inv(c2w)
163
+ image_path = os.path.join(self.root_dir, f"{frame['file_path']}.png")
164
+ self.image_paths += [image_path]
165
+ img = Image.open(image_path)
166
+ img = img.resize(self.img_wh, Image.LANCZOS)
167
+ img = self.transform(img) # (4, h, w)
168
+
169
+ self.all_masks += [img[-1:,:]>0]
170
+ # img = img[:3, :] * img[ -1:,:] + (1 - img[-1:, :]) # blend A to RGB
171
+ img = img[:3, :] * img[ -1:,:]
172
+ img = img.numpy() # (3, h, w)
173
+ self.all_images += [img]
174
+
175
+
176
+ self.all_masks += []
177
+ self.all_intrinsics.append(self.intrinsics)
178
+ # - transform from world system to ref-camera system
179
+ self.all_w2cs.append(w2c @ np.linalg.inv(self.ref_w2c))
180
+
181
+ self.all_images = torch.from_numpy(np.stack(self.all_images)).to(torch.float32)
182
+ self.all_intrinsics = torch.from_numpy(np.stack(self.all_intrinsics)).to(torch.float32)
183
+ self.all_w2cs = torch.from_numpy(np.stack(self.all_w2cs)).to(torch.float32)
184
+ # self.img_wh = [self.img_wh[0] - self.clip_wh[0] - self.clip_wh[2],
185
+ # self.img_wh[1] - self.clip_wh[1] - self.clip_wh[3]]
186
+
187
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
188
+ center, radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
189
+ radius = radius * factor
190
+ scale_mat = np.diag([radius, radius, radius, 1.0])
191
+ scale_mat[:3, 3] = center.cpu().numpy()
192
+ scale_mat = scale_mat.astype(np.float32)
193
+
194
+ return scale_mat, 1. / radius.cpu().numpy()
195
+
196
+ def scale_cam_info(self):
197
+ new_intrinsics = []
198
+ new_near_fars = []
199
+ new_w2cs = []
200
+ new_c2ws = []
201
+ new_affine_mats = []
202
+ for idx in range(len(self.all_images)):
203
+
204
+ intrinsics = self.all_intrinsics[idx]
205
+ # import ipdb; ipdb.set_trace()
206
+ P = intrinsics @ self.all_w2cs[idx] @ self.scale_mat
207
+ P = P.cpu().numpy()[:3, :4]
208
+
209
+ # - should use load_K_Rt_from_P() to obtain c2w
210
+ c2w = load_K_Rt_from_P(None, P)[1]
211
+ w2c = np.linalg.inv(c2w)
212
+ new_w2cs.append(w2c)
213
+ new_c2ws.append(c2w)
214
+ new_intrinsics.append(intrinsics)
215
+ affine_mat = np.eye(4)
216
+ affine_mat[:3, :4] = intrinsics[:3, :3] @ w2c[:3, :4]
217
+ new_affine_mats.append(affine_mat)
218
+
219
+ camera_o = c2w[:3, 3]
220
+ dist = np.sqrt(np.sum(camera_o ** 2))
221
+ near = dist - 1
222
+ far = dist + 1
223
+
224
+ new_near_fars.append([0.95 * near, 1.05 * far])
225
+
226
+ new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars = \
227
+ np.stack(new_intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), \
228
+ np.stack(new_affine_mats), np.stack(new_near_fars)
229
+
230
+ new_intrinsics = torch.from_numpy(np.float32(new_intrinsics))
231
+ new_w2cs = torch.from_numpy(np.float32(new_w2cs))
232
+ new_c2ws = torch.from_numpy(np.float32(new_c2ws))
233
+ new_affine_mats = torch.from_numpy(np.float32(new_affine_mats))
234
+ new_near_fars = torch.from_numpy(np.float32(new_near_fars))
235
+
236
+ return new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars
237
+
238
+ def load_poses_all(self, file=f"transforms_train.json"):
239
+ with open(os.path.join(self.root_dir, file), 'r') as f:
240
+ meta = json.load(f)
241
+
242
+ c2ws = []
243
+ for i,frame in enumerate(meta['frames']):
244
+ c2ws.append(np.array(frame['transform_matrix']) @ self.blender2opencv)
245
+ return np.stack(c2ws)
246
+
247
+ def define_transforms(self):
248
+ self.transform = T.ToTensor()
249
+
250
+
251
+
252
+ def get_conditional_sample(self):
253
+ sample = {}
254
+ support_idxs = self.train_img_idx
255
+
256
+ sample['images'] = self.all_images[support_idxs] # (V, 3, H, W)
257
+ sample['w2cs'] = self.scaled_w2cs[self.train_img_idx] # (V, 4, 4)
258
+ sample['c2ws'] = self.scaled_c2ws[self.train_img_idx] # (V, 4, 4)
259
+ sample['near_fars'] = self.scaled_near_fars[self.train_img_idx] # (V, 2)
260
+ sample['intrinsics'] = self.scaled_intrinsics[self.train_img_idx][:, :3, :3] # (V, 3, 3)
261
+ sample['affine_mats'] = self.scaled_affine_mats[self.train_img_idx] # ! in world space
262
+
263
+ # sample['scan'] = self.scan_id
264
+ sample['scale_factor'] = torch.tensor(self.scale_factor)
265
+ sample['scale_mat'] = torch.from_numpy(self.scale_mat)
266
+ sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c))
267
+ sample['img_wh'] = torch.from_numpy(np.array(self.img_wh))
268
+ sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32)
269
+
270
+ return sample
271
+
272
+
273
+
274
+ def __len__(self):
275
+ if self.split == 'train':
276
+ return self.n_views * 1000
277
+ else:
278
+ return len(self.test_img_idx) * 1000
279
+
280
+
281
+ def __getitem__(self, idx):
282
+ sample = {}
283
+
284
+ if self.split == 'train':
285
+ render_idx = self.train_img_idx[idx % self.n_views]
286
+ support_idxs = [idx for idx in self.train_img_idx if idx != render_idx]
287
+ else:
288
+ # render_idx = idx % self.n_test_images + self.n_train_images
289
+ render_idx = self.test_img_idx[idx % len(self.test_img_idx)]
290
+ support_idxs = [render_idx]
291
+
292
+ sample['images'] = self.all_images[support_idxs] # (V, 3, H, W)
293
+ sample['w2cs'] = self.scaled_w2cs[support_idxs] # (V, 4, 4)
294
+ sample['c2ws'] = self.scaled_c2ws[support_idxs] # (V, 4, 4)
295
+ sample['intrinsics'] = self.scaled_intrinsics[support_idxs][:, :3, :3] # (V, 3, 3)
296
+ sample['affine_mats'] = self.scaled_affine_mats[support_idxs] # ! in world space
297
+ # sample['scan'] = self.scan_id
298
+ sample['scale_factor'] = torch.tensor(self.scale_factor)
299
+ sample['img_wh'] = torch.from_numpy(np.array(self.img_wh))
300
+ sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32)
301
+ sample['img_index'] = torch.tensor(render_idx)
302
+
303
+ # - query image
304
+ sample['query_image'] = self.all_images[render_idx]
305
+ sample['query_c2w'] = self.scaled_c2ws[render_idx]
306
+ sample['query_w2c'] = self.scaled_w2cs[render_idx]
307
+ sample['query_intrinsic'] = self.scaled_intrinsics[render_idx]
308
+ sample['query_near_far'] = self.scaled_near_fars[render_idx]
309
+ # sample['meta'] = str(self.scan_id) + "_" + os.path.basename(self.images_list[render_idx])
310
+ sample['scale_mat'] = torch.from_numpy(self.scale_mat)
311
+ sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c))
312
+ sample['rendering_c2ws'] = self.scaled_c2ws[self.test_img_idx]
313
+ sample['rendering_imgs_idx'] = torch.Tensor(np.array(self.test_img_idx).astype(np.int32))
314
+
315
+ # - generate rays
316
+ if self.split == 'val' or self.split == 'test':
317
+ sample_rays = gen_rays_from_single_image(
318
+ self.img_wh[1], self.img_wh[0],
319
+ sample['query_image'],
320
+ sample['query_intrinsic'],
321
+ sample['query_c2w'],
322
+ depth=None,
323
+ mask=None)
324
+ else:
325
+ sample_rays = gen_random_rays_from_single_image(
326
+ self.img_wh[1], self.img_wh[0],
327
+ self.N_rays,
328
+ sample['query_image'],
329
+ sample['query_intrinsic'],
330
+ sample['query_c2w'],
331
+ depth=None,
332
+ mask=None,
333
+ dilated_mask=None,
334
+ importance_sample=False,
335
+ h_patch_size=self.h_patch_size
336
+ )
337
+
338
+ sample['rays'] = sample_rays
339
+
340
+ return sample
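get_ray_directions() in blender.py implements the standard pinhole model: for pixel (i, j) the camera-space direction is ((i - cx) / fx, (j - cy) / fy, 1), and get_rays() rotates those directions by the camera-to-world rotation. A self-contained sketch of the same math without the kornia dependency (illustrative only; the focal and principal-point values below are made up):

import torch

def pinhole_directions(H, W, fx, fy, cx, cy):
    # Integer pixel grid; blender.py above skips the +0.5 pixel-center offset
    # (see the NeRF issue it cites), while blender_general.py adds it.
    j, i = torch.meshgrid(torch.arange(H, dtype=torch.float32),
                          torch.arange(W, dtype=torch.float32), indexing="ij")
    return torch.stack([(i - cx) / fx, (j - cy) / fy, torch.ones_like(i)], dim=-1)  # (H, W, 3)

dirs = pinhole_directions(256, 256, fx=280.0, fy=280.0, cx=128.0, cy=128.0)
c2w_rot = torch.eye(3)                      # camera-to-world rotation (identity here)
rays_d = (dirs @ c2w_rot.T).reshape(-1, 3)  # world-space directions, as in get_rays()
rays_o = torch.zeros_like(rays_d)           # all rays start at the camera origin
print(rays_d.shape)                         # torch.Size([65536, 3])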
SparseNeuS_demo_v1/data/blender_general.py ADDED
@@ -0,0 +1,432 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # note: unlike data/blender.py, +0.5 pixel centering is applied to the grid above
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # R from decomposeProjectionMatrix is world-to-camera; its transpose gives the camera-to-world rotation
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ # print("root_dir: ", root_dir)
70
+ self.root_dir = root_dir
71
+ self.split = split
72
+
73
+ self.n_views = n_views
74
+ self.N_rays = N_rays
75
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
76
+
77
+ self.clean_image = clean_image
78
+ self.importance_sample = importance_sample
79
+ self.test_ref_views = test_ref_views # used for testing
80
+ self.scale_factor = 1.0
81
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
+
83
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
+ with open(lvis_json_path, 'r') as f:
85
+ lvis_paths = json.load(f)
86
+ if self.split == 'train':
87
+ self.lvis_paths = lvis_paths['train']
88
+ else:
89
+ self.lvis_paths = lvis_paths['val']
90
+ if img_wh is not None:
91
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
+ 'img_wh must both be multiples of 32!'
93
+
94
+
95
+ pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
+ with open(pose_json_path, 'r') as f:
97
+ meta = json.load(f)
98
+
99
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
+ self.img_wh = (256, 256)
101
+ self.input_poses = np.array(list(meta["c2ws"].values()))
102
+ intrinsic = np.eye(4)
103
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
+ self.intrinsic = intrinsic
105
+ self.near_far = np.array(meta["near_far"])
106
+
107
+ self.define_transforms()
108
+ self.blender2opencv = np.array(
109
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
+ )
111
+
112
+
113
+ self.c2ws = []
114
+ self.w2cs = []
115
+ self.near_fars = []
116
+ # self.root_dir = root_dir
117
+ for idx, img_id in enumerate(self.img_ids):
118
+ pose = self.input_poses[idx]
119
+ c2w = pose @ self.blender2opencv
120
+ self.c2ws.append(c2w)
121
+ self.w2cs.append(np.linalg.inv(c2w))
122
+ self.near_fars.append(self.near_far)
123
+ self.c2ws = np.stack(self.c2ws, axis=0)
124
+ self.w2cs = np.stack(self.w2cs, axis=0)
125
+
126
+
127
+ self.all_intrinsics = [] # the cam info of the whole scene
128
+ self.all_extrinsics = []
129
+ self.all_near_fars = []
130
+ self.load_cam_info()
131
+
132
+ # * bounding box for rendering
133
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
135
+
136
+ # - used for cost volume regularization
137
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
+
140
+
141
+ def define_transforms(self):
142
+ self.transform = T.Compose([T.ToTensor()])
143
+
144
+
145
+
146
+ def load_cam_info(self):
147
+ for vid, img_id in enumerate(self.img_ids):
148
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
+ self.all_intrinsics.append(intrinsic)
150
+ self.all_extrinsics.append(extrinsic)
151
+ self.all_near_fars.append(near_far)
152
+
153
+ def read_depth(self, filename):
154
+ depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600)
155
+ depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5,
156
+ interpolation=cv2.INTER_NEAREST) # (600, 800)
157
+ depth_h = depth_h[44:556, 80:720] # (512, 640)
158
+ depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample,
159
+ interpolation=cv2.INTER_NEAREST)
160
+ depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4,
161
+ interpolation=cv2.INTER_NEAREST)
162
+
163
+ return depth, depth_h
164
+
165
+ def read_mask(self, filename):
166
+ mask_h = cv2.imread(filename, 0)
167
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
168
+ interpolation=cv2.INTER_NEAREST)
169
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
170
+ interpolation=cv2.INTER_NEAREST)
171
+
172
+ mask[mask > 0] = 1 # the masks stored in png are not binary
173
+ mask_h[mask_h > 0] = 1
174
+
175
+ return mask, mask_h
176
+
177
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
178
+
179
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
180
+ # print("center", center)
181
+ # print("radius", radius)
182
+ # print("bounds", bounds)
183
+ # import ipdb; ipdb.set_trace()
184
+ radius = radius * factor
185
+ scale_mat = np.diag([radius, radius, radius, 1.0])
186
+ scale_mat[:3, 3] = center.cpu().numpy()
187
+ scale_mat = scale_mat.astype(np.float32)
188
+
189
+ return scale_mat, 1. / radius.cpu().numpy()
190
+
191
+ def __len__(self):
192
+ return 8*len(self.lvis_paths)
193
+
194
+
195
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
196
+ depth_h = cv2.imread(filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 65535 * 1.4 + 0.5
197
+
198
+ depth_h[depth_h < near_bound+1e-3] = 0.0
199
+
200
+ depth = {}
201
+ for l in range(3):
202
+ depth[f"level_{l}"] = cv2.resize(
203
+ depth_h,
204
+ None,
205
+ fx=1.0 / (2**l),
206
+ fy=1.0 / (2**l),
207
+ interpolation=cv2.INTER_NEAREST,
208
+ )
209
+
210
+ if self.split == "train":
211
+ cutout = np.ones_like(depth[f"level_2"])
212
+ h0 = int(np.random.randint(0, high=cutout.shape[0] // 5, size=1))
213
+ h1 = int(
214
+ np.random.randint(
215
+ 4 * cutout.shape[0] // 5, high=cutout.shape[0], size=1
216
+ )
217
+ )
218
+ w0 = int(np.random.randint(0, high=cutout.shape[1] // 5, size=1))
219
+ w1 = int(
220
+ np.random.randint(
221
+ 4 * cutout.shape[1] // 5, high=cutout.shape[1], size=1
222
+ )
223
+ )
224
+ cutout[h0:h1, w0:w1] = 0
225
+ depth_aug = depth[f"level_2"] * cutout
226
+ else:
227
+ depth_aug = depth[f"level_2"].copy()
228
+
229
+ return depth, depth_h, depth_aug
230
+
231
+
232
+ def __getitem__(self, idx):
233
+ sample = {}
234
+ origin_idx = idx
235
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
236
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
237
+
238
+
239
+ folder_uid_dict = self.lvis_paths[idx//8]
240
+ idx = idx % 8 # [0, 7]
241
+ folder_id = folder_uid_dict['folder_id']
242
+ uid = folder_uid_dict['uid']
243
+
244
+ # idx = idx % 8
245
+ # uid = 'c40d63d5d740405e91c7f5fce855076e'
246
+ # folder_id = '000-123'
247
+
248
+ # target view
249
+ c2w = self.c2ws[idx]
250
+ w2c = np.linalg.inv(c2w)
251
+ w2c_ref = w2c
252
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
253
+
254
+ w2cs.append(w2c @ w2c_ref_inv)
255
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
256
+
257
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
258
+
259
+ depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
260
+
261
+
262
+ img = Image.open(img_filename)
263
+
264
+ img = self.transform(img) # (4, h, w)
265
+
266
+
267
+ if img.shape[0] == 4:
268
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
269
+ imgs += [img]
270
+
271
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
272
+ mask_h = depth_h > 0
273
+ # print("valid pixels", np.sum(mask_h))
274
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
275
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
276
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
277
+ depth_h = distance
278
+
279
+
280
+ depths_h.append(depth_h)
281
+ masks_h.append(mask_h)
282
+
283
+ intrinsic = self.intrinsic
284
+ intrinsics.append(intrinsic)
285
+
286
+
287
+ near_fars.append(self.near_fars[idx])
288
+ image_perm = 0 # only supervised on reference view
289
+
290
+ mask_dilated = None
291
+
292
+ src_views = range(8+idx*4, 8+(idx+1)*4)
293
+
294
+
295
+ for vid in src_views:
296
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_{vid%4}_10.png')
297
+
298
+ img = Image.open(img_filename)
299
+ img_wh = self.img_wh
300
+
301
+ img = self.transform(img)
302
+ if img.shape[0] == 4:
303
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
304
+
305
+ imgs += [img]
306
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
307
+ depths_h.append(depth_h)
308
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
309
+
310
+ near_fars.append(self.all_near_fars[vid])
311
+ intrinsics.append(self.all_intrinsics[vid])
312
+
313
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
314
+
315
+
316
+ # ! estimate scale_mat
317
+ scale_mat, scale_factor = self.cal_scale_mat(
318
+ img_hw=[img_wh[1], img_wh[0]],
319
+ intrinsics=intrinsics, extrinsics=w2cs,
320
+ near_fars=near_fars, factor=1.1
321
+ )
322
+ # print(scale_mat)
323
+ # print(scale_factor)
324
+ # ! calculate the new w2cs after scaling
325
+ new_near_fars = []
326
+ new_w2cs = []
327
+ new_c2ws = []
328
+ new_affine_mats = []
329
+ new_depths_h = []
330
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
331
+
332
+ P = intrinsic @ extrinsic @ scale_mat
333
+ P = P[:3, :4]
334
+ # - should use load_K_Rt_from_P() to obtain c2w
335
+ c2w = load_K_Rt_from_P(None, P)[1]
336
+ w2c = np.linalg.inv(c2w)
337
+ new_w2cs.append(w2c)
338
+ new_c2ws.append(c2w)
339
+ affine_mat = np.eye(4)
340
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
341
+ new_affine_mats.append(affine_mat)
342
+
343
+ camera_o = c2w[:3, 3]
344
+ dist = np.sqrt(np.sum(camera_o ** 2))
345
+ near = dist - 1
346
+ far = dist + 1
347
+
348
+ new_near_fars.append([0.95 * near, 1.05 * far])
349
+ new_depths_h.append(depth * scale_factor)
350
+
351
+ # print(new_near_fars)
352
+ imgs = torch.stack(imgs).float()
353
+ depths_h = np.stack(new_depths_h)
354
+ masks_h = np.stack(masks_h)
355
+
356
+ affine_mats = np.stack(new_affine_mats)
357
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
358
+ new_near_fars)
359
+
360
+ if self.split == 'train':
361
+ start_idx = 0
362
+ else:
363
+ start_idx = 1
364
+
365
+ view_ids = [idx] + list(src_views)
366
+ sample['origin_idx'] = origin_idx
367
+ sample['images'] = imgs # (V, 3, H, W)
368
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
369
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
370
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
371
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
372
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
373
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
374
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
375
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
376
+
377
+ # sample['light_idx'] = torch.tensor(light_idx)
378
+ sample['scan'] = folder_id
379
+
380
+ sample['scale_factor'] = torch.tensor(scale_factor)
381
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
382
+ sample['render_img_idx'] = torch.tensor(image_perm)
383
+ sample['partial_vol_origin'] = self.partial_vol_origin
384
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
385
+
386
+
387
+ # - image to render
388
+ sample['query_image'] = sample['images'][0]
389
+ sample['query_c2w'] = sample['c2ws'][0]
390
+ sample['query_w2c'] = sample['w2cs'][0]
391
+ sample['query_intrinsic'] = sample['intrinsics'][0]
392
+ sample['query_depth'] = sample['depths_h'][0]
393
+ sample['query_mask'] = sample['masks_h'][0]
394
+ sample['query_near_far'] = sample['near_fars'][0]
395
+
396
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
397
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
398
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
399
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
400
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
401
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
402
+ sample['view_ids'] = sample['view_ids'][start_idx:]
403
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
404
+
405
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
406
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
407
+
408
+ # - generate rays
409
+ if ('val' in self.split) or ('test' in self.split):
410
+ sample_rays = gen_rays_from_single_image(
411
+ img_wh[1], img_wh[0],
412
+ sample['query_image'],
413
+ sample['query_intrinsic'],
414
+ sample['query_c2w'],
415
+ depth=sample['query_depth'],
416
+ mask=sample['query_mask'] if self.clean_image else None)
417
+ else:
418
+ sample_rays = gen_random_rays_from_single_image(
419
+ img_wh[1], img_wh[0],
420
+ self.N_rays,
421
+ sample['query_image'],
422
+ sample['query_intrinsic'],
423
+ sample['query_c2w'],
424
+ depth=sample['query_depth'],
425
+ mask=sample['query_mask'] if self.clean_image else None,
426
+ dilated_mask=mask_dilated,
427
+ importance_sample=self.importance_sample)
428
+
429
+
430
+ sample['rays'] = sample_rays
431
+
432
+ return sample
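load_K_Rt_from_P() above recovers a camera-to-world pose from a 3x4 projection matrix: cv2.decomposeProjectionMatrix returns the intrinsics, the world-to-camera rotation R, and the camera centre in homogeneous coordinates, so the pose is built from R.T and the dehomogenized centre. A small round-trip check of that convention (illustrative sketch with made-up intrinsics, not part of the commit):

import cv2
import numpy as np

K = np.array([[280.0, 0.0, 128.0], [0.0, 280.0, 128.0], [0.0, 0.0, 1.0]])  # made-up intrinsics
a = np.deg2rad(30.0)
R = np.array([[np.cos(a), -np.sin(a), 0.0],
              [np.sin(a),  np.cos(a), 0.0],
              [0.0, 0.0, 1.0]])                      # world-to-camera rotation
t = np.array([[0.1], [-0.2], [2.0]])                 # world-to-camera translation
P = K @ np.hstack([R, t])                            # 3x4 projection matrix

K2, R2, C_h = cv2.decomposeProjectionMatrix(P)[:3]
K2 = K2 / K2[2, 2]
C = (C_h[:3] / C_h[3]).ravel()                       # camera centre in world coordinates

# Same construction as load_K_Rt_from_P(): c2w rotation = R2.T, c2w translation = camera centre.
assert np.allclose(R2, R, atol=1e-6)
assert np.allclose(C, (-R.T @ t).ravel(), atol=1e-6)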
SparseNeuS_demo_v1/data/blender_general_12_narrow.py ADDED
@@ -0,0 +1,427 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # note: unlike data/blender.py, +0.5 pixel centering is applied to the grid above
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # R from decomposeProjectionMatrix is world-to-camera; its transpose gives the camera-to-world rotation
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ self.root_dir = root_dir
70
+ self.split = split
71
+ self.imgs_per_instance = 12
72
+ self.n_views = n_views
73
+ self.N_rays = N_rays
74
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
75
+
76
+ self.clean_image = clean_image
77
+ self.importance_sample = importance_sample
78
+ self.test_ref_views = test_ref_views # used for testing
79
+ self.scale_factor = 1.0
80
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
81
+
82
+ lvis_json_path = '/objaverse-processed/zero12345_img/narrow_12_split_upd.json' # folder_id and uid
83
+ with open(lvis_json_path, 'r') as f:
84
+ lvis_paths = json.load(f)
85
+ if self.split == 'train':
86
+ self.lvis_paths = lvis_paths['train']
87
+ else:
88
+ self.lvis_paths = lvis_paths['val']
89
+ if img_wh is not None:
90
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
91
+ 'img_wh must both be multiples of 32!'
92
+
93
+
94
+ pose_json_path_narrow_8 = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
95
+ with open(pose_json_path_narrow_8, 'r') as f:
96
+ narrow_8_meta = json.load(f)
97
+
98
+ pose_json_path_narrow_4 = "/objaverse-processed/zero12345_img/zero12345_2stage_12_pose.json"
99
+ with open(pose_json_path_narrow_4, 'r') as f:
100
+ narrow_4_meta = json.load(f)
101
+
102
+
103
+ self.img_ids = list(narrow_8_meta["c2ws"].keys()) + list(narrow_4_meta["c2ws"].keys()) # (8 + 8*4) + (4 + 4*4)
104
+ self.img_wh = (256, 256)
105
+ self.input_poses = np.array(list(narrow_8_meta["c2ws"].values()) + list(narrow_4_meta["c2ws"].values()))
106
+ intrinsic = np.eye(4)
107
+ assert narrow_8_meta["intrinsics"] == narrow_4_meta["intrinsics"], "intrinsics not equal"
108
+ intrinsic[:3, :3] = np.array(narrow_8_meta["intrinsics"])
109
+ self.intrinsic = intrinsic
110
+ assert narrow_8_meta["near_far"] == narrow_4_meta["near_far"], "near_far not equal"
111
+ self.near_far = np.array(narrow_8_meta["near_far"])
112
+ self.near_far[1] = 1.8
113
+ self.define_transforms()
114
+ self.blender2opencv = np.array(
115
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
116
+ )
117
+
118
+
119
+ self.c2ws = []
120
+ self.w2cs = []
121
+ self.near_fars = []
122
+ for idx, img_id in enumerate(self.img_ids):
123
+ pose = self.input_poses[idx]
124
+ c2w = pose @ self.blender2opencv
125
+ self.c2ws.append(c2w)
126
+ self.w2cs.append(np.linalg.inv(c2w))
127
+ self.near_fars.append(self.near_far)
128
+
129
+
130
+
131
+ self.c2ws = np.stack(self.c2ws, axis=0)
132
+ self.w2cs = np.stack(self.w2cs, axis=0)
133
+
134
+
135
+ self.all_intrinsics = [] # the cam info of the whole scene
136
+ self.all_extrinsics = []
137
+ self.all_near_fars = []
138
+ self.load_cam_info()
139
+
140
+ # * bounding box for rendering
141
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
142
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
143
+
144
+ # - used for cost volume regularization
145
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
146
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
147
+
148
+
149
+ def define_transforms(self):
150
+ self.transform = T.Compose([T.ToTensor()])
151
+
152
+
153
+
154
+ def load_cam_info(self):
155
+ for vid, img_id in enumerate(self.img_ids):
156
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
157
+ self.all_intrinsics.append(intrinsic)
158
+ self.all_extrinsics.append(extrinsic)
159
+ self.all_near_fars.append(near_far)
160
+
161
+ def read_depth(self, filename):
162
+ pass
163
+
164
+ def read_mask(self, filename):
165
+ mask_h = cv2.imread(filename, 0)
166
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
167
+ interpolation=cv2.INTER_NEAREST)
168
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
169
+ interpolation=cv2.INTER_NEAREST)
170
+
171
+ mask[mask > 0] = 1 # the masks stored in png are not binary
172
+ mask_h[mask_h > 0] = 1
173
+
174
+ return mask, mask_h
175
+
176
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
177
+
178
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
179
+
180
+ radius = radius * factor
181
+ scale_mat = np.diag([radius, radius, radius, 1.0])
182
+ scale_mat[:3, 3] = center.cpu().numpy()
183
+ scale_mat = scale_mat.astype(np.float32)
184
+
185
+ return scale_mat, 1. / radius.cpu().numpy()
186
+
187
+ def __len__(self):
188
+ return self.imgs_per_instance*len(self.lvis_paths)
189
+
190
+
191
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
192
+ pass
193
+
194
+
195
+ def __getitem__(self, idx):
196
+ sample = {}
197
+ origin_idx = idx
198
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
199
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
200
+ idx_original=idx
201
+
202
+ folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
203
+
204
+ folder_id = folder_uid_dict['folder_id']
205
+ uid = folder_uid_dict['uid']
206
+
207
+ idx = idx % self.imgs_per_instance # [0, 11]
208
+ if idx < 8:
209
+ # target view
210
+ c2w = self.c2ws[idx]
211
+ w2c = np.linalg.inv(c2w)
212
+ w2c_ref = w2c
213
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
214
+
215
+ w2cs.append(w2c @ w2c_ref_inv)
216
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
217
+
218
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
219
+
220
+ depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
221
+
222
+ img = Image.open(img_filename)
223
+
224
+ img = self.transform(img) # (4, h, w)
225
+ else:
226
+ # target view
227
+ c2w = self.c2ws[idx-8+40]
228
+ w2c = np.linalg.inv(c2w)
229
+ w2c_ref = w2c
230
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
231
+
232
+ w2cs.append(w2c @ w2c_ref_inv)
233
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
234
+
235
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12/", folder_id, uid, f'view_{idx}.png')
236
+
237
+ depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12/", folder_id, uid, f'view_{idx}_depth_mm.png'))
238
+
239
+ img = Image.open(img_filename)
240
+
241
+ img = self.transform(img) # (4, h, w)
242
+
243
+ if img.shape[0] == 4:
244
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
245
+ imgs += [img]
246
+
247
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
248
+ mask_h = depth_h > 0
249
+ # print("valid pixels", np.sum(mask_h))
250
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
251
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
252
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
253
+ depth_h = distance
254
+
255
+
256
+
257
+ depths_h.append(depth_h)
258
+ masks_h.append(mask_h)
259
+
260
+ intrinsic = self.intrinsic
261
+ intrinsics.append(intrinsic)
262
+
263
+
264
+ near_fars.append(self.near_fars[idx])
265
+ image_perm = 0 # only supervised on reference view
266
+
267
+ mask_dilated = None
268
+
269
+
270
+ src_views = range(8, 8 + 8 * 4 + 4 + 4*4)
271
+ src_views_used = []
272
+ skipped_idx = [40, 41, 42, 43]
273
+ for vid in src_views:
274
+ if vid in skipped_idx:
275
+ continue
276
+
277
+ src_views_used.append(vid)
278
+ cur_view_id = (vid - 8) // 4 # [0, 7]
279
+
280
+ # choose narrow
281
+ if cur_view_id < 8:
282
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png')
283
+ else: # choose 2-stage
284
+ cur_view_id = cur_view_id - 1
285
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12", folder_id, uid, f'view_{cur_view_id}_{vid%4}.png')
286
+
287
+ img = Image.open(img_filename)
288
+ img_wh = self.img_wh
289
+
290
+ img = self.transform(img)
291
+ if img.shape[0] == 4:
292
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
293
+
294
+ imgs += [img]
295
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
296
+ depths_h.append(depth_h)
297
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
298
+
299
+ near_fars.append(self.all_near_fars[vid])
300
+ intrinsics.append(self.all_intrinsics[vid])
301
+
302
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
303
+
304
+
305
+
306
+
307
+ scale_mat, scale_factor = self.cal_scale_mat(
308
+ img_hw=[img_wh[1], img_wh[0]],
309
+ intrinsics=intrinsics, extrinsics=w2cs,
310
+ near_fars=near_fars, factor=1.1
311
+ )
312
+
313
+
314
+ new_near_fars = []
315
+ new_w2cs = []
316
+ new_c2ws = []
317
+ new_affine_mats = []
318
+ new_depths_h = []
319
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
320
+
321
+ P = intrinsic @ extrinsic @ scale_mat
322
+ P = P[:3, :4]
323
+ # - should use load_K_Rt_from_P() to obtain c2w
324
+ c2w = load_K_Rt_from_P(None, P)[1]
325
+ w2c = np.linalg.inv(c2w)
326
+ new_w2cs.append(w2c)
327
+ new_c2ws.append(c2w)
328
+ affine_mat = np.eye(4)
329
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
330
+ new_affine_mats.append(affine_mat)
331
+
332
+ camera_o = c2w[:3, 3]
333
+ dist = np.sqrt(np.sum(camera_o ** 2))
334
+ near = dist - 1
335
+ far = dist + 1
336
+
337
+ new_near_fars.append([0.95 * near, 1.05 * far])
338
+
339
+ new_depths_h.append(depth * scale_factor)
340
+
341
+ # print(new_near_fars)
342
+ # print("img numeber: ", len(imgs))
343
+ imgs = torch.stack(imgs).float()
344
+ depths_h = np.stack(new_depths_h)
345
+ masks_h = np.stack(masks_h)
346
+
347
+ affine_mats = np.stack(new_affine_mats)
348
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
349
+ new_near_fars)
350
+
351
+ if self.split == 'train':
352
+ start_idx = 0
353
+ else:
354
+ start_idx = 1
355
+
356
+ view_ids = [idx_original % self.imgs_per_instance] + src_views_used
357
+ sample['origin_idx'] = origin_idx
358
+ sample['images'] = imgs # (V, 3, H, W)
359
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
360
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
361
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
362
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
363
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
364
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
365
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
366
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
367
+
368
+ # sample['light_idx'] = torch.tensor(light_idx)
369
+ sample['scan'] = folder_id
370
+
371
+ sample['scale_factor'] = torch.tensor(scale_factor)
372
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
373
+ sample['render_img_idx'] = torch.tensor(image_perm)
374
+ sample['partial_vol_origin'] = self.partial_vol_origin
375
+ if view_ids[0] < 8:
376
+ meta_end = "_narrow"+ "_refview" + str(view_ids[0])
377
+ else:
378
+ meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
379
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end
380
+
381
+
382
+ # - image to render
383
+ sample['query_image'] = sample['images'][0]
384
+ sample['query_c2w'] = sample['c2ws'][0]
385
+ sample['query_w2c'] = sample['w2cs'][0]
386
+ sample['query_intrinsic'] = sample['intrinsics'][0]
387
+ sample['query_depth'] = sample['depths_h'][0]
388
+ sample['query_mask'] = sample['masks_h'][0]
389
+ sample['query_near_far'] = sample['near_fars'][0]
390
+
391
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
392
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
393
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
394
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
395
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
396
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
397
+ sample['view_ids'] = sample['view_ids'][start_idx:]
398
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
399
+
400
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
401
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
402
+
403
+ # - generate rays
404
+ if ('val' in self.split) or ('test' in self.split):
405
+ sample_rays = gen_rays_from_single_image(
406
+ img_wh[1], img_wh[0],
407
+ sample['query_image'],
408
+ sample['query_intrinsic'],
409
+ sample['query_c2w'],
410
+ depth=sample['query_depth'],
411
+ mask=sample['query_mask'] if self.clean_image else None)
412
+ else:
413
+ sample_rays = gen_random_rays_from_single_image(
414
+ img_wh[1], img_wh[0],
415
+ self.N_rays,
416
+ sample['query_image'],
417
+ sample['query_intrinsic'],
418
+ sample['query_c2w'],
419
+ depth=sample['query_depth'],
420
+ mask=sample['query_mask'] if self.clean_image else None,
421
+ dilated_mask=mask_dilated,
422
+ importance_sample=self.importance_sample)
423
+
424
+
425
+ sample['rays'] = sample_rays
426
+
427
+ return sample
SparseNeuS_demo_v1/data/blender_general_12_narrow_8.py ADDED
@@ -0,0 +1,427 @@
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the +0.5 above shifts the sampling grid to pixel centers
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # R from decomposeProjectionMatrix is world-to-camera; transpose it for the cam2world pose
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
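+ # Minimal usage sketch (illustrative only; this mirrors how the helper is called further below):
+ #   P = (intrinsic @ extrinsic @ scale_mat)[:3, :4]
+ #   _, c2w = load_K_Rt_from_P(None, P)   # cv2.decomposeProjectionMatrix factors P back into K, R, t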
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ self.root_dir = root_dir
70
+ self.split = split
71
+ self.imgs_per_instance = 8
72
+ self.n_views = n_views
73
+ self.N_rays = N_rays
74
+ self.batch_size = batch_size # - used to construct new metas for GRU fusion training
75
+
76
+ self.clean_image = clean_image
77
+ self.importance_sample = importance_sample
78
+ self.test_ref_views = test_ref_views # used for testing
79
+ self.scale_factor = 1.0
80
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
81
+
82
+ lvis_json_path = '/objaverse-processed/zero12345_img/narrow_12_split_upd.json' # folder_id and uid
83
+ with open(lvis_json_path, 'r') as f:
84
+ lvis_paths = json.load(f)
85
+ if self.split == 'train':
86
+ self.lvis_paths = lvis_paths['train']
87
+ else:
88
+ self.lvis_paths = lvis_paths['val']
89
+ if img_wh is not None:
90
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
91
+ 'img_wh must both be multiples of 32!'
92
+
93
+
94
+ pose_json_path_narrow_8 = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
95
+ with open(pose_json_path_narrow_8, 'r') as f:
96
+ narrow_8_meta = json.load(f)
97
+
98
+ pose_json_path_narrow_4 = "/objaverse-processed/zero12345_img/zero12345_2stage_12_pose.json"
99
+ with open(pose_json_path_narrow_4, 'r') as f:
100
+ narrow_4_meta = json.load(f)
101
+
102
+
103
+ self.img_ids = list(narrow_8_meta["c2ws"].keys()) + list(narrow_4_meta["c2ws"].keys()) # (8 + 8*4) + (4 + 4*4)
104
+ self.img_wh = (256, 256)
105
+ self.input_poses = np.array(list(narrow_8_meta["c2ws"].values()) + list(narrow_4_meta["c2ws"].values()))
106
+ intrinsic = np.eye(4)
107
+ assert narrow_8_meta["intrinsics"] == narrow_4_meta["intrinsics"], "intrinsics not equal"
108
+ intrinsic[:3, :3] = np.array(narrow_8_meta["intrinsics"])
109
+ self.intrinsic = intrinsic
110
+ assert narrow_8_meta["near_far"] == narrow_4_meta["near_far"], "near_far not equal"
111
+ self.near_far = np.array(narrow_8_meta["near_far"])
112
+ self.near_far[1] = 1.8
113
+ self.define_transforms()
114
+ self.blender2opencv = np.array(
115
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
116
+ )
117
+
118
+
119
+ self.c2ws = []
120
+ self.w2cs = []
121
+ self.near_fars = []
122
+ for idx, img_id in enumerate(self.img_ids):
123
+ pose = self.input_poses[idx]
124
+ c2w = pose @ self.blender2opencv
125
+ self.c2ws.append(c2w)
126
+ self.w2cs.append(np.linalg.inv(c2w))
127
+ self.near_fars.append(self.near_far)
128
+
129
+
130
+
131
+ self.c2ws = np.stack(self.c2ws, axis=0)
132
+ self.w2cs = np.stack(self.w2cs, axis=0)
133
+
134
+
135
+ self.all_intrinsics = [] # the cam info of the whole scene
136
+ self.all_extrinsics = []
137
+ self.all_near_fars = []
138
+ self.load_cam_info()
139
+
140
+ # * bounding box for rendering
141
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
142
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
143
+
144
+ # - used for cost volume regularization
145
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
146
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
147
+
148
+
149
+ def define_transforms(self):
150
+ self.transform = T.Compose([T.ToTensor()])
151
+
152
+
153
+
154
+ def load_cam_info(self):
155
+ for vid, img_id in enumerate(self.img_ids):
156
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
157
+ self.all_intrinsics.append(intrinsic)
158
+ self.all_extrinsics.append(extrinsic)
159
+ self.all_near_fars.append(near_far)
160
+
161
+ def read_depth(self, filename):
162
+ pass
163
+
164
+ def read_mask(self, filename):
165
+ mask_h = cv2.imread(filename, 0)
166
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
167
+ interpolation=cv2.INTER_NEAREST)
168
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
169
+ interpolation=cv2.INTER_NEAREST)
170
+
171
+ mask[mask > 0] = 1 # the masks stored in png are not binary
172
+ mask_h[mask_h > 0] = 1
173
+
174
+ return mask, mask_h
175
+
176
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
177
+
178
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
179
+
180
+ radius = radius * factor
181
+ scale_mat = np.diag([radius, radius, radius, 1.0])
182
+ scale_mat[:3, 3] = center.cpu().numpy()
183
+ scale_mat = scale_mat.astype(np.float32)
184
+
185
+ return scale_mat, 1. / radius.cpu().numpy()
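+ # scale_mat maps the unit sphere onto the scene's bounding sphere (its radius enlarged by `factor`),
+ # and the returned 1/radius is used below to bring metric depths into the same normalized space.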
186
+
187
+ def __len__(self):
188
+ return self.imgs_per_instance*len(self.lvis_paths)
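+ # one sample per (object instance, reference view): in __getitem__, idx // imgs_per_instance selects
+ # the object and idx % imgs_per_instance selects the reference view.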
189
+
190
+
191
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
192
+ pass
193
+
194
+
195
+ def __getitem__(self, idx):
196
+ sample = {}
197
+ origin_idx = idx
198
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
199
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
200
+ idx_original=idx
201
+
202
+ folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
203
+
204
+ folder_id = folder_uid_dict['folder_id']
205
+ uid = folder_uid_dict['uid']
206
+
207
+ idx = idx % self.imgs_per_instance # [0, 7] since imgs_per_instance == 8
208
+ if idx < 8:
209
+ # target view
210
+ c2w = self.c2ws[idx]
211
+ w2c = np.linalg.inv(c2w)
212
+ w2c_ref = w2c
213
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
214
+
215
+ w2cs.append(w2c @ w2c_ref_inv)
216
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
217
+
218
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
219
+
220
+ depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
221
+
222
+ img = Image.open(img_filename)
223
+
224
+ img = self.transform(img) # (4, h, w)
225
+ else:
226
+ # target view
227
+ c2w = self.c2ws[idx-8+40]
228
+ w2c = np.linalg.inv(c2w)
229
+ w2c_ref = w2c
230
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
231
+
232
+ w2cs.append(w2c @ w2c_ref_inv)
233
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
234
+
235
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12/", folder_id, uid, f'view_{idx}.png')
236
+
237
+ depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12/", folder_id, uid, f'view_{idx}_depth_mm.png'))
238
+
239
+ img = Image.open(img_filename)
240
+
241
+ img = self.transform(img) # (4, h, w)
242
+
243
+ if img.shape[0] == 4:
244
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
245
+ imgs += [img]
246
+
247
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
248
+ mask_h = depth_h > 0
249
+ # print("valid pixels", np.sum(mask_h))
250
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
251
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
252
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
253
+ depth_h = distance
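+ # the depth PNG stores z-depth (millimetres, divided by 1000 above); multiplying by the unnormalized
+ # ray directions and taking the norm converts it to per-pixel Euclidean distance along each ray.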
254
+
255
+
256
+
257
+ depths_h.append(depth_h)
258
+ masks_h.append(mask_h)
259
+
260
+ intrinsic = self.intrinsic
261
+ intrinsics.append(intrinsic)
262
+
263
+
264
+ near_fars.append(self.near_fars[idx])
265
+ image_perm = 0 # only supervised on reference view
266
+
267
+ mask_dilated = None
268
+
269
+
270
+ src_views = range(8, 8 + 8 * 4 + 4 + 4*4)
271
+ src_views_used = []
272
+ skipped_idx = [40, 41, 42, 43]
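+ # global view-id layout implied by img_ids above: 0-7 narrow GT views, 8-39 the 8x4 narrow
+ # predictions, 40-43 the two-stage anchor views (skipped here), 44-59 the 4x4 two-stage predictions.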
273
+ for vid in src_views:
274
+ if vid in skipped_idx:
275
+ continue
276
+
277
+ src_views_used.append(vid)
278
+ cur_view_id = (vid - 8) // 4 # 0-7 for narrow predictions, 9-12 for two-stage predictions (8 falls in skipped_idx)
279
+
280
+ # choose narrow
281
+ if cur_view_id < 8:
282
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png')
283
+ else: # choose 2-stage
284
+ cur_view_id = cur_view_id - 1
285
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12", folder_id, uid, f'view_{cur_view_id}_{vid%4}.png')
286
+
287
+ img = Image.open(img_filename)
288
+ img_wh = self.img_wh
289
+
290
+ img = self.transform(img)
291
+ if img.shape[0] == 4:
292
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
293
+
294
+ imgs += [img]
295
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
296
+ depths_h.append(depth_h)
297
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
298
+
299
+ near_fars.append(self.all_near_fars[vid])
300
+ intrinsics.append(self.all_intrinsics[vid])
301
+
302
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
303
+
304
+
305
+
306
+
307
+ scale_mat, scale_factor = self.cal_scale_mat(
308
+ img_hw=[img_wh[1], img_wh[0]],
309
+ intrinsics=intrinsics, extrinsics=w2cs,
310
+ near_fars=near_fars, factor=1.1
311
+ )
312
+
313
+
314
+ new_near_fars = []
315
+ new_w2cs = []
316
+ new_c2ws = []
317
+ new_affine_mats = []
318
+ new_depths_h = []
319
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
320
+
321
+ P = intrinsic @ extrinsic @ scale_mat
322
+ P = P[:3, :4]
323
+ # - should use load_K_Rt_from_P() to obtain c2w
324
+ c2w = load_K_Rt_from_P(None, P)[1]
325
+ w2c = np.linalg.inv(c2w)
326
+ new_w2cs.append(w2c)
327
+ new_c2ws.append(c2w)
328
+ affine_mat = np.eye(4)
329
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
330
+ new_affine_mats.append(affine_mat)
331
+
332
+ camera_o = c2w[:3, 3]
333
+ dist = np.sqrt(np.sum(camera_o ** 2))
334
+ near = dist - 1
335
+ far = dist + 1
336
+
337
+ new_near_fars.append([0.95 * near, 1.05 * far])
338
+
339
+ new_depths_h.append(depth * scale_factor)
340
+
341
+ # print(new_near_fars)
342
+ # print("img number: ", len(imgs))
343
+ imgs = torch.stack(imgs).float()
344
+ depths_h = np.stack(new_depths_h)
345
+ masks_h = np.stack(masks_h)
346
+
347
+ affine_mats = np.stack(new_affine_mats)
348
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
349
+ new_near_fars)
350
+
351
+ if self.split == 'train':
352
+ start_idx = 0
353
+ else:
354
+ start_idx = 1
355
+
356
+ view_ids = [idx_original % self.imgs_per_instance] + src_views_used
357
+ sample['origin_idx'] = origin_idx
358
+ sample['images'] = imgs # (V, 3, H, W)
359
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
360
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
361
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
362
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
363
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
364
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
365
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
366
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
367
+
368
+ # sample['light_idx'] = torch.tensor(light_idx)
369
+ sample['scan'] = folder_id
370
+
371
+ sample['scale_factor'] = torch.tensor(scale_factor)
372
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
373
+ sample['render_img_idx'] = torch.tensor(image_perm)
374
+ sample['partial_vol_origin'] = self.partial_vol_origin
375
+ if view_ids[0] < 8:
376
+ meta_end = "_narrow"+ "_refview" + str(view_ids[0])
377
+ else:
378
+ meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
379
+ sample['meta'] = str(folder_id) + "_" + str(uid) + meta_end # meta_end already carries the "_refview<k>" suffix
380
+
381
+
382
+ # - image to render
383
+ sample['query_image'] = sample['images'][0]
384
+ sample['query_c2w'] = sample['c2ws'][0]
385
+ sample['query_w2c'] = sample['w2cs'][0]
386
+ sample['query_intrinsic'] = sample['intrinsics'][0]
387
+ sample['query_depth'] = sample['depths_h'][0]
388
+ sample['query_mask'] = sample['masks_h'][0]
389
+ sample['query_near_far'] = sample['near_fars'][0]
390
+
391
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
392
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
393
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
394
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
395
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
396
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
397
+ sample['view_ids'] = sample['view_ids'][start_idx:]
398
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
399
+
400
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
401
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
402
+
403
+ # - generate rays
404
+ if ('val' in self.split) or ('test' in self.split):
405
+ sample_rays = gen_rays_from_single_image(
406
+ img_wh[1], img_wh[0],
407
+ sample['query_image'],
408
+ sample['query_intrinsic'],
409
+ sample['query_c2w'],
410
+ depth=sample['query_depth'],
411
+ mask=sample['query_mask'] if self.clean_image else None)
412
+ else:
413
+ sample_rays = gen_random_rays_from_single_image(
414
+ img_wh[1], img_wh[0],
415
+ self.N_rays,
416
+ sample['query_image'],
417
+ sample['query_intrinsic'],
418
+ sample['query_c2w'],
419
+ depth=sample['query_depth'],
420
+ mask=sample['query_mask'] if self.clean_image else None,
421
+ dilated_mask=mask_dilated,
422
+ importance_sample=self.importance_sample)
423
+
424
+
425
+ sample['rays'] = sample_rays
426
+
427
+ return sample
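+ # Rough usage sketch (argument values are illustrative; the image paths above are hard-coded, so
+ # root_dir is stored but not used for loading):
+ #   dataset = BlenderPerView(root_dir="", split="val", img_wh=(256, 256), N_rays=512)
+ #   sample = dataset[0]   # dict with 'images', 'w2cs', 'intrinsics', 'rays', ...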
SparseNeuS_demo_v1/data/blender_general_360.py ADDED
@@ -0,0 +1,412 @@
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+
18
+ def get_ray_directions(H, W, focal, center=None):
19
+ """
20
+ Get ray directions for all pixels in camera coordinate.
21
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
22
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
23
+ Inputs:
24
+ H, W, focal: image height, width and focal length
25
+ Outputs:
26
+ directions: (H, W, 3), the direction of the rays in camera coordinate
27
+ """
28
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
29
+
30
+ i, j = grid.unbind(-1)
31
+ # the +0.5 above shifts the sampling grid to pixel centers
32
+ # see https://github.com/bmild/nerf/issues/24
33
+ cent = center if center is not None else [W / 2, H / 2]
34
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
35
+
36
+ return directions
37
+
38
+ def load_K_Rt_from_P(filename, P=None):
39
+ if P is None:
40
+ lines = open(filename).read().splitlines()
41
+ if len(lines) == 4:
42
+ lines = lines[1:]
43
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
44
+ P = np.asarray(lines).astype(np.float32).squeeze()
45
+
46
+ out = cv2.decomposeProjectionMatrix(P)
47
+ K = out[0]
48
+ R = out[1]
49
+ t = out[2]
50
+
51
+ K = K / K[2, 2]
52
+ intrinsics = np.eye(4)
53
+ intrinsics[:3, :3] = K
54
+
55
+ pose = np.eye(4, dtype=np.float32)
56
+ pose[:3, :3] = R.transpose() # R from decomposeProjectionMatrix is world-to-camera; transpose it for the cam2world pose
57
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
58
+
59
+ return intrinsics, pose # ! return cam2world matrix here
60
+
61
+
62
+ # ! load one ref-image with multiple src-images in camera coordinate system
63
+ class BlenderPerView(Dataset):
64
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
65
+ split_filepath=None, pair_filepath=None,
66
+ N_rays=512,
67
+ vol_dims=[128, 128, 128], batch_size=1,
68
+ clean_image=False, importance_sample=False, test_ref_views=[]):
69
+
70
+ # print("root_dir: ", root_dir)
71
+ self.root_dir = root_dir
72
+ self.split = split
73
+
74
+ self.n_views = n_views
75
+ self.N_rays = N_rays
76
+ self.batch_size = batch_size # - used to construct new metas for GRU fusion training
77
+
78
+ self.clean_image = clean_image
79
+ self.importance_sample = importance_sample
80
+ self.test_ref_views = test_ref_views # used for testing
81
+ self.scale_factor = 1.0
82
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
83
+
84
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
85
+ with open(lvis_json_path, 'r') as f:
86
+ lvis_paths = json.load(f)
87
+ if self.split == 'train':
88
+ self.lvis_paths = lvis_paths['train']
89
+ else:
90
+ self.lvis_paths = lvis_paths['val']
91
+ if img_wh is not None:
92
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
93
+ 'img_wh must both be multiples of 32!'
94
+
95
+
96
+ pose_json_path = "/objaverse-processed/zero12345_img/zero12345_wide_pose.json"
97
+ with open(pose_json_path, 'r') as f:
98
+ meta = json.load(f)
99
+
100
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0_0", "view_0_5", "view_1_7"
101
+ self.img_wh = (256, 256)
102
+ self.input_poses = np.array(list(meta["c2ws"].values()))
103
+ intrinsic = np.eye(4)
104
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
105
+ self.intrinsic = intrinsic
106
+ self.near_far = np.array(meta["near_far"])
107
+
108
+
109
+ self.define_transforms()
110
+ self.blender2opencv = np.array(
111
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
112
+ )
113
+
114
+
115
+ self.c2ws = []
116
+ self.w2cs = []
117
+ self.near_fars = []
118
+ # self.root_dir = root_dir
119
+ for idx, img_id in enumerate(self.img_ids):
120
+ pose = self.input_poses[idx]
121
+ c2w = pose @ self.blender2opencv
122
+ self.c2ws.append(c2w)
123
+ self.w2cs.append(np.linalg.inv(c2w))
124
+ self.near_fars.append(self.near_far)
125
+ self.c2ws = np.stack(self.c2ws, axis=0)
126
+ self.w2cs = np.stack(self.w2cs, axis=0)
127
+
128
+
129
+ self.all_intrinsics = [] # the cam info of the whole scene
130
+ self.all_extrinsics = []
131
+ self.all_near_fars = []
132
+ self.load_cam_info()
133
+
134
+ # * bounding box for rendering
135
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
136
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
137
+
138
+ # - used for cost volume regularization
139
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
140
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
141
+
142
+
143
+ def define_transforms(self):
144
+ self.transform = T.Compose([T.ToTensor()])
145
+
146
+
147
+
148
+ def load_cam_info(self):
149
+ for vid, img_id in enumerate(self.img_ids):
150
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
151
+ self.all_intrinsics.append(intrinsic)
152
+ self.all_extrinsics.append(extrinsic)
153
+ self.all_near_fars.append(near_far)
154
+
155
+ def read_depth(self, filename):
156
+ depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600)
157
+ depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5,
158
+ interpolation=cv2.INTER_NEAREST) # (600, 800)
159
+ depth_h = depth_h[44:556, 80:720] # (512, 640)
160
+ depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample,
161
+ interpolation=cv2.INTER_NEAREST)
162
+ depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4,
163
+ interpolation=cv2.INTER_NEAREST)
164
+
165
+ return depth, depth_h
166
+
167
+ def read_mask(self, filename):
168
+ mask_h = cv2.imread(filename, 0)
169
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
170
+ interpolation=cv2.INTER_NEAREST)
171
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
172
+ interpolation=cv2.INTER_NEAREST)
173
+
174
+ mask[mask > 0] = 1 # the masks stored in png are not binary
175
+ mask_h[mask_h > 0] = 1
176
+
177
+ return mask, mask_h
178
+
179
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
180
+
181
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
182
+ # print("center", center)
183
+ # print("radius", radius)
184
+ # print("bounds", bounds)
185
+ # import ipdb; ipdb.set_trace()
186
+ radius = radius * factor
187
+ scale_mat = np.diag([radius, radius, radius, 1.0])
188
+ scale_mat[:3, 3] = center.cpu().numpy()
189
+ scale_mat = scale_mat.astype(np.float32)
190
+
191
+ return scale_mat, 1. / radius.cpu().numpy()
192
+
193
+ def __len__(self):
194
+ return 36*len(self.lvis_paths)
195
+
196
+
197
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
198
+ pass
199
+
200
+
201
+ def __getitem__(self, idx):
202
+ sample = {}
203
+
204
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
205
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
206
+
207
+
208
+ folder_uid_dict = self.lvis_paths[idx//36]
209
+
210
+
211
+ folder_id = folder_uid_dict['folder_id']
212
+ uid = folder_uid_dict['uid']
213
+
214
+ idx = idx % 36 # [0, 35]
215
+ gt_view_idx = idx // 12 # [0, 2]
216
+ target_view_idx = idx % 12 # [0, 11]
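+ # 36 samples per object: 3 ground-truth reference views (gt_view_idx) x 12 target views each
+ # (target_view_idx), matching the 36 * len(self.lvis_paths) returned by __len__.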
217
+
218
+
219
+
220
+ # target view
221
+ c2w = self.c2ws[idx]
222
+ w2c = np.linalg.inv(c2w)
223
+ w2c_ref = w2c
224
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
225
+
226
+ w2cs.append(w2c @ w2c_ref_inv)
227
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
228
+
229
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{gt_view_idx}_{target_view_idx}_gt.png')
230
+
231
+ depth_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{gt_view_idx}_{target_view_idx}_gt_depth_mm.png')
232
+
233
+
234
+ img = Image.open(img_filename)
235
+
236
+ img = self.transform(img) # (4, h, w)
237
+
238
+
239
+ if img.shape[0] == 4:
240
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
241
+ imgs += [img]
242
+
243
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
244
+ mask_h = depth_h > 0
245
+ # print("valid pixels", np.sum(mask_h))
246
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
247
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
248
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
249
+ depth_h = distance
250
+
251
+
252
+ depths_h.append(depth_h)
253
+ masks_h.append(mask_h)
254
+
255
+ intrinsic = self.intrinsic
256
+ intrinsics.append(intrinsic)
257
+
258
+
259
+
260
+ near_fars.append(self.near_fars[idx])
261
+ image_perm = 0 # only supervised on reference view
262
+
263
+ mask_dilated = None
264
+
265
+ # src_views = range(gt_view_idx * 12, (gt_view_idx + 1) * 12)
266
+
267
+ idx_of_12 = idx - 12 * gt_view_idx # idx % 12
268
+
269
+ src_views = [i % 12 + 12 * gt_view_idx for i in range(idx_of_12 - 2, idx_of_12 + 3)]
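+ # source views: the target view plus its two neighbours on each side, wrapping modulo 12 within
+ # the same gt_view_idx group, i.e. 5 source slots in total.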
270
+
271
+
272
+ for vid in src_views:
273
+ # if vid == idx:
274
+ # continue
275
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{gt_view_idx}_{target_view_idx}.png')
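+ # note: this path depends only on gt_view_idx/target_view_idx, not on vid, so every source slot
+ # in this loop loads the same predicted image (only the per-view poses differ).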
276
+
277
+ img = Image.open(img_filename)
278
+ img_wh = self.img_wh
279
+
280
+ img = self.transform(img)
281
+ if img.shape[0] == 4:
282
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
283
+
284
+ imgs += [img]
285
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
286
+ depths_h.append(depth_h)
287
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
288
+
289
+ near_fars.append(self.all_near_fars[vid])
290
+ intrinsics.append(self.all_intrinsics[vid])
291
+
292
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
293
+
294
+
295
+ # ! estimate scale_mat
296
+ scale_mat, scale_factor = self.cal_scale_mat(
297
+ img_hw=[img_wh[1], img_wh[0]],
298
+ intrinsics=intrinsics, extrinsics=w2cs,
299
+ near_fars=near_fars, factor=1.1
300
+ )
301
+ # print(scale_mat)
302
+ # print(scale_factor)
303
+ # ! calculate the new w2cs after scaling
304
+ new_near_fars = []
305
+ new_w2cs = []
306
+ new_c2ws = []
307
+ new_affine_mats = []
308
+ new_depths_h = []
309
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
310
+
311
+ P = intrinsic @ extrinsic @ scale_mat
312
+ P = P[:3, :4]
313
+ # - should use load_K_Rt_from_P() to obtain c2w
314
+ c2w = load_K_Rt_from_P(None, P)[1]
315
+ w2c = np.linalg.inv(c2w)
316
+ new_w2cs.append(w2c)
317
+ new_c2ws.append(c2w)
318
+ affine_mat = np.eye(4)
319
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
320
+ new_affine_mats.append(affine_mat)
321
+
322
+ camera_o = c2w[:3, 3]
323
+ dist = np.sqrt(np.sum(camera_o ** 2))
324
+ near = dist - 1
325
+ far = dist + 1
326
+
327
+ new_near_fars.append([0.95 * near, 1.05 * far])
328
+ new_depths_h.append(depth * scale_factor)
329
+
330
+ # print(new_near_fars)
331
+ imgs = torch.stack(imgs).float()
332
+ depths_h = np.stack(new_depths_h)
333
+ masks_h = np.stack(masks_h)
334
+
335
+ affine_mats = np.stack(new_affine_mats)
336
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
337
+ new_near_fars)
338
+
339
+ if self.split == 'train':
340
+ start_idx = 0
341
+ else:
342
+ start_idx = 1
343
+
344
+ view_ids = [idx] + list(src_views)
345
+
346
+ sample['images'] = imgs # (V, 3, H, W)
347
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
348
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
349
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
350
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
351
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
352
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
353
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
354
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
355
+
356
+ # sample['light_idx'] = torch.tensor(light_idx)
357
+ sample['scan'] = folder_id
358
+
359
+ sample['scale_factor'] = torch.tensor(scale_factor)
360
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
361
+ sample['render_img_idx'] = torch.tensor(image_perm)
362
+ sample['partial_vol_origin'] = self.partial_vol_origin
363
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
364
+
365
+
366
+ # - image to render
367
+ sample['query_image'] = sample['images'][0]
368
+ sample['query_c2w'] = sample['c2ws'][0]
369
+ sample['query_w2c'] = sample['w2cs'][0]
370
+ sample['query_intrinsic'] = sample['intrinsics'][0]
371
+ sample['query_depth'] = sample['depths_h'][0]
372
+ sample['query_mask'] = sample['masks_h'][0]
373
+ sample['query_near_far'] = sample['near_fars'][0]
374
+
375
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
376
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
377
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
378
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
379
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
380
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
381
+ sample['view_ids'] = sample['view_ids'][start_idx:]
382
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
383
+
384
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
385
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
386
+
387
+ # - generate rays
388
+ if ('val' in self.split) or ('test' in self.split):
389
+ sample_rays = gen_rays_from_single_image(
390
+ img_wh[1], img_wh[0],
391
+ sample['query_image'],
392
+ sample['query_intrinsic'],
393
+ sample['query_c2w'],
394
+ depth=sample['query_depth'],
395
+ mask=sample['query_mask'] if self.clean_image else None)
396
+
397
+ else:
398
+ sample_rays = gen_random_rays_from_single_image(
399
+ img_wh[1], img_wh[0],
400
+ self.N_rays,
401
+ sample['query_image'],
402
+ sample['query_intrinsic'],
403
+ sample['query_c2w'],
404
+ depth=sample['query_depth'],
405
+ mask=sample['query_mask'] if self.clean_image else None,
406
+ dilated_mask=mask_dilated,
407
+ importance_sample=self.importance_sample)
408
+
409
+
410
+ sample['rays'] = sample_rays
411
+
412
+ return sample
SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_3.py ADDED
@@ -0,0 +1,406 @@
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the +0.5 above shifts the sampling grid to pixel centers
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # R from decomposeProjectionMatrix is world-to-camera; transpose it for the cam2world pose
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ # print("root_dir: ", root_dir)
70
+ self.root_dir = root_dir
71
+ self.split = split
72
+
73
+ self.n_views = n_views
74
+ self.N_rays = N_rays
75
+ self.batch_size = batch_size # - used to construct new metas for GRU fusion training
76
+
77
+ self.clean_image = clean_image
78
+ self.importance_sample = importance_sample
79
+ self.test_ref_views = test_ref_views # used for testing
80
+ self.scale_factor = 1.0
81
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
+
83
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
+ with open(lvis_json_path, 'r') as f:
85
+ lvis_paths = json.load(f)
86
+ if self.split == 'train':
87
+ self.lvis_paths = lvis_paths['train']
88
+ else:
89
+ self.lvis_paths = lvis_paths['val']
90
+ if img_wh is not None:
91
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
+ 'img_wh must both be multiples of 32!'
93
+
94
+
95
+ pose_json_path = "/objaverse-processed/zero12345_img/zero12345_2stage_pose.json"
96
+ with open(pose_json_path, 'r') as f:
97
+ meta = json.load(f)
98
+
99
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0_0", "view_0_5", "view_1_7"
100
+ self.img_wh = (256, 256)
101
+ self.input_poses = np.array(list(meta["c2ws"].values()))
102
+ intrinsic = np.eye(4)
103
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
+ self.intrinsic = intrinsic
105
+ self.near_far = np.array(meta["near_far"])
106
+
107
+ self.define_transforms()
108
+ self.blender2opencv = np.array(
109
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
+ )
111
+
112
+
113
+ self.c2ws = []
114
+ self.w2cs = []
115
+ self.near_fars = []
116
+ # self.root_dir = root_dir
117
+ for idx, img_id in enumerate(self.img_ids):
118
+ pose = self.input_poses[idx]
119
+ c2w = pose @ self.blender2opencv
120
+ self.c2ws.append(c2w)
121
+ self.w2cs.append(np.linalg.inv(c2w))
122
+ self.near_fars.append(self.near_far)
123
+ self.c2ws = np.stack(self.c2ws, axis=0)
124
+ self.w2cs = np.stack(self.w2cs, axis=0)
125
+
126
+
127
+ self.all_intrinsics = [] # the cam info of the whole scene
128
+ self.all_extrinsics = []
129
+ self.all_near_fars = []
130
+ self.load_cam_info()
131
+
132
+ # * bounding box for rendering
133
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
135
+
136
+ # - used for cost volume regularization
137
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
+
140
+
141
+ def define_transforms(self):
142
+ self.transform = T.Compose([T.ToTensor()])
143
+
144
+
145
+
146
+ def load_cam_info(self):
147
+ for vid, img_id in enumerate(self.img_ids):
148
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
+ self.all_intrinsics.append(intrinsic)
150
+ self.all_extrinsics.append(extrinsic)
151
+ self.all_near_fars.append(near_far)
152
+
153
+ def read_depth(self, filename):
154
+ depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600)
155
+ depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5,
156
+ interpolation=cv2.INTER_NEAREST) # (600, 800)
157
+ depth_h = depth_h[44:556, 80:720] # (512, 640)
158
+ depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample,
159
+ interpolation=cv2.INTER_NEAREST)
160
+ depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4,
161
+ interpolation=cv2.INTER_NEAREST)
162
+
163
+ return depth, depth_h
164
+
165
+ def read_mask(self, filename):
166
+ mask_h = cv2.imread(filename, 0)
167
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
168
+ interpolation=cv2.INTER_NEAREST)
169
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
170
+ interpolation=cv2.INTER_NEAREST)
171
+
172
+ mask[mask > 0] = 1 # the masks stored in png are not binary
173
+ mask_h[mask_h > 0] = 1
174
+
175
+ return mask, mask_h
176
+
177
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
178
+
179
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
180
+ # print("center", center)
181
+ # print("radius", radius)
182
+ # print("bounds", bounds)
183
+ # import ipdb; ipdb.set_trace()
184
+ radius = radius * factor
185
+ scale_mat = np.diag([radius, radius, radius, 1.0])
186
+ scale_mat[:3, 3] = center.cpu().numpy()
187
+ scale_mat = scale_mat.astype(np.float32)
188
+
189
+ return scale_mat, 1. / radius.cpu().numpy()
190
+
191
+ def __len__(self):
192
+ return 6*len(self.lvis_paths)
193
+
194
+
195
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
196
+ pass
197
+
198
+
199
+ def __getitem__(self, idx):
200
+ sample = {}
201
+
202
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
203
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
204
+
205
+
206
+ folder_uid_dict = self.lvis_paths[idx//6]
207
+ idx = idx % 6
208
+
209
+ folder_id = folder_uid_dict['folder_id']
210
+ uid = folder_uid_dict['uid']
211
+
212
+ # idx = idx % 24 # [0, 23]
213
+
214
+
215
+
216
+ # target view
217
+ c2w = self.c2ws[idx]
218
+ w2c = np.linalg.inv(c2w)
219
+ w2c_ref = w2c
220
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
221
+
222
+ w2cs.append(w2c @ w2c_ref_inv)
223
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
224
+
225
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{idx}_gt.png')
226
+
227
+ depth_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{idx}_gt_depth_mm.png')
228
+
229
+
230
+ img = Image.open(img_filename)
231
+
232
+ img = self.transform(img) # (4, h, w)
233
+
234
+
235
+ if img.shape[0] == 4:
236
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
237
+ imgs += [img]
238
+
239
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
240
+ mask_h = depth_h > 0
241
+ # print("valid pixels", np.sum(mask_h))
242
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
243
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
244
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
245
+ depth_h = distance
246
+
247
+
248
+ depths_h.append(depth_h)
249
+ masks_h.append(mask_h)
250
+
251
+ intrinsic = self.intrinsic
252
+ intrinsics.append(intrinsic)
253
+
254
+
255
+
256
+ near_fars.append(self.near_fars[idx])
257
+ image_perm = 0 # only supervised on reference view
258
+
259
+ mask_dilated = None
260
+
261
+ # src_views = range(gt_view_idx * 12, (gt_view_idx + 1) * 12)
262
+
263
+
264
+ src_views = range(6+idx*4, 6+(idx+1)*4)
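+ # indexing assumption here: ids 0-5 in the 2-stage pose json are the six anchor views, followed by
+ # four predicted views per anchor, so these are the 4 predictions belonging to anchor `idx`.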
265
+
266
+ for vid in src_views:
267
+ # if vid == idx:
268
+ # continue
269
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{idx}_{vid % 4}.png')
270
+
271
+ img = Image.open(img_filename)
272
+ img_wh = self.img_wh
273
+
274
+ img = self.transform(img)
275
+ if img.shape[0] == 4:
276
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
277
+
278
+ imgs += [img]
279
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
280
+ depths_h.append(depth_h)
281
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
282
+
283
+ near_fars.append(self.all_near_fars[vid])
284
+ intrinsics.append(self.all_intrinsics[vid])
285
+
286
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
287
+
288
+
289
+ # ! estimate scale_mat
290
+ scale_mat, scale_factor = self.cal_scale_mat(
291
+ img_hw=[img_wh[1], img_wh[0]],
292
+ intrinsics=intrinsics, extrinsics=w2cs,
293
+ near_fars=near_fars, factor=1.1
294
+ )
295
+ # print(scale_mat)
296
+ # print(scale_factor)
297
+ # ! calculate the new w2cs after scaling
298
+ new_near_fars = []
299
+ new_w2cs = []
300
+ new_c2ws = []
301
+ new_affine_mats = []
302
+ new_depths_h = []
303
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
304
+
305
+ P = intrinsic @ extrinsic @ scale_mat
306
+ P = P[:3, :4]
307
+ # - should use load_K_Rt_from_P() to obtain c2w
308
+ c2w = load_K_Rt_from_P(None, P)[1]
309
+ w2c = np.linalg.inv(c2w)
310
+ new_w2cs.append(w2c)
311
+ new_c2ws.append(c2w)
312
+ affine_mat = np.eye(4)
313
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
314
+ new_affine_mats.append(affine_mat)
315
+
316
+ camera_o = c2w[:3, 3]
317
+ dist = np.sqrt(np.sum(camera_o ** 2))
318
+ near = dist - 1
319
+ far = dist + 1
320
+
321
+ new_near_fars.append([0.95 * near, 1.05 * far])
322
+ new_depths_h.append(depth * scale_factor)
323
+
324
+ # print(new_near_fars)
325
+ imgs = torch.stack(imgs).float()
326
+ depths_h = np.stack(new_depths_h)
327
+ masks_h = np.stack(masks_h)
328
+
329
+ affine_mats = np.stack(new_affine_mats)
330
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
331
+ new_near_fars)
332
+
333
+ if self.split == 'train':
334
+ start_idx = 0
335
+ else:
336
+ start_idx = 1
337
+
338
+ view_ids = [idx] + list(src_views)
339
+
340
+ sample['images'] = imgs # (V, 3, H, W)
341
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
342
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
343
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
344
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
345
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
346
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
347
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
348
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
349
+
350
+ # sample['light_idx'] = torch.tensor(light_idx)
351
+ sample['scan'] = folder_id
352
+
353
+ sample['scale_factor'] = torch.tensor(scale_factor)
354
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
355
+ sample['render_img_idx'] = torch.tensor(image_perm)
356
+ sample['partial_vol_origin'] = self.partial_vol_origin
357
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
358
+
359
+
360
+ # - image to render
361
+ sample['query_image'] = sample['images'][0]
362
+ sample['query_c2w'] = sample['c2ws'][0]
363
+ sample['query_w2c'] = sample['w2cs'][0]
364
+ sample['query_intrinsic'] = sample['intrinsics'][0]
365
+ sample['query_depth'] = sample['depths_h'][0]
366
+ sample['query_mask'] = sample['masks_h'][0]
367
+ sample['query_near_far'] = sample['near_fars'][0]
368
+
369
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
370
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
371
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
372
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
373
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
374
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
375
+ sample['view_ids'] = sample['view_ids'][start_idx:]
376
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
377
+
378
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
379
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
380
+
381
+ # - generate rays
382
+ if ('val' in self.split) or ('test' in self.split):
383
+ sample_rays = gen_rays_from_single_image(
384
+ img_wh[1], img_wh[0],
385
+ sample['query_image'],
386
+ sample['query_intrinsic'],
387
+ sample['query_c2w'],
388
+ depth=sample['query_depth'],
389
+ mask=sample['query_mask'] if self.clean_image else None)
390
+
391
+ else:
392
+ sample_rays = gen_random_rays_from_single_image(
393
+ img_wh[1], img_wh[0],
394
+ self.N_rays,
395
+ sample['query_image'],
396
+ sample['query_intrinsic'],
397
+ sample['query_c2w'],
398
+ depth=sample['query_depth'],
399
+ mask=sample['query_mask'] if self.clean_image else None,
400
+ dilated_mask=mask_dilated,
401
+ importance_sample=self.importance_sample)
402
+
403
+
404
+ sample['rays'] = sample_rays
405
+
406
+ return sample
SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_4.py ADDED
@@ -0,0 +1,411 @@
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the +0.5 above shifts the sampling grid to pixel centers
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # R from decomposeProjectionMatrix is world-to-camera; transpose it for the cam2world pose
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ # print("root_dir: ", root_dir)
70
+ self.root_dir = root_dir
71
+ self.split = split
72
+
73
+ self.n_views = n_views
74
+ self.N_rays = N_rays
75
+ self.batch_size = batch_size # - used to construct new metas for GRU fusion training
76
+
77
+ self.clean_image = clean_image
78
+ self.importance_sample = importance_sample
79
+ self.test_ref_views = test_ref_views # used for testing
80
+ self.scale_factor = 1.0
81
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
+
83
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
+ with open(lvis_json_path, 'r') as f:
85
+ lvis_paths = json.load(f)
86
+ if self.split == 'train':
87
+ self.lvis_paths = lvis_paths['train']
88
+ else:
89
+ self.lvis_paths = lvis_paths['val']
90
+ if img_wh is not None:
91
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
+ 'img_wh must both be multiples of 32!'
93
+
94
+
95
+ pose_json_path = "/objaverse-processed/zero12345_img/zero12345_2stage_5pred_pose.json"
96
+ with open(pose_json_path, 'r') as f:
97
+ meta = json.load(f)
98
+
99
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0_0", "view_0_5", "view_1_7"
100
+ self.img_wh = (256, 256)
101
+ self.input_poses = np.array(list(meta["c2ws"].values()))
102
+ intrinsic = np.eye(4)
103
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
+ self.intrinsic = intrinsic
105
+ self.near_far = np.array(meta["near_far"])
106
+
107
+ self.define_transforms()
108
+ self.blender2opencv = np.array(
109
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
+ )
111
+
112
+
113
+ self.c2ws = []
114
+ self.w2cs = []
115
+ self.near_fars = []
116
+ # self.root_dir = root_dir
117
+ for idx, img_id in enumerate(self.img_ids):
118
+ pose = self.input_poses[idx]
119
+ c2w = pose @ self.blender2opencv
120
+ self.c2ws.append(c2w)
121
+ self.w2cs.append(np.linalg.inv(c2w))
122
+ self.near_fars.append(self.near_far)
123
+ self.c2ws = np.stack(self.c2ws, axis=0)
124
+ self.w2cs = np.stack(self.w2cs, axis=0)
125
+
126
+
127
+ self.all_intrinsics = [] # the cam info of the whole scene
128
+ self.all_extrinsics = []
129
+ self.all_near_fars = []
130
+ self.load_cam_info()
131
+
132
+ # * bounding box for rendering
133
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
135
+
136
+ # - used for cost volume regularization
137
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
+
140
+
141
+ def define_transforms(self):
142
+ self.transform = T.Compose([T.ToTensor()])
143
+
144
+
145
+
146
+ def load_cam_info(self):
147
+ for vid, img_id in enumerate(self.img_ids):
148
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
+ self.all_intrinsics.append(intrinsic)
150
+ self.all_extrinsics.append(extrinsic)
151
+ self.all_near_fars.append(near_far)
152
+
153
+ def read_depth(self, filename):
154
+ depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600)
155
+ depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5,
156
+ interpolation=cv2.INTER_NEAREST) # (600, 800)
157
+ depth_h = depth_h[44:556, 80:720] # (512, 640)
158
+ depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample,
159
+ interpolation=cv2.INTER_NEAREST)
160
+ depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4,
161
+ interpolation=cv2.INTER_NEAREST)
162
+
163
+ return depth, depth_h
164
+
165
+ def read_mask(self, filename):
166
+ mask_h = cv2.imread(filename, 0)
167
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
168
+ interpolation=cv2.INTER_NEAREST)
169
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
170
+ interpolation=cv2.INTER_NEAREST)
171
+
172
+ mask[mask > 0] = 1 # the masks stored in png are not binary
173
+ mask_h[mask_h > 0] = 1
174
+
175
+ return mask, mask_h
176
+
177
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
178
+
179
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
180
+ # print("center", center)
181
+ # print("radius", radius)
182
+ # print("bounds", bounds)
183
+ # import ipdb; ipdb.set_trace()
184
+ radius = radius * factor
185
+ scale_mat = np.diag([radius, radius, radius, 1.0])
186
+ scale_mat[:3, 3] = center.cpu().numpy()
187
+ scale_mat = scale_mat.astype(np.float32)
188
+
189
+ return scale_mat, 1. / radius.cpu().numpy()
190
+
191
+ def __len__(self):
192
+ return 6*len(self.lvis_paths)
193
+
194
+
195
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
196
+ pass
197
+
198
+
199
+ def __getitem__(self, idx):
200
+ sample = {}
201
+
202
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
203
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
204
+
205
+
206
+ folder_uid_dict = self.lvis_paths[idx//6]
207
+ idx = idx % 6
208
+
209
+ folder_id = folder_uid_dict['folder_id']
210
+ uid = folder_uid_dict['uid']
211
+
212
+ # idx = idx % 24 # [0, 23]
213
+
214
+
215
+
216
+ # target view
217
+ c2w = self.c2ws[idx]
218
+ w2c = np.linalg.inv(c2w)
219
+ w2c_ref = w2c
220
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
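+ # express all poses relative to the target view; the target's own w2c below becomes the identity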
221
+
222
+ w2cs.append(w2c @ w2c_ref_inv)
223
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
224
+
225
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage", folder_id, uid, f'view_0_{idx}_gt.png')
226
+
227
+ depth_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage", folder_id, uid, f'view_0_{idx}_gt_depth_mm.png')
228
+
229
+
230
+ img = Image.open(img_filename)
231
+
232
+ img = self.transform(img) # (4, h, w)
233
+
234
+ # print("img_pre", img.shape)
235
+ if img.shape[0] == 4:
236
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
237
+ # print("img", img.shape)
238
+ imgs += [img]
239
+
240
+
241
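+ # depth is stored as 16-bit millimeters; convert to meters, then replace z-depth with per-pixel ray distance from the camera center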
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
242
+ mask_h = depth_h > 0
243
+ # print("valid pixels", np.sum(mask_h))
244
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
245
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
246
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
247
+ depth_h = distance
248
+ # print("depth_h", depth_h.shape)
249
+
250
+ depths_h.append(depth_h)
251
+ masks_h.append(mask_h)
252
+
253
+ intrinsic = self.intrinsic
254
+ intrinsics.append(intrinsic)
255
+
256
+
257
+ near_fars.append(self.near_fars[idx])
258
+ image_perm = 0 # only supervised on reference view
259
+
260
+ mask_dilated = None
261
+
262
+ # src_views = range(gt_view_idx * 12, (gt_view_idx + 1) * 12)
263
+
264
+
265
+ src_views = range(6+idx*4, 6+(idx+1)*4)
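+ # the 4 predicted source views attached to target view idx; their poses are stored after the 6 reference poses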
266
+
267
+ for vid in src_views:
268
+ # if vid == idx:
269
+ # continue
270
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{idx}_{vid % 4 + 1}.png')
271
+
272
+ img = Image.open(img_filename)
273
+ img_wh = self.img_wh
274
+
275
+ img = self.transform(img)
276
+ # print("img shape1: ", img.shape)
277
+ if img.shape[0] == 4:
278
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
279
+ # print("img shape2: ", img.shape)
280
+ imgs += [img]
281
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
282
+ depths_h.append(depth_h)
283
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
284
+
285
+ near_fars.append(self.all_near_fars[vid])
286
+ intrinsics.append(self.all_intrinsics[vid])
287
+
288
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
289
+
290
+
291
+ # ! estimate scale_mat
292
+ scale_mat, scale_factor = self.cal_scale_mat(
293
+ img_hw=[img_wh[1], img_wh[0]],
294
+ intrinsics=intrinsics, extrinsics=w2cs,
295
+ near_fars=near_fars, factor=1.1
296
+ )
297
+ # print(scale_mat)
298
+ # print(scale_factor)
299
+ # ! calculate the new w2cs after scaling
300
+ new_near_fars = []
301
+ new_w2cs = []
302
+ new_c2ws = []
303
+ new_affine_mats = []
304
+ new_depths_h = []
305
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
306
+
307
+ P = intrinsic @ extrinsic @ scale_mat
308
+ P = P[:3, :4]
309
+ # - should use load_K_Rt_from_P() to obtain c2w
310
+ c2w = load_K_Rt_from_P(None, P)[1]
311
+ w2c = np.linalg.inv(c2w)
312
+ new_w2cs.append(w2c)
313
+ new_c2ws.append(c2w)
314
+ affine_mat = np.eye(4)
315
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
316
+ new_affine_mats.append(affine_mat)
317
+
318
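+ # after scaling, the object lies inside a unit sphere at the origin, so near/far are taken as the camera-to-origin distance minus/plus 1 (with a small safety factor)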
+ camera_o = c2w[:3, 3]
319
+ dist = np.sqrt(np.sum(camera_o ** 2))
320
+ near = dist - 1
321
+ far = dist + 1
322
+
323
+ new_near_fars.append([0.95 * near, 1.05 * far])
324
+ new_depths_h.append(depth * scale_factor)
325
+
326
+ # print(new_near_fars)
327
+ # print("imgs: ", len(imgs))
328
+ # print("img1 shape:", imgs[0].shape)
329
+ # print("img2 shape:", imgs[1].shape)
330
+ imgs = torch.stack(imgs).float()
331
+ depths_h = np.stack(new_depths_h)
332
+ masks_h = np.stack(masks_h)
333
+
334
+ affine_mats = np.stack(new_affine_mats)
335
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
336
+ new_near_fars)
337
+
338
+ if self.split == 'train':
339
+ start_idx = 0
340
+ else:
341
+ start_idx = 1
342
+
343
+ view_ids = [idx] + list(src_views)
344
+
345
+ sample['images'] = imgs # (V, 3, H, W)
346
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
347
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
348
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
349
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
350
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
351
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
352
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
353
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
354
+
355
+ # sample['light_idx'] = torch.tensor(light_idx)
356
+ sample['scan'] = folder_id
357
+
358
+ sample['scale_factor'] = torch.tensor(scale_factor)
359
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
360
+ sample['render_img_idx'] = torch.tensor(image_perm)
361
+ sample['partial_vol_origin'] = self.partial_vol_origin
362
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
363
+
364
+
365
+ # - image to render
366
+ sample['query_image'] = sample['images'][0]
367
+ sample['query_c2w'] = sample['c2ws'][0]
368
+ sample['query_w2c'] = sample['w2cs'][0]
369
+ sample['query_intrinsic'] = sample['intrinsics'][0]
370
+ sample['query_depth'] = sample['depths_h'][0]
371
+ sample['query_mask'] = sample['masks_h'][0]
372
+ sample['query_near_far'] = sample['near_fars'][0]
373
+
374
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
375
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
376
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
377
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
378
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
379
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
380
+ sample['view_ids'] = sample['view_ids'][start_idx:]
381
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
382
+
383
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
384
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
385
+
386
+ # - generate rays
387
+ if ('val' in self.split) or ('test' in self.split):
388
+ sample_rays = gen_rays_from_single_image(
389
+ img_wh[1], img_wh[0],
390
+ sample['query_image'],
391
+ sample['query_intrinsic'],
392
+ sample['query_c2w'],
393
+ depth=sample['query_depth'],
394
+ mask=sample['query_mask'] if self.clean_image else None)
395
+
396
+ else:
397
+ sample_rays = gen_random_rays_from_single_image(
398
+ img_wh[1], img_wh[0],
399
+ self.N_rays,
400
+ sample['query_image'],
401
+ sample['query_intrinsic'],
402
+ sample['query_c2w'],
403
+ depth=sample['query_depth'],
404
+ mask=sample['query_mask'] if self.clean_image else None,
405
+ dilated_mask=mask_dilated,
406
+ importance_sample=self.importance_sample)
407
+
408
+
409
+ sample['rays'] = sample_rays
410
+
411
+ return sample
SparseNeuS_demo_v1/data/blender_general_4_narrow_and_4_2_stage_mix.py ADDED
@@ -0,0 +1,480 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # ? why need transpose here
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ self.root_dir = root_dir
70
+ self.split = split
71
+ self.imgs_per_instance = 16
72
+ self.n_views = n_views
73
+ self.N_rays = N_rays
74
+ self.batch_size = batch_size # - used to construct new metas for GRU fusion training
75
+
76
+ self.clean_image = clean_image
77
+ self.importance_sample = importance_sample
78
+ self.test_ref_views = test_ref_views # used for testing
79
+ self.scale_factor = 1.0
80
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
81
+
82
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
83
+ with open(lvis_json_path, 'r') as f:
84
+ lvis_paths = json.load(f)
85
+ if self.split == 'train':
86
+ self.lvis_paths = lvis_paths['train']
87
+ else:
88
+ self.lvis_paths = lvis_paths['val']
89
+ if img_wh is not None:
90
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
91
+ 'img_wh must both be multiples of 32!'
92
+
93
+
94
+ pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
95
+ with open(pose_json_path_narrow, 'r') as f:
96
+ narrow_meta = json.load(f)
97
+
98
+ pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
99
+ with open(pose_json_path_two_stage, 'r') as f:
100
+ two_stage_meta = json.load(f)
101
+
102
+
103
+ self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 4*4)
104
+ self.img_wh = (256, 256)
105
+ self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
106
+ intrinsic = np.eye(4)
107
+ assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
108
+ intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
109
+ self.intrinsic = intrinsic
110
+ assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
111
+ self.near_far = np.array(narrow_meta["near_far"])
112
+ self.near_far[1] = 1.8
113
+ self.define_transforms()
114
+ self.blender2opencv = np.array(
115
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
116
+ )
117
+
118
+
119
+ self.c2ws = []
120
+ self.w2cs = []
121
+ self.near_fars = []
122
+ for idx, img_id in enumerate(self.img_ids):
123
+ pose = self.input_poses[idx]
124
+ c2w = pose @ self.blender2opencv
125
+ self.c2ws.append(c2w)
126
+ self.w2cs.append(np.linalg.inv(c2w))
127
+ self.near_fars.append(self.near_far)
128
+
129
+
130
+
131
+ self.c2ws = np.stack(self.c2ws, axis=0)
132
+ self.w2cs = np.stack(self.w2cs, axis=0)
133
+
134
+
135
+ self.all_intrinsics = [] # the cam info of the whole scene
136
+ self.all_extrinsics = []
137
+ self.all_near_fars = []
138
+ self.load_cam_info()
139
+
140
+ # * bounding box for rendering
141
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
142
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
143
+
144
+ # - used for cost volume regularization
145
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
146
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
147
+
148
+
149
+ def define_transforms(self):
150
+ self.transform = T.Compose([T.ToTensor()])
151
+
152
+
153
+
154
+ def load_cam_info(self):
155
+ for vid, img_id in enumerate(self.img_ids):
156
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
157
+ self.all_intrinsics.append(intrinsic)
158
+ self.all_extrinsics.append(extrinsic)
159
+ self.all_near_fars.append(near_far)
160
+
161
+ def read_depth(self, filename):
162
+ pass
163
+
164
+ def read_mask(self, filename):
165
+ mask_h = cv2.imread(filename, 0)
166
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
167
+ interpolation=cv2.INTER_NEAREST)
168
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
169
+ interpolation=cv2.INTER_NEAREST)
170
+
171
+ mask[mask > 0] = 1 # the masks stored in png are not binary
172
+ mask_h[mask_h > 0] = 1
173
+
174
+ return mask, mask_h
175
+
176
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
177
+
178
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
179
+
180
+ radius = radius * factor
181
+ scale_mat = np.diag([radius, radius, radius, 1.0])
182
+ scale_mat[:3, 3] = center.cpu().numpy()
183
+ scale_mat = scale_mat.astype(np.float32)
184
+
185
+ return scale_mat, 1. / radius.cpu().numpy()
186
+
187
+ def __len__(self):
188
+ return self.imgs_per_instance * len(self.lvis_paths)
189
+
190
+
191
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
192
+ pass
193
+
194
+
195
+ def __getitem__(self, idx):
196
+ sample = {}
197
+ origin_idx = idx
198
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
199
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
200
+ idx_original=idx
201
+
202
+ folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
203
+
204
+ folder_id = folder_uid_dict['folder_id']
205
+ uid = folder_uid_dict['uid']
206
+
207
+ if idx % 2 == 0:
208
+ valid_list = [0, 2, 4, 6]
209
+ else:
210
+ valid_list = [1, 3, 5, 7]
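+ # use only the source-view groups whose reference-view index has the same parity as this sample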
211
+
212
+ if idx % 16 < 8:
213
+ idx = idx % 16 # [0, 7]
214
+ # target view
215
+ c2w = self.c2ws[idx]
216
+ w2c = np.linalg.inv(c2w)
217
+ w2c_ref = w2c
218
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
219
+
220
+ w2cs.append(w2c @ w2c_ref_inv)
221
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
222
+
223
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
224
+
225
+ depth_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')
226
+
227
+
228
+ img = Image.open(img_filename)
229
+
230
+ img = self.transform(img) # (4, h, w)
231
+
232
+
233
+ if img.shape[0] == 4:
234
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
235
+ imgs += [img]
236
+
237
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
238
+ mask_h = depth_h > 0
239
+ # print("valid pixels", np.sum(mask_h))
240
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
241
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
242
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
243
+ depth_h = distance
244
+
245
+
246
+ depths_h.append(depth_h)
247
+ masks_h.append(mask_h)
248
+
249
+ intrinsic = self.intrinsic
250
+ intrinsics.append(intrinsic)
251
+
252
+
253
+ near_fars.append(self.near_fars[idx])
254
+ image_perm = 0 # only supervised on reference view
255
+
256
+ mask_dilated = None
257
+
258
+ # src_views = range(8+idx*4, 8+(idx+1)*4)
259
+
260
+ src_views = range(8, 8 + 8 * 4)
261
+ src_views_used = []
262
+ for vid in src_views:
263
+ view_dix_to_use = (vid - 8) // 4
264
+ if view_dix_to_use not in valid_list:
265
+ continue
266
+ src_views_used.append(vid)
267
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
268
+
269
+ img = Image.open(img_filename)
270
+ img_wh = self.img_wh
271
+
272
+ img = self.transform(img)
273
+ if img.shape[0] == 4:
274
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
275
+
276
+ imgs += [img]
277
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
278
+ depths_h.append(depth_h)
279
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
280
+
281
+ near_fars.append(self.all_near_fars[vid])
282
+ intrinsics.append(self.all_intrinsics[vid])
283
+
284
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
285
+
286
+ else:
287
+ idx = idx % 16 - 8 # [0, 7]
288
+
289
+ c2w = self.c2ws[idx + 40]
290
+ w2c = np.linalg.inv(c2w)
291
+ w2c_ref = w2c
292
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
293
+
294
+ w2cs.append(w2c @ w2c_ref_inv)
295
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
296
+
297
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png')
298
+
299
+
300
+
301
+ img = Image.open(img_filename)
302
+
303
+ img = self.transform(img) # (4, h, w)
304
+
305
+ # print("img_pre", img.shape)
306
+ if img.shape[0] == 4:
307
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
308
+ # print("img", img.shape)
309
+ imgs += [img]
310
+
311
+
312
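+ # two-stage predictions come without ground-truth depth, so fill the depth map with -1 as an invalid marker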
+ depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
313
+ depth_h = depth_h.fill_(-1.0)
314
+ # depth_h = torch.fill((img.shape[1], img.shape[2]), -1.0)
315
+ # print("depth_h", depth_h.shape)
316
+ mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
317
+ depths_h.append(depth_h)
318
+ masks_h.append(mask_h)
319
+
320
+ intrinsic = self.intrinsic
321
+ intrinsics.append(intrinsic)
322
+
323
+
324
+ near_fars.append(self.near_fars[idx])
325
+ image_perm = 0 # only supervised on reference view
326
+
327
+ mask_dilated = None
328
+
329
+
330
+
331
+ src_views = range(40+8, 40+8+32)
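+ # two-stage poses follow the 40 narrow poses: the first 8 are reference views, the rest are their predicted source views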
332
+ src_views_used = []
333
+ for vid in src_views:
334
+ view_dix_to_use = (vid - 40 - 8) // 4
335
+ if view_dix_to_use not in valid_list:
336
+ continue
337
+ src_views_used.append(vid)
338
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_{(vid-48) % 4 + 1}.png')
339
+
340
+ img = Image.open(img_filename)
341
+ img_wh = self.img_wh
342
+
343
+ img = self.transform(img)
344
+ # print("img shape1: ", img.shape)
345
+ if img.shape[0] == 4:
346
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
347
+ # print("img shape2: ", img.shape)
348
+ imgs += [img]
349
+ depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
350
+ depth_h = depth_h.fill_(-1.0)
351
+ depths_h.append(depth_h)
352
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
353
+
354
+ near_fars.append(self.all_near_fars[vid])
355
+ intrinsics.append(self.all_intrinsics[vid])
356
+
357
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
358
+
359
+
360
+ scale_mat, scale_factor = self.cal_scale_mat(
361
+ img_hw=[img_wh[1], img_wh[0]],
362
+ intrinsics=intrinsics, extrinsics=w2cs,
363
+ near_fars=near_fars, factor=1.1
364
+ )
365
+
366
+
367
+ new_near_fars = []
368
+ new_w2cs = []
369
+ new_c2ws = []
370
+ new_affine_mats = []
371
+ new_depths_h = []
372
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
373
+
374
+ P = intrinsic @ extrinsic @ scale_mat
375
+ P = P[:3, :4]
376
+ # - should use load_K_Rt_from_P() to obtain c2w
377
+ c2w = load_K_Rt_from_P(None, P)[1]
378
+ w2c = np.linalg.inv(c2w)
379
+ new_w2cs.append(w2c)
380
+ new_c2ws.append(c2w)
381
+ affine_mat = np.eye(4)
382
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
383
+ new_affine_mats.append(affine_mat)
384
+
385
+ camera_o = c2w[:3, 3]
386
+ dist = np.sqrt(np.sum(camera_o ** 2))
387
+ near = dist - 1
388
+ far = dist + 1
389
+
390
+ new_near_fars.append([0.95 * near, 1.05 * far])
391
+
392
+ new_depths_h.append(depth * scale_factor)
393
+
394
+ # print(new_near_fars)
395
+ # print("img numeber: ", len(imgs))
396
+ imgs = torch.stack(imgs).float()
397
+ depths_h = np.stack(new_depths_h)
398
+ masks_h = np.stack(masks_h)
399
+
400
+ affine_mats = np.stack(new_affine_mats)
401
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
402
+ new_near_fars)
403
+
404
+ if self.split == 'train':
405
+ start_idx = 0
406
+ else:
407
+ start_idx = 1
408
+
409
+ view_ids = [idx_original % self.imgs_per_instance] + src_views_used
410
+ sample['origin_idx'] = origin_idx
411
+ sample['images'] = imgs # (V, 3, H, W)
412
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
413
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
414
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
415
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
416
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
417
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
418
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
419
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
420
+
421
+ # sample['light_idx'] = torch.tensor(light_idx)
422
+ sample['scan'] = folder_id
423
+
424
+ sample['scale_factor'] = torch.tensor(scale_factor)
425
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
426
+ sample['render_img_idx'] = torch.tensor(image_perm)
427
+ sample['partial_vol_origin'] = self.partial_vol_origin
428
+ if view_ids[0] < 8:
429
+ meta_end = "_narrow"+ "_refview" + str(view_ids[0])
430
+ else:
431
+ meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
432
+ sample['meta'] = str(folder_id) + "_" + str(uid) + meta_end
433
+
434
+
435
+ # - image to render
436
+ sample['query_image'] = sample['images'][0]
437
+ sample['query_c2w'] = sample['c2ws'][0]
438
+ sample['query_w2c'] = sample['w2cs'][0]
439
+ sample['query_intrinsic'] = sample['intrinsics'][0]
440
+ sample['query_depth'] = sample['depths_h'][0]
441
+ sample['query_mask'] = sample['masks_h'][0]
442
+ sample['query_near_far'] = sample['near_fars'][0]
443
+
444
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
445
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
446
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
447
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
448
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
449
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
450
+ sample['view_ids'] = sample['view_ids'][start_idx:]
451
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
452
+
453
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
454
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
455
+
456
+ # - generate rays
457
+ if ('val' in self.split) or ('test' in self.split):
458
+ sample_rays = gen_rays_from_single_image(
459
+ img_wh[1], img_wh[0],
460
+ sample['query_image'],
461
+ sample['query_intrinsic'],
462
+ sample['query_c2w'],
463
+ depth=sample['query_depth'],
464
+ mask=sample['query_mask'] if self.clean_image else None)
465
+ else:
466
+ sample_rays = gen_random_rays_from_single_image(
467
+ img_wh[1], img_wh[0],
468
+ self.N_rays,
469
+ sample['query_image'],
470
+ sample['query_intrinsic'],
471
+ sample['query_c2w'],
472
+ depth=sample['query_depth'],
473
+ mask=sample['query_mask'] if self.clean_image else None,
474
+ dilated_mask=mask_dilated,
475
+ importance_sample=self.importance_sample)
476
+
477
+
478
+ sample['rays'] = sample_rays
479
+
480
+ return sample
SparseNeuS_demo_v1/data/blender_general_4_narrow_and_6_2_stage_mix.py ADDED
@@ -0,0 +1,476 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # ? why need transpose here
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ self.root_dir = root_dir
70
+ self.split = split
71
+
72
+ self.n_views = n_views
73
+ self.N_rays = N_rays
74
+ self.batch_size = batch_size # - used to construct new metas for GRU fusion training
75
+
76
+ self.clean_image = clean_image
77
+ self.importance_sample = importance_sample
78
+ self.test_ref_views = test_ref_views # used for testing
79
+ self.scale_factor = 1.0
80
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
81
+
82
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
83
+ with open(lvis_json_path, 'r') as f:
84
+ lvis_paths = json.load(f)
85
+ if self.split == 'train':
86
+ self.lvis_paths = lvis_paths['train']
87
+ else:
88
+ self.lvis_paths = lvis_paths['val']
89
+ if img_wh is not None:
90
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
91
+ 'img_wh must both be multiples of 32!'
92
+
93
+
94
+ pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
95
+ with open(pose_json_path_narrow, 'r') as f:
96
+ narrow_meta = json.load(f)
97
+
98
+ pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_5pred_pose.json"
99
+ with open(pose_json_path_two_stage, 'r') as f:
100
+ two_stage_meta = json.load(f)
101
+
102
+
103
+ self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (6 + 6*4)
104
+ self.img_wh = (256, 256)
105
+ self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
106
+ intrinsic = np.eye(4)
107
+ assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
108
+ intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
109
+ self.intrinsic = intrinsic
110
+ assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
111
+ self.near_far = np.array(narrow_meta["near_far"])
112
+ self.near_far[1] = 1.8
113
+ self.define_transforms()
114
+ self.blender2opencv = np.array(
115
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
116
+ )
117
+
118
+
119
+ self.c2ws = []
120
+ self.w2cs = []
121
+ self.near_fars = []
122
+ for idx, img_id in enumerate(self.img_ids):
123
+ pose = self.input_poses[idx]
124
+ c2w = pose @ self.blender2opencv
125
+ self.c2ws.append(c2w)
126
+ self.w2cs.append(np.linalg.inv(c2w))
127
+ self.near_fars.append(self.near_far)
128
+
129
+
130
+
131
+ self.c2ws = np.stack(self.c2ws, axis=0)
132
+ self.w2cs = np.stack(self.w2cs, axis=0)
133
+
134
+
135
+ self.all_intrinsics = [] # the cam info of the whole scene
136
+ self.all_extrinsics = []
137
+ self.all_near_fars = []
138
+ self.load_cam_info()
139
+
140
+ # * bounding box for rendering
141
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
142
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
143
+
144
+ # - used for cost volume regularization
145
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
146
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
147
+
148
+
149
+ def define_transforms(self):
150
+ self.transform = T.Compose([T.ToTensor()])
151
+
152
+
153
+
154
+ def load_cam_info(self):
155
+ for vid, img_id in enumerate(self.img_ids):
156
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
157
+ self.all_intrinsics.append(intrinsic)
158
+ self.all_extrinsics.append(extrinsic)
159
+ self.all_near_fars.append(near_far)
160
+
161
+ def read_depth(self, filename):
162
+ pass
163
+
164
+ def read_mask(self, filename):
165
+ mask_h = cv2.imread(filename, 0)
166
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
167
+ interpolation=cv2.INTER_NEAREST)
168
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
169
+ interpolation=cv2.INTER_NEAREST)
170
+
171
+ mask[mask > 0] = 1 # the masks stored in png are not binary
172
+ mask_h[mask_h > 0] = 1
173
+
174
+ return mask, mask_h
175
+
176
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
177
+
178
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
179
+
180
+ radius = radius * factor
181
+ scale_mat = np.diag([radius, radius, radius, 1.0])
182
+ scale_mat[:3, 3] = center.cpu().numpy()
183
+ scale_mat = scale_mat.astype(np.float32)
184
+
185
+ return scale_mat, 1. / radius.cpu().numpy()
186
+
187
+ def __len__(self):
188
+ return 12*len(self.lvis_paths)
189
+
190
+
191
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
192
+ pass
193
+
194
+
195
+ def __getitem__(self, idx):
196
+ sample = {}
197
+ origin_idx = idx
198
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
199
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
200
+ idx_original=idx
201
+
202
+ folder_uid_dict = self.lvis_paths[idx//12]
203
+
204
+ folder_id = folder_uid_dict['folder_id']
205
+ uid = folder_uid_dict['uid']
206
+
207
+ if idx % 12 < 8:
208
+ idx = idx % 12 # [0, 7]
209
+ # target view
210
+ c2w = self.c2ws[idx]
211
+ w2c = np.linalg.inv(c2w)
212
+ w2c_ref = w2c
213
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
214
+
215
+ w2cs.append(w2c @ w2c_ref_inv)
216
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
217
+
218
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
219
+
220
+ depth_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')
221
+
222
+
223
+ img = Image.open(img_filename)
224
+
225
+ img = self.transform(img) # (4, h, w)
226
+
227
+
228
+ if img.shape[0] == 4:
229
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
230
+ imgs += [img]
231
+
232
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
233
+ mask_h = depth_h > 0
234
+ # print("valid pixels", np.sum(mask_h))
235
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
236
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
237
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
238
+ depth_h = distance
239
+
240
+
241
+ depths_h.append(depth_h)
242
+ masks_h.append(mask_h)
243
+
244
+ intrinsic = self.intrinsic
245
+ intrinsics.append(intrinsic)
246
+
247
+
248
+ near_fars.append(self.near_fars[idx])
249
+ image_perm = 0 # only supervised on reference view
250
+
251
+ mask_dilated = None
252
+
253
+ # src_views = range(8+idx*4, 8+(idx+1)*4)
254
+
255
+ src_views = range(8, 8 + 8 * 4)
256
+ src_views_used = []
257
+ for vid in src_views:
258
+ if (vid // 4) % 2 != idx % 2:
259
+ continue
260
+ src_views_used.append(vid)
261
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
262
+
263
+ img = Image.open(img_filename)
264
+ img_wh = self.img_wh
265
+
266
+ img = self.transform(img)
267
+ if img.shape[0] == 4:
268
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
269
+
270
+ imgs += [img]
271
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
272
+ depths_h.append(depth_h)
273
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
274
+
275
+ near_fars.append(self.all_near_fars[vid])
276
+ intrinsics.append(self.all_intrinsics[vid])
277
+
278
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
279
+
280
+ else:
281
+ idx = idx % 12 - 8 # [0, 3]
282
+ valid_list = [0, 2, 3, 5]
283
+ idx = valid_list[idx] # one of {0, 2, 3, 5}
284
+ c2w = self.c2ws[idx + 40]
285
+ w2c = np.linalg.inv(c2w)
286
+ w2c_ref = w2c
287
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
288
+
289
+ w2cs.append(w2c @ w2c_ref_inv)
290
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
291
+
292
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_5pred/", folder_id, uid, f'view_0_{idx}_0.png')
293
+
294
+
295
+
296
+ img = Image.open(img_filename)
297
+
298
+ img = self.transform(img) # (4, h, w)
299
+
300
+ # print("img_pre", img.shape)
301
+ if img.shape[0] == 4:
302
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
303
+ # print("img", img.shape)
304
+ imgs += [img]
305
+
306
+
307
+ depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
308
+ depth_h = depth_h.fill_(-1.0)
309
+ # depth_h = torch.fill((img.shape[1], img.shape[2]), -1.0)
310
+ # print("depth_h", depth_h.shape)
311
+ mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
312
+ depths_h.append(depth_h)
313
+ masks_h.append(mask_h)
314
+
315
+ intrinsic = self.intrinsic
316
+ intrinsics.append(intrinsic)
317
+
318
+
319
+ near_fars.append(self.near_fars[idx])
320
+ image_perm = 0 # only supervised on reference view
321
+
322
+ mask_dilated = None
323
+
324
+ # src_views = range(gt_view_idx * 12, (gt_view_idx + 1) * 12)
325
+
326
+
327
+ src_views = range(40+6, 40+6+24)
328
+ src_views_used = []
329
+ for vid in src_views:
330
+ view_dix_to_use = (vid - 40 - 6) // 4
331
+ if view_dix_to_use not in valid_list:
332
+ continue
333
+ src_views_used.append(vid)
334
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_5pred/", folder_id, uid, f'view_0_{idx}_{(vid-46) % 4 + 1}.png')
335
+
336
+ img = Image.open(img_filename)
337
+ img_wh = self.img_wh
338
+
339
+ img = self.transform(img)
340
+ # print("img shape1: ", img.shape)
341
+ if img.shape[0] == 4:
342
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
343
+ # print("img shape2: ", img.shape)
344
+ imgs += [img]
345
+ depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
346
+ depth_h = depth_h.fill_(-1.0)
347
+ depths_h.append(depth_h)
348
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
349
+
350
+ near_fars.append(self.all_near_fars[vid])
351
+ intrinsics.append(self.all_intrinsics[vid])
352
+
353
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
354
+
355
+
356
+ scale_mat, scale_factor = self.cal_scale_mat(
357
+ img_hw=[img_wh[1], img_wh[0]],
358
+ intrinsics=intrinsics, extrinsics=w2cs,
359
+ near_fars=near_fars, factor=1.1
360
+ )
361
+
362
+
363
+ new_near_fars = []
364
+ new_w2cs = []
365
+ new_c2ws = []
366
+ new_affine_mats = []
367
+ new_depths_h = []
368
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
369
+
370
+ P = intrinsic @ extrinsic @ scale_mat
371
+ P = P[:3, :4]
372
+ # - should use load_K_Rt_from_P() to obtain c2w
373
+ c2w = load_K_Rt_from_P(None, P)[1]
374
+ w2c = np.linalg.inv(c2w)
375
+ new_w2cs.append(w2c)
376
+ new_c2ws.append(c2w)
377
+ affine_mat = np.eye(4)
378
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
379
+ new_affine_mats.append(affine_mat)
380
+
381
+ camera_o = c2w[:3, 3]
382
+ dist = np.sqrt(np.sum(camera_o ** 2))
383
+ near = dist - 1
384
+ far = dist + 1
385
+
386
+ new_near_fars.append([0.95 * near, 1.05 * far])
387
+
388
+ new_depths_h.append(depth * scale_factor)
389
+
390
+ # print(new_near_fars)
391
+ # print("img numeber: ", len(imgs))
392
+ imgs = torch.stack(imgs).float()
393
+ depths_h = np.stack(new_depths_h)
394
+ masks_h = np.stack(masks_h)
395
+
396
+ affine_mats = np.stack(new_affine_mats)
397
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
398
+ new_near_fars)
399
+
400
+ if self.split == 'train':
401
+ start_idx = 0
402
+ else:
403
+ start_idx = 1
404
+
405
+ view_ids = [idx_original % 12] + src_views_used
406
+ sample['origin_idx'] = origin_idx
407
+ sample['images'] = imgs # (V, 3, H, W)
408
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
409
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
410
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
411
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
412
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
413
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
414
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
415
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
416
+
417
+ # sample['light_idx'] = torch.tensor(light_idx)
418
+ sample['scan'] = folder_id
419
+
420
+ sample['scale_factor'] = torch.tensor(scale_factor)
421
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
422
+ sample['render_img_idx'] = torch.tensor(image_perm)
423
+ sample['partial_vol_origin'] = self.partial_vol_origin
424
+ if view_ids[0] < 8:
425
+ meta_end = "_narrow"+ "_refview" + str(view_ids[0])
426
+ else:
427
+ meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
428
+ sample['meta'] = str(folder_id) + "_" + str(uid) + meta_end
429
+
430
+
431
+ # - image to render
432
+ sample['query_image'] = sample['images'][0]
433
+ sample['query_c2w'] = sample['c2ws'][0]
434
+ sample['query_w2c'] = sample['w2cs'][0]
435
+ sample['query_intrinsic'] = sample['intrinsics'][0]
436
+ sample['query_depth'] = sample['depths_h'][0]
437
+ sample['query_mask'] = sample['masks_h'][0]
438
+ sample['query_near_far'] = sample['near_fars'][0]
439
+
440
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
441
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
442
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
443
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
444
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
445
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
446
+ sample['view_ids'] = sample['view_ids'][start_idx:]
447
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
448
+
449
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
450
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
451
+
452
+ # - generate rays
453
+ if ('val' in self.split) or ('test' in self.split):
454
+ sample_rays = gen_rays_from_single_image(
455
+ img_wh[1], img_wh[0],
456
+ sample['query_image'],
457
+ sample['query_intrinsic'],
458
+ sample['query_c2w'],
459
+ depth=sample['query_depth'],
460
+ mask=sample['query_mask'] if self.clean_image else None)
461
+ else:
462
+ sample_rays = gen_random_rays_from_single_image(
463
+ img_wh[1], img_wh[0],
464
+ self.N_rays,
465
+ sample['query_image'],
466
+ sample['query_intrinsic'],
467
+ sample['query_c2w'],
468
+ depth=sample['query_depth'],
469
+ mask=sample['query_mask'] if self.clean_image else None,
470
+ dilated_mask=mask_dilated,
471
+ importance_sample=self.importance_sample)
472
+
473
+
474
+ sample['rays'] = sample_rays
475
+
476
+ return sample
SparseNeuS_demo_v1/data/blender_general_6_narrow_and_6_2_stage_blend_mix.py ADDED
@@ -0,0 +1,449 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # ? why need transpose here
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ self.root_dir = root_dir
70
+ self.split = split
71
+ if self.split == 'train':
72
+ self.imgs_per_instance = 12
73
+ else:
74
+ self.imgs_per_instance = 16
75
+ self.n_views = n_views
76
+ self.N_rays = N_rays
77
+ self.batch_size = batch_size # - used to construct new metas for GRU fusion training
78
+
79
+ self.clean_image = clean_image
80
+ self.importance_sample = importance_sample
81
+ self.test_ref_views = test_ref_views # used for testing
82
+ self.scale_factor = 1.0
83
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
84
+
85
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
86
+ with open(lvis_json_path, 'r') as f:
87
+ lvis_paths = json.load(f)
88
+ if self.split == 'train':
89
+ self.lvis_paths = lvis_paths['train']
90
+ else:
91
+ self.lvis_paths = lvis_paths['val']
92
+ if img_wh is not None:
93
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
94
+ 'img_wh must both be multiples of 32!'
95
+
96
+
97
+ pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
98
+ with open(pose_json_path_narrow, 'r') as f:
99
+ narrow_meta = json.load(f)
100
+
101
+ pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
102
+ with open(pose_json_path_two_stage, 'r') as f:
103
+ two_stage_meta = json.load(f)
104
+
105
+
106
+ self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 4*4)
107
+ self.img_wh = (256, 256)
108
+ self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
109
+ intrinsic = np.eye(4)
110
+ assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
111
+ intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
112
+ self.intrinsic = intrinsic
113
+ assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
114
+ self.near_far = np.array(narrow_meta["near_far"])
115
+ self.near_far[1] = 1.8
116
+ self.define_transforms()
117
+ self.blender2opencv = np.array(
118
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
119
+ )
120
+
121
+
122
+ self.c2ws = []
123
+ self.w2cs = []
124
+ self.near_fars = []
125
+ for idx, img_id in enumerate(self.img_ids):
126
+ pose = self.input_poses[idx]
127
+ c2w = pose @ self.blender2opencv
128
+ self.c2ws.append(c2w)
129
+ self.w2cs.append(np.linalg.inv(c2w))
130
+ self.near_fars.append(self.near_far)
131
+
132
+
133
+
134
+ self.c2ws = np.stack(self.c2ws, axis=0)
135
+ self.w2cs = np.stack(self.w2cs, axis=0)
136
+
137
+
138
+ self.all_intrinsics = [] # the cam info of the whole scene
139
+ self.all_extrinsics = []
140
+ self.all_near_fars = []
141
+ self.load_cam_info()
142
+
143
+ # * bounding box for rendering
144
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
145
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
146
+
147
+ # - used for cost volume regularization
148
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
149
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
150
+
151
+
152
+ def define_transforms(self):
153
+ self.transform = T.Compose([T.ToTensor()])
154
+
155
+
156
+
157
+ def load_cam_info(self):
158
+ for vid, img_id in enumerate(self.img_ids):
159
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
160
+ self.all_intrinsics.append(intrinsic)
161
+ self.all_extrinsics.append(extrinsic)
162
+ self.all_near_fars.append(near_far)
163
+
164
+ def read_depth(self, filename):
165
+ pass
166
+
167
+ def read_mask(self, filename):
168
+ mask_h = cv2.imread(filename, 0)
169
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
170
+ interpolation=cv2.INTER_NEAREST)
171
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
172
+ interpolation=cv2.INTER_NEAREST)
173
+
174
+ mask[mask > 0] = 1 # the masks stored in png are not binary
175
+ mask_h[mask_h > 0] = 1
176
+
177
+ return mask, mask_h
178
+
179
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
180
+
181
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
182
+
183
+ radius = radius * factor
184
+ scale_mat = np.diag([radius, radius, radius, 1.0])
185
+ scale_mat[:3, 3] = center.cpu().numpy()
186
+ scale_mat = scale_mat.astype(np.float32)
187
+
188
+ return scale_mat, 1. / radius.cpu().numpy()
189
+
190
+ def __len__(self):
191
+ return self.imgs_per_instance*len(self.lvis_paths)
192
+
193
+
194
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
195
+ pass
196
+
197
+
198
+ def __getitem__(self, idx):
199
+ sample = {}
200
+ origin_idx = idx
201
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
202
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
203
+ idx_original=idx
204
+
205
+ folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
206
+
207
+ folder_id = folder_uid_dict['folder_id']
208
+ uid = folder_uid_dict['uid']
209
+
210
+ if self.split == 'train':
211
+ if idx == 4:
212
+ idx = 5
213
+ elif idx == 5:
214
+ idx = 7
215
+ elif idx == 10:
216
+ idx = 13
217
+ elif idx == 11:
218
+ idx = 15
219
+
220
+ if idx % 16 < 8: # narrow image as target
221
+ idx = idx % 16 # [0, 7]
222
+ # target view
223
+ c2w = self.c2ws[idx]
224
+ w2c = np.linalg.inv(c2w)
225
+ w2c_ref = w2c
226
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
227
+
228
+ w2cs.append(w2c @ w2c_ref_inv)
229
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
230
+
231
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
232
+
233
+ depth_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')
234
+
235
+
236
+ img = Image.open(img_filename)
237
+
238
+ img = self.transform(img) # (4, h, w)
239
+
240
+
241
+ if img.shape[0] == 4:
242
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
243
+ imgs += [img]
244
+
245
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
246
+ mask_h = depth_h > 0
247
+ # print("valid pixels", np.sum(mask_h))
248
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
249
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
250
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
251
+ depth_h = distance
252
+
253
+ else:
254
+ idx = idx % 16 - 8 # [0, 5]
255
+ c2w = self.c2ws[idx + 40]
256
+ w2c = np.linalg.inv(c2w)
257
+ w2c_ref = w2c
258
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
259
+
260
+ w2cs.append(w2c @ w2c_ref_inv)
261
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
262
+
263
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png')
264
+
265
+ img = Image.open(img_filename)
266
+ img = self.transform(img) # (4, h, w)
267
+
268
+ # print("img_pre", img.shape)
269
+ if img.shape[0] == 4:
270
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
271
+ # print("img", img.shape)
272
+ imgs += [img]
273
+
274
+ depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
275
+ depth_h = depth_h.fill_(-1.0)
276
+
277
+ mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
278
+ depths_h.append(depth_h)
279
+ masks_h.append(mask_h)
280
+
281
+ intrinsic = self.intrinsic
282
+ intrinsics.append(intrinsic)
283
+
284
+
285
+ near_fars.append(self.near_fars[idx])
286
+ image_perm = 0 # only supervised on reference view
287
+
288
+ mask_dilated = None
289
+ if_use_narrow = []
290
+ if self.split == 'train':
291
+ for i in range(8):
292
+ if np.random.random() > 0.5:
293
+ if_use_narrow.append(True) # use narrow
294
+ else:
295
+ if_use_narrow.append(False) # 2-stage prediction
296
+ if_use_narrow[origin_idx % 8] = True if origin_idx < 8 else False
297
+ else:
298
+ for i in range(8):
299
+ if_use_narrow.append( True if origin_idx < 8 else False)
300
+ src_views = range(8, 8 + 8 * 4)
301
+ src_views_used = []
302
+ for vid in src_views:
303
+ if ((vid - 8) // 4 == 4) or ((vid - 8) // 4 == 6):
304
+ continue
305
+ src_views_used.append(vid)
306
+ cur_view_id = (vid - 8) // 4
307
+ # choose narrow
308
+ if if_use_narrow[cur_view_id]:
309
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png')
310
+ else: # choose 2-stage
311
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{(vid - 8) // 4}_{(vid-8) % 4 + 1}.png')
312
+
313
+ img = Image.open(img_filename)
314
+ img_wh = self.img_wh
315
+
316
+ img = self.transform(img)
317
+ if img.shape[0] == 4:
318
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
319
+
320
+ imgs += [img]
321
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
322
+ depths_h.append(depth_h)
323
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
324
+
325
+ near_fars.append(self.all_near_fars[vid])
326
+ intrinsics.append(self.all_intrinsics[vid])
327
+
328
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
329
+
330
+
331
+ scale_mat, scale_factor = self.cal_scale_mat(
332
+ img_hw=[img_wh[1], img_wh[0]],
333
+ intrinsics=intrinsics, extrinsics=w2cs,
334
+ near_fars=near_fars, factor=1.1
335
+ )
336
+
337
+
338
+ new_near_fars = []
339
+ new_w2cs = []
340
+ new_c2ws = []
341
+ new_affine_mats = []
342
+ new_depths_h = []
343
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
344
+
345
+ P = intrinsic @ extrinsic @ scale_mat
346
+ P = P[:3, :4]
347
+ # - should use load_K_Rt_from_P() to obtain c2w
348
+ c2w = load_K_Rt_from_P(None, P)[1]
349
+ w2c = np.linalg.inv(c2w)
350
+ new_w2cs.append(w2c)
351
+ new_c2ws.append(c2w)
352
+ affine_mat = np.eye(4)
353
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
354
+ new_affine_mats.append(affine_mat)
355
+
356
+ camera_o = c2w[:3, 3]
357
+ dist = np.sqrt(np.sum(camera_o ** 2))
358
+ near = dist - 1
359
+ far = dist + 1
360
+
361
+ new_near_fars.append([0.95 * near, 1.05 * far])
362
+ new_depths_h.append(depth * scale_factor)
363
+
364
+
365
+ imgs = torch.stack(imgs).float()
366
+ depths_h = np.stack(new_depths_h)
367
+ masks_h = np.stack(masks_h)
368
+
369
+ affine_mats = np.stack(new_affine_mats)
370
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
371
+ new_near_fars)
372
+
373
+ if self.split == 'train':
374
+ start_idx = 0
375
+ else:
376
+ start_idx = 1
377
+
378
+ view_ids = [idx_original % self.imgs_per_instance] + src_views_used
379
+ sample['origin_idx'] = origin_idx
380
+ sample['images'] = imgs # (V, 3, H, W)
381
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
382
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
383
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
384
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
385
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
386
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
387
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
388
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
389
+
390
+ # sample['light_idx'] = torch.tensor(light_idx)
391
+ sample['scan'] = folder_id
392
+
393
+ sample['scale_factor'] = torch.tensor(scale_factor)
394
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
395
+ sample['render_img_idx'] = torch.tensor(image_perm)
396
+ sample['partial_vol_origin'] = self.partial_vol_origin
397
+ if view_ids[0] < 8:
398
+ meta_end = "_narrow"+ "_refview" + str(view_ids[0])
399
+ else:
400
+ meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
401
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end
402
+
403
+
404
+ # - image to render
405
+ sample['query_image'] = sample['images'][0]
406
+ sample['query_c2w'] = sample['c2ws'][0]
407
+ sample['query_w2c'] = sample['w2cs'][0]
408
+ sample['query_intrinsic'] = sample['intrinsics'][0]
409
+ sample['query_depth'] = sample['depths_h'][0]
410
+ sample['query_mask'] = sample['masks_h'][0]
411
+ sample['query_near_far'] = sample['near_fars'][0]
412
+
413
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
414
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
415
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
416
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
417
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
418
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
419
+ sample['view_ids'] = sample['view_ids'][start_idx:]
420
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
421
+
422
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
423
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
424
+
425
+ # - generate rays
426
+ if ('val' in self.split) or ('test' in self.split):
427
+ sample_rays = gen_rays_from_single_image(
428
+ img_wh[1], img_wh[0],
429
+ sample['query_image'],
430
+ sample['query_intrinsic'],
431
+ sample['query_c2w'],
432
+ depth=sample['query_depth'],
433
+ mask=sample['query_mask'] if self.clean_image else None)
434
+ else:
435
+ sample_rays = gen_random_rays_from_single_image(
436
+ img_wh[1], img_wh[0],
437
+ self.N_rays,
438
+ sample['query_image'],
439
+ sample['query_intrinsic'],
440
+ sample['query_c2w'],
441
+ depth=sample['query_depth'],
442
+ mask=sample['query_mask'] if self.clean_image else None,
443
+ dilated_mask=mask_dilated,
444
+ importance_sample=self.importance_sample)
445
+
446
+
447
+ sample['rays'] = sample_rays
448
+
449
+ return sample
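The loader above converts the millimetre depth maps into per-pixel ray distances before scaling. As a minimal sketch of that conversion (not part of the commit), the snippet below back-projects a z-depth map through pinhole intrinsics and takes the ray length; the 256x256 resolution and the focal length are illustrative assumptions, and the `create_meshgrid` import simply mirrors the one used in this file.

```python
# Sketch only: z-depth -> per-pixel ray distance, mirroring the pattern in __getitem__.
# Resolution and focal length below are assumed for illustration.
import numpy as np
import torch
from kornia import create_meshgrid

def zdepth_to_distance(depth_z: np.ndarray, fx: float, fy: float) -> np.ndarray:
    H, W = depth_z.shape
    grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5  # (H, W, 2)
    i, j = grid.unbind(-1)
    dirs = torch.stack([(i - W / 2) / fx, (j - H / 2) / fy, torch.ones_like(i)], -1)
    surface = dirs.numpy() * depth_z[..., None]   # back-project to camera space
    return np.linalg.norm(surface, axis=-1)       # Euclidean distance along each ray

depth_mm = np.full((256, 256), 1000, dtype=np.uint16)             # e.g. a plane 1 m away
print(zdepth_to_distance(depth_mm / 1000.0, 280.0, 280.0).shape)  # (256, 256)
```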
SparseNeuS_demo_v1/data/blender_general_8_2_stage.py ADDED
@@ -0,0 +1,396 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # ? why need transpose here
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ self.root_dir = root_dir
70
+ self.split = split
71
+
72
+ self.imgs_per_instance = 8
73
+
74
+ self.n_views = n_views
75
+ self.N_rays = N_rays
76
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
77
+
78
+ self.clean_image = clean_image
79
+ self.importance_sample = importance_sample
80
+ self.test_ref_views = test_ref_views # used for testing
81
+ self.scale_factor = 1.0
82
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
83
+
84
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
85
+ with open(lvis_json_path, 'r') as f:
86
+ lvis_paths = json.load(f)
87
+ if self.split == 'train':
88
+ self.lvis_paths = lvis_paths['train']
89
+ else:
90
+ self.lvis_paths = lvis_paths['val']
91
+ if img_wh is not None:
92
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
93
+ 'img_wh must both be multiples of 32!'
94
+
95
+
96
+ pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
97
+ with open(pose_json_path_narrow, 'r') as f:
98
+ narrow_meta = json.load(f)
99
+
100
+ pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
101
+ with open(pose_json_path_two_stage, 'r') as f:
102
+ two_stage_meta = json.load(f)
103
+
104
+
105
+ self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4)
106
+ self.img_wh = (256, 256)
107
+ self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
108
+ intrinsic = np.eye(4)
109
+ assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
110
+ intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
111
+ self.intrinsic = intrinsic
112
+ assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
113
+ self.near_far = np.array(narrow_meta["near_far"])
114
+ self.near_far[1] = 1.8
115
+ self.define_transforms()
116
+ self.blender2opencv = np.array(
117
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
118
+ )
119
+
120
+
121
+ self.c2ws = []
122
+ self.w2cs = []
123
+ self.near_fars = []
124
+ for idx, img_id in enumerate(self.img_ids):
125
+ pose = self.input_poses[idx]
126
+ c2w = pose @ self.blender2opencv
127
+ self.c2ws.append(c2w)
128
+ self.w2cs.append(np.linalg.inv(c2w))
129
+ self.near_fars.append(self.near_far)
130
+
131
+
132
+
133
+ self.c2ws = np.stack(self.c2ws, axis=0)
134
+ self.w2cs = np.stack(self.w2cs, axis=0)
135
+
136
+
137
+ self.all_intrinsics = [] # the cam info of the whole scene
138
+ self.all_extrinsics = []
139
+ self.all_near_fars = []
140
+ self.load_cam_info()
141
+
142
+ # * bounding box for rendering
143
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
144
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
145
+
146
+ # - used for cost volume regularization
147
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
148
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
149
+
150
+
151
+ def define_transforms(self):
152
+ self.transform = T.Compose([T.ToTensor()])
153
+
154
+
155
+
156
+ def load_cam_info(self):
157
+ for vid, img_id in enumerate(self.img_ids):
158
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
159
+ self.all_intrinsics.append(intrinsic)
160
+ self.all_extrinsics.append(extrinsic)
161
+ self.all_near_fars.append(near_far)
162
+
163
+ def read_depth(self, filename):
164
+ pass
165
+
166
+ def read_mask(self, filename):
167
+ mask_h = cv2.imread(filename, 0)
168
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
169
+ interpolation=cv2.INTER_NEAREST)
170
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
171
+ interpolation=cv2.INTER_NEAREST)
172
+
173
+ mask[mask > 0] = 1 # the masks stored in png are not binary
174
+ mask_h[mask_h > 0] = 1
175
+
176
+ return mask, mask_h
177
+
178
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
179
+
180
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
181
+
182
+ radius = radius * factor
183
+ scale_mat = np.diag([radius, radius, radius, 1.0])
184
+ scale_mat[:3, 3] = center.cpu().numpy()
185
+ scale_mat = scale_mat.astype(np.float32)
186
+
187
+ return scale_mat, 1. / radius.cpu().numpy()
188
+
189
+ def __len__(self):
190
+ return self.imgs_per_instance * len(self.lvis_paths)
191
+
192
+
193
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
194
+ pass
195
+
196
+
197
+ def __getitem__(self, idx):
198
+ sample = {}
199
+ origin_idx = idx
200
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
201
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
202
+ idx_original=idx
203
+
204
+ folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
205
+
206
+ folder_id = folder_uid_dict['folder_id']
207
+ uid = folder_uid_dict['uid']
208
+
209
+ idx = idx % self.imgs_per_instance # [0, 7]
210
+ # target view
211
+ c2w = self.c2ws[idx]
212
+ w2c = np.linalg.inv(c2w)
213
+ w2c_ref = w2c
214
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
215
+
216
+ w2cs.append(w2c @ w2c_ref_inv)
217
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
218
+
219
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
220
+
221
+ depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
222
+
223
+
224
+ img = Image.open(img_filename)
225
+
226
+ img = self.transform(img) # (4, h, w)
227
+
228
+
229
+ if img.shape[0] == 4:
230
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
231
+ imgs += [img]
232
+
233
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
234
+ mask_h = depth_h > 0
235
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
236
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
237
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
238
+ depth_h = distance
239
+
240
+
241
+ depths_h.append(depth_h)
242
+ masks_h.append(mask_h)
243
+
244
+ intrinsic = self.intrinsic
245
+ intrinsics.append(intrinsic)
246
+
247
+
248
+ near_fars.append(self.near_fars[idx])
249
+ image_perm = 0 # only supervised on reference view
250
+
251
+ mask_dilated = None
252
+
253
+
254
+
255
+ src_views = range(8, 8+32)
256
+ src_views_used = []
257
+ for vid in src_views:
258
+ view_idx_to_use = (vid - 8) // 4 # note: computed but not used below
259
+ src_views_used.append(vid)
260
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_{(vid-8) % 4 + 1}.png')
261
+
262
+ img = Image.open(img_filename)
263
+ img_wh = self.img_wh
264
+
265
+ img = self.transform(img)
266
+ if img.shape[0] == 4:
267
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
268
+ imgs += [img]
269
+ depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
270
+ depth_h = depth_h.fill_(-1.0)
271
+ depths_h.append(depth_h)
272
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
273
+
274
+ near_fars.append(self.all_near_fars[vid])
275
+ intrinsics.append(self.all_intrinsics[vid])
276
+
277
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
278
+
279
+
280
+ scale_mat, scale_factor = self.cal_scale_mat(
281
+ img_hw=[img_wh[1], img_wh[0]],
282
+ intrinsics=intrinsics, extrinsics=w2cs,
283
+ near_fars=near_fars, factor=1.1
284
+ )
285
+
286
+
287
+ new_near_fars = []
288
+ new_w2cs = []
289
+ new_c2ws = []
290
+ new_affine_mats = []
291
+ new_depths_h = []
292
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
293
+
294
+ P = intrinsic @ extrinsic @ scale_mat
295
+ P = P[:3, :4]
296
+ # - should use load_K_Rt_from_P() to obtain c2w
297
+ c2w = load_K_Rt_from_P(None, P)[1]
298
+ w2c = np.linalg.inv(c2w)
299
+ new_w2cs.append(w2c)
300
+ new_c2ws.append(c2w)
301
+ affine_mat = np.eye(4)
302
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
303
+ new_affine_mats.append(affine_mat)
304
+
305
+ camera_o = c2w[:3, 3]
306
+ dist = np.sqrt(np.sum(camera_o ** 2))
307
+ near = dist - 1
308
+ far = dist + 1
309
+
310
+ new_near_fars.append([0.95 * near, 1.05 * far])
311
+
312
+ new_depths_h.append(depth * scale_factor)
313
+
314
+
315
+ imgs = torch.stack(imgs).float()
316
+ depths_h = np.stack(new_depths_h)
317
+ masks_h = np.stack(masks_h)
318
+
319
+ affine_mats = np.stack(new_affine_mats)
320
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
321
+ new_near_fars)
322
+
323
+ if self.split == 'train':
324
+ start_idx = 0
325
+ else:
326
+ start_idx = 1
327
+
328
+ view_ids = [idx_original % self.imgs_per_instance] + src_views_used
329
+ sample['origin_idx'] = origin_idx
330
+ sample['images'] = imgs # (V, 3, H, W)
331
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
332
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
333
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
334
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
335
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
336
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
337
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
338
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
339
+
340
+ # sample['light_idx'] = torch.tensor(light_idx)
341
+ sample['scan'] = folder_id
342
+
343
+ sample['scale_factor'] = torch.tensor(scale_factor)
344
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
345
+ sample['render_img_idx'] = torch.tensor(image_perm)
346
+ sample['partial_vol_origin'] = self.partial_vol_origin
347
+ meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
348
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end
349
+
350
+
351
+ # - image to render
352
+ sample['query_image'] = sample['images'][0]
353
+ sample['query_c2w'] = sample['c2ws'][0]
354
+ sample['query_w2c'] = sample['w2cs'][0]
355
+ sample['query_intrinsic'] = sample['intrinsics'][0]
356
+ sample['query_depth'] = sample['depths_h'][0]
357
+ sample['query_mask'] = sample['masks_h'][0]
358
+ sample['query_near_far'] = sample['near_fars'][0]
359
+
360
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
361
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
362
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
363
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
364
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
365
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
366
+ sample['view_ids'] = sample['view_ids'][start_idx:]
367
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
368
+
369
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
370
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
371
+
372
+ # - generate rays
373
+ if ('val' in self.split) or ('test' in self.split):
374
+ sample_rays = gen_rays_from_single_image(
375
+ img_wh[1], img_wh[0],
376
+ sample['query_image'],
377
+ sample['query_intrinsic'],
378
+ sample['query_c2w'],
379
+ depth=sample['query_depth'],
380
+ mask=sample['query_mask'] if self.clean_image else None)
381
+ else:
382
+ sample_rays = gen_random_rays_from_single_image(
383
+ img_wh[1], img_wh[0],
384
+ self.N_rays,
385
+ sample['query_image'],
386
+ sample['query_intrinsic'],
387
+ sample['query_c2w'],
388
+ depth=sample['query_depth'],
389
+ mask=sample['query_mask'] if self.clean_image else None,
390
+ dilated_mask=mask_dilated,
391
+ importance_sample=self.importance_sample)
392
+
393
+
394
+ sample['rays'] = sample_rays
395
+
396
+ return sample
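A hypothetical usage sketch for the loader defined in this file: the import path below is assumed, the constructor arguments simply follow the signature shown above, and the hard-coded /objaverse-processed directories must exist on disk for it to run.

```python
# Hypothetical usage sketch, not part of the commit. Assumes the module path and
# that the hard-coded /objaverse-processed data directories are available.
from torch.utils.data import DataLoader
from data.blender_general_8_2_stage import BlenderPerView  # assumed import path

dataset = BlenderPerView(root_dir="", split="val", n_views=3,
                         img_wh=(256, 256), N_rays=512, clean_image=True)
loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0)

sample = next(iter(loader))
print(sorted(sample.keys()))
print(sample["images"].shape)  # (1, V, 3, 256, 256) source views for one instance
```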
SparseNeuS_demo_v1/data/blender_general_8_4_gt.py ADDED
@@ -0,0 +1,396 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # ? why need transpose here
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ self.root_dir = root_dir
70
+ self.split = split
71
+
72
+ self.imgs_per_instance = 8
73
+
74
+ self.n_views = n_views
75
+ self.N_rays = N_rays
76
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
77
+
78
+ self.clean_image = clean_image
79
+ self.importance_sample = importance_sample
80
+ self.test_ref_views = test_ref_views # used for testing
81
+ self.scale_factor = 1.0
82
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
83
+
84
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
85
+ with open(lvis_json_path, 'r') as f:
86
+ lvis_paths = json.load(f)
87
+ if self.split == 'train':
88
+ self.lvis_paths = lvis_paths['train']
89
+ else:
90
+ self.lvis_paths = lvis_paths['val']
91
+ if img_wh is not None:
92
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
93
+ 'img_wh must both be multiples of 32!'
94
+
95
+
96
+ pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
97
+ with open(pose_json_path_narrow, 'r') as f:
98
+ narrow_meta = json.load(f)
99
+
100
+ pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
101
+ with open(pose_json_path_two_stage, 'r') as f:
102
+ two_stage_meta = json.load(f)
103
+
104
+
105
+ self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4)
106
+ self.img_wh = (256, 256)
107
+ self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
108
+ intrinsic = np.eye(4)
109
+ assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
110
+ intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
111
+ self.intrinsic = intrinsic
112
+ assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
113
+ self.near_far = np.array(narrow_meta["near_far"])
114
+ self.near_far[1] = 1.8
115
+ self.define_transforms()
116
+ self.blender2opencv = np.array(
117
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
118
+ )
119
+
120
+
121
+ self.c2ws = []
122
+ self.w2cs = []
123
+ self.near_fars = []
124
+ for idx, img_id in enumerate(self.img_ids):
125
+ pose = self.input_poses[idx]
126
+ c2w = pose @ self.blender2opencv
127
+ self.c2ws.append(c2w)
128
+ self.w2cs.append(np.linalg.inv(c2w))
129
+ self.near_fars.append(self.near_far)
130
+
131
+
132
+
133
+ self.c2ws = np.stack(self.c2ws, axis=0)
134
+ self.w2cs = np.stack(self.w2cs, axis=0)
135
+
136
+
137
+ self.all_intrinsics = [] # the cam info of the whole scene
138
+ self.all_extrinsics = []
139
+ self.all_near_fars = []
140
+ self.load_cam_info()
141
+
142
+ # * bounding box for rendering
143
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
144
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
145
+
146
+ # - used for cost volume regularization
147
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
148
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
149
+
150
+
151
+ def define_transforms(self):
152
+ self.transform = T.Compose([T.ToTensor()])
153
+
154
+
155
+
156
+ def load_cam_info(self):
157
+ for vid, img_id in enumerate(self.img_ids):
158
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
159
+ self.all_intrinsics.append(intrinsic)
160
+ self.all_extrinsics.append(extrinsic)
161
+ self.all_near_fars.append(near_far)
162
+
163
+ def read_depth(self, filename):
164
+ pass
165
+
166
+ def read_mask(self, filename):
167
+ mask_h = cv2.imread(filename, 0)
168
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
169
+ interpolation=cv2.INTER_NEAREST)
170
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
171
+ interpolation=cv2.INTER_NEAREST)
172
+
173
+ mask[mask > 0] = 1 # the masks stored in png are not binary
174
+ mask_h[mask_h > 0] = 1
175
+
176
+ return mask, mask_h
177
+
178
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
179
+
180
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
181
+
182
+ radius = radius * factor
183
+ scale_mat = np.diag([radius, radius, radius, 1.0])
184
+ scale_mat[:3, 3] = center.cpu().numpy()
185
+ scale_mat = scale_mat.astype(np.float32)
186
+
187
+ return scale_mat, 1. / radius.cpu().numpy()
188
+
189
+ def __len__(self):
190
+ return self.imgs_per_instance * len(self.lvis_paths)
191
+
192
+
193
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
194
+ pass
195
+
196
+
197
+ def __getitem__(self, idx):
198
+ sample = {}
199
+ origin_idx = idx
200
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
201
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
202
+ idx_original=idx
203
+
204
+ folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
205
+
206
+ folder_id = folder_uid_dict['folder_id']
207
+ uid = folder_uid_dict['uid']
208
+
209
+ idx = idx % self.imgs_per_instance # [0, 7]
210
+ # target view
211
+ c2w = self.c2ws[idx]
212
+ w2c = np.linalg.inv(c2w)
213
+ w2c_ref = w2c
214
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
215
+
216
+ w2cs.append(w2c @ w2c_ref_inv)
217
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
218
+
219
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
220
+
221
+ depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
222
+
223
+
224
+ img = Image.open(img_filename)
225
+
226
+ img = self.transform(img) # (4, h, w)
227
+
228
+
229
+ if img.shape[0] == 4:
230
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
231
+ imgs += [img]
232
+
233
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
234
+ mask_h = depth_h > 0
235
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
236
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
237
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
238
+ depth_h = distance
239
+
240
+
241
+ depths_h.append(depth_h)
242
+ masks_h.append(mask_h)
243
+
244
+ intrinsic = self.intrinsic
245
+ intrinsics.append(intrinsic)
246
+
247
+
248
+ near_fars.append(self.near_fars[idx])
249
+ image_perm = 0 # only supervised on reference view
250
+
251
+ mask_dilated = None
252
+
253
+
254
+
255
+ src_views = range(8, 8+32)
256
+ src_views_used = []
257
+ for vid in src_views:
258
+ view_idx_to_use = (vid - 8) // 4 # note: computed but not used below
259
+ src_views_used.append(vid)
260
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10_gt.png')
261
+
262
+ img = Image.open(img_filename)
263
+ img_wh = self.img_wh
264
+
265
+ img = self.transform(img)
266
+ if img.shape[0] == 4:
267
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
268
+ imgs += [img]
269
+ depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
270
+ depth_h = depth_h.fill_(-1.0)
271
+ depths_h.append(depth_h)
272
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
273
+
274
+ near_fars.append(self.all_near_fars[vid])
275
+ intrinsics.append(self.all_intrinsics[vid])
276
+
277
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
278
+
279
+
280
+ scale_mat, scale_factor = self.cal_scale_mat(
281
+ img_hw=[img_wh[1], img_wh[0]],
282
+ intrinsics=intrinsics, extrinsics=w2cs,
283
+ near_fars=near_fars, factor=1.1
284
+ )
285
+
286
+
287
+ new_near_fars = []
288
+ new_w2cs = []
289
+ new_c2ws = []
290
+ new_affine_mats = []
291
+ new_depths_h = []
292
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
293
+
294
+ P = intrinsic @ extrinsic @ scale_mat
295
+ P = P[:3, :4]
296
+ # - should use load_K_Rt_from_P() to obtain c2w
297
+ c2w = load_K_Rt_from_P(None, P)[1]
298
+ w2c = np.linalg.inv(c2w)
299
+ new_w2cs.append(w2c)
300
+ new_c2ws.append(c2w)
301
+ affine_mat = np.eye(4)
302
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
303
+ new_affine_mats.append(affine_mat)
304
+
305
+ camera_o = c2w[:3, 3]
306
+ dist = np.sqrt(np.sum(camera_o ** 2))
307
+ near = dist - 1
308
+ far = dist + 1
309
+
310
+ new_near_fars.append([0.95 * near, 1.05 * far])
311
+
312
+ new_depths_h.append(depth * scale_factor)
313
+
314
+
315
+ imgs = torch.stack(imgs).float()
316
+ depths_h = np.stack(new_depths_h)
317
+ masks_h = np.stack(masks_h)
318
+
319
+ affine_mats = np.stack(new_affine_mats)
320
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
321
+ new_near_fars)
322
+
323
+ if self.split == 'train':
324
+ start_idx = 0
325
+ else:
326
+ start_idx = 1
327
+
328
+ view_ids = [idx_original % self.imgs_per_instance] + src_views_used
329
+ sample['origin_idx'] = origin_idx
330
+ sample['images'] = imgs # (V, 3, H, W)
331
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
332
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
333
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
334
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
335
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
336
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
337
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
338
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
339
+
340
+ # sample['light_idx'] = torch.tensor(light_idx)
341
+ sample['scan'] = folder_id
342
+
343
+ sample['scale_factor'] = torch.tensor(scale_factor)
344
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
345
+ sample['render_img_idx'] = torch.tensor(image_perm)
346
+ sample['partial_vol_origin'] = self.partial_vol_origin
347
+ meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
348
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end
349
+
350
+
351
+ # - image to render
352
+ sample['query_image'] = sample['images'][0]
353
+ sample['query_c2w'] = sample['c2ws'][0]
354
+ sample['query_w2c'] = sample['w2cs'][0]
355
+ sample['query_intrinsic'] = sample['intrinsics'][0]
356
+ sample['query_depth'] = sample['depths_h'][0]
357
+ sample['query_mask'] = sample['masks_h'][0]
358
+ sample['query_near_far'] = sample['near_fars'][0]
359
+
360
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
361
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
362
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
363
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
364
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
365
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
366
+ sample['view_ids'] = sample['view_ids'][start_idx:]
367
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
368
+
369
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
370
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
371
+
372
+ # - generate rays
373
+ if ('val' in self.split) or ('test' in self.split):
374
+ sample_rays = gen_rays_from_single_image(
375
+ img_wh[1], img_wh[0],
376
+ sample['query_image'],
377
+ sample['query_intrinsic'],
378
+ sample['query_c2w'],
379
+ depth=sample['query_depth'],
380
+ mask=sample['query_mask'] if self.clean_image else None)
381
+ else:
382
+ sample_rays = gen_random_rays_from_single_image(
383
+ img_wh[1], img_wh[0],
384
+ self.N_rays,
385
+ sample['query_image'],
386
+ sample['query_intrinsic'],
387
+ sample['query_c2w'],
388
+ depth=sample['query_depth'],
389
+ mask=sample['query_mask'] if self.clean_image else None,
390
+ dilated_mask=mask_dilated,
391
+ importance_sample=self.importance_sample)
392
+
393
+
394
+ sample['rays'] = sample_rays
395
+
396
+ return sample
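Every loader in this commit flattens RGBA renderings onto a white background with `img[:3] * img[-1:] + (1 - img[-1:])`. The standalone helper below is a sketch of that same compositing step; the tensor sizes in the example are arbitrary.

```python
# Sketch of the alpha-compositing step used throughout these loaders:
# a fully transparent pixel resolves to white (1.0) after blending.
import torch

def rgba_to_rgb_on_white(img: torch.Tensor) -> torch.Tensor:
    """img: (4, H, W) in [0, 1] -> (3, H, W) composited over white."""
    rgb, alpha = img[:3], img[-1:]
    return rgb * alpha + (1.0 - alpha)

fully_transparent = torch.zeros(4, 8, 8)
assert torch.allclose(rgba_to_rgb_on_white(fully_transparent), torch.ones(3, 8, 8))
```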
SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_3_views.py ADDED
@@ -0,0 +1,446 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # ? why need transpose here
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ self.root_dir = root_dir
70
+ self.split = split
71
+ self.imgs_per_instance = 16
72
+ self.n_views = n_views
73
+ self.N_rays = N_rays
74
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
75
+
76
+ self.clean_image = clean_image
77
+ self.importance_sample = importance_sample
78
+ self.test_ref_views = test_ref_views # used for testing
79
+ self.scale_factor = 1.0
80
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
81
+
82
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
83
+ with open(lvis_json_path, 'r') as f:
84
+ lvis_paths = json.load(f)
85
+ if self.split == 'train':
86
+ self.lvis_paths = lvis_paths['train']
87
+ else:
88
+ self.lvis_paths = lvis_paths['val']
89
+ if img_wh is not None:
90
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
91
+ 'img_wh must both be multiples of 32!'
92
+
93
+
94
+ pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
95
+ with open(pose_json_path_narrow, 'r') as f:
96
+ narrow_meta = json.load(f)
97
+
98
+ pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
99
+ with open(pose_json_path_two_stage, 'r') as f:
100
+ two_stage_meta = json.load(f)
101
+
102
+
103
+ self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 4*4)
104
+ self.img_wh = (256, 256)
105
+ self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
106
+ intrinsic = np.eye(4)
107
+ assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
108
+ intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
109
+ self.intrinsic = intrinsic
110
+ assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
111
+ self.near_far = np.array(narrow_meta["near_far"])
112
+ self.near_far[1] = 1.8
113
+ self.define_transforms()
114
+ self.blender2opencv = np.array(
115
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
116
+ )
117
+
118
+
119
+ self.c2ws = []
120
+ self.w2cs = []
121
+ self.near_fars = []
122
+ for idx, img_id in enumerate(self.img_ids):
123
+ pose = self.input_poses[idx]
124
+ c2w = pose @ self.blender2opencv
125
+ self.c2ws.append(c2w)
126
+ self.w2cs.append(np.linalg.inv(c2w))
127
+ self.near_fars.append(self.near_far)
128
+
129
+
130
+
131
+ self.c2ws = np.stack(self.c2ws, axis=0)
132
+ self.w2cs = np.stack(self.w2cs, axis=0)
133
+
134
+
135
+ self.all_intrinsics = [] # the cam info of the whole scene
136
+ self.all_extrinsics = []
137
+ self.all_near_fars = []
138
+ self.load_cam_info()
139
+
140
+ # * bounding box for rendering
141
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
142
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
143
+
144
+ # - used for cost volume regularization
145
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
146
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
147
+
148
+
149
+ def define_transforms(self):
150
+ self.transform = T.Compose([T.ToTensor()])
151
+
152
+
153
+
154
+ def load_cam_info(self):
155
+ for vid, img_id in enumerate(self.img_ids):
156
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
157
+ self.all_intrinsics.append(intrinsic)
158
+ self.all_extrinsics.append(extrinsic)
159
+ self.all_near_fars.append(near_far)
160
+
161
+ def read_depth(self, filename):
162
+ pass
163
+
164
+ def read_mask(self, filename):
165
+ mask_h = cv2.imread(filename, 0)
166
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
167
+ interpolation=cv2.INTER_NEAREST)
168
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
169
+ interpolation=cv2.INTER_NEAREST)
170
+
171
+ mask[mask > 0] = 1 # the masks stored in png are not binary
172
+ mask_h[mask_h > 0] = 1
173
+
174
+ return mask, mask_h
175
+
176
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
177
+
178
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
179
+
180
+ radius = radius * factor
181
+ scale_mat = np.diag([radius, radius, radius, 1.0])
182
+ scale_mat[:3, 3] = center.cpu().numpy()
183
+ scale_mat = scale_mat.astype(np.float32)
184
+
185
+ return scale_mat, 1. / radius.cpu().numpy()
186
+
187
+ def __len__(self):
188
+ return self.imgs_per_instance * len(self.lvis_paths)
189
+
190
+
191
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
192
+ pass
193
+
194
+
195
+ def __getitem__(self, idx):
196
+ sample = {}
197
+ origin_idx = idx
198
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
199
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
200
+ idx_original=idx
201
+
202
+ folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
203
+
204
+ folder_id = folder_uid_dict['folder_id']
205
+ uid = folder_uid_dict['uid']
206
+
207
+ if idx % 16 < 8: # narrow image as target
208
+ idx = idx % self.imgs_per_instance # [0, 7]
209
+ # target view
210
+ c2w = self.c2ws[idx]
211
+ w2c = np.linalg.inv(c2w)
212
+ w2c_ref = w2c
213
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
214
+
215
+ w2cs.append(w2c @ w2c_ref_inv)
216
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
217
+
218
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
219
+
220
+ depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
221
+
222
+
223
+ img = Image.open(img_filename)
224
+
225
+ img = self.transform(img) # (4, h, w)
226
+
227
+
228
+ if img.shape[0] == 4:
229
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
230
+ imgs += [img]
231
+
232
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
233
+ mask_h = depth_h > 0
234
+ # print("valid pixels", np.sum(mask_h))
235
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
236
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
237
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
238
+ depth_h = distance
239
+
240
+ else:
241
+ idx = idx % self.imgs_per_instance - 8 # [0, 7]
242
+ c2w = self.c2ws[idx + 40]
243
+ w2c = np.linalg.inv(c2w)
244
+ w2c_ref = w2c
245
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
246
+
247
+ w2cs.append(w2c @ w2c_ref_inv)
248
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
249
+
250
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png')
251
+
252
+
253
+ img = Image.open(img_filename)
254
+ img = self.transform(img) # (4, h, w)
255
+
256
+ # print("img_pre", img.shape)
257
+ if img.shape[0] == 4:
258
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
259
+ # print("img", img.shape)
260
+ imgs += [img]
261
+
262
+ depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
263
+ depth_h = depth_h.fill_(-1.0)
264
+
265
+ mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
266
+ depths_h.append(depth_h)
267
+ masks_h.append(mask_h)
268
+
269
+ intrinsic = self.intrinsic
270
+ intrinsics.append(intrinsic)
271
+
272
+
273
+ near_fars.append(self.near_fars[idx])
274
+ image_perm = 0 # only supervised on reference view
275
+
276
+ mask_dilated = None
277
+ if_use_narrow = []
278
+ if self.split == 'train':
279
+ for i in range(8):
280
+ if np.random.random() > 0.5:
281
+ if_use_narrow.append(True) # use narrow
282
+ else:
283
+ if_use_narrow.append(False) # 2-stage prediction
284
+ if_use_narrow[origin_idx % 8] = True if origin_idx < 8 else False
285
+ else:
286
+ for i in range(8):
287
+ if_use_narrow.append(True if origin_idx < 8 else False)
288
+
289
+ src_views = list()
290
+ for i in range(8):
291
+ # randomly choose 3 different view indices from [0, 3]
292
+ local_idxs = np.random.choice(4, 3, replace=False)
293
+ local_idxs = [0, 1, 2] # note: this fixed choice overrides the random sampling above
294
+ local_idxs = [8+i*4+local_idx for local_idx in local_idxs]
295
+ src_views += local_idxs
296
+ src_views_used = []
297
+ for vid in src_views:
298
+ src_views_used.append(vid)
299
+ cur_view_id = (vid - 8) // 4
300
+ # choose narrow
301
+ if if_use_narrow[cur_view_id]:
302
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png')
303
+ else: # choose 2-stage
304
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{(vid - 8) // 4}_{(vid-8) % 4 + 1}.png')
305
+
306
+ img = Image.open(img_filename)
307
+ img_wh = self.img_wh
308
+
309
+ img = self.transform(img)
310
+ if img.shape[0] == 4:
311
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
312
+
313
+ imgs += [img]
314
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
315
+ depths_h.append(depth_h)
316
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
317
+
318
+ near_fars.append(self.all_near_fars[vid])
319
+ intrinsics.append(self.all_intrinsics[vid])
320
+
321
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
322
+
323
+
324
+
325
+
326
+ scale_mat, scale_factor = self.cal_scale_mat(
327
+ img_hw=[img_wh[1], img_wh[0]],
328
+ intrinsics=intrinsics, extrinsics=w2cs,
329
+ near_fars=near_fars, factor=1.1
330
+ )
331
+
332
+
333
+ new_near_fars = []
334
+ new_w2cs = []
335
+ new_c2ws = []
336
+ new_affine_mats = []
337
+ new_depths_h = []
338
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
339
+
340
+ P = intrinsic @ extrinsic @ scale_mat
341
+ P = P[:3, :4]
342
+ # - should use load_K_Rt_from_P() to obtain c2w
343
+ c2w = load_K_Rt_from_P(None, P)[1]
344
+ w2c = np.linalg.inv(c2w)
345
+ new_w2cs.append(w2c)
346
+ new_c2ws.append(c2w)
347
+ affine_mat = np.eye(4)
348
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
349
+ new_affine_mats.append(affine_mat)
350
+
351
+ camera_o = c2w[:3, 3]
352
+ dist = np.sqrt(np.sum(camera_o ** 2))
353
+ near = dist - 1
354
+ far = dist + 1
355
+
356
+ new_near_fars.append([0.95 * near, 1.05 * far])
357
+
358
+ new_depths_h.append(depth * scale_factor)
359
+
360
+ # print(new_near_fars)
361
+ # print("img numeber: ", len(imgs))
362
+ imgs = torch.stack(imgs).float()
363
+ depths_h = np.stack(new_depths_h)
364
+ masks_h = np.stack(masks_h)
365
+
366
+ affine_mats = np.stack(new_affine_mats)
367
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
368
+ new_near_fars)
369
+
370
+ if self.split == 'train':
371
+ start_idx = 0
372
+ else:
373
+ start_idx = 1
374
+
375
+ view_ids = [idx_original % self.imgs_per_instance] + src_views_used
376
+ sample['origin_idx'] = origin_idx
377
+ sample['images'] = imgs # (V, 3, H, W)
378
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
379
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
380
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
381
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
382
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
383
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
384
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
385
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
386
+
387
+ # sample['light_idx'] = torch.tensor(light_idx)
388
+ sample['scan'] = folder_id
389
+
390
+ sample['scale_factor'] = torch.tensor(scale_factor)
391
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
392
+ sample['render_img_idx'] = torch.tensor(image_perm)
393
+ sample['partial_vol_origin'] = self.partial_vol_origin
394
+ if view_ids[0] < 8:
395
+ meta_end = "_narrow"+ "_refview" + str(view_ids[0])
396
+ else:
397
+ meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
398
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end
399
+
400
+
401
+ # - image to render
402
+ sample['query_image'] = sample['images'][0]
403
+ sample['query_c2w'] = sample['c2ws'][0]
404
+ sample['query_w2c'] = sample['w2cs'][0]
405
+ sample['query_intrinsic'] = sample['intrinsics'][0]
406
+ sample['query_depth'] = sample['depths_h'][0]
407
+ sample['query_mask'] = sample['masks_h'][0]
408
+ sample['query_near_far'] = sample['near_fars'][0]
409
+
410
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
411
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
412
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
413
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
414
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
415
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
416
+ sample['view_ids'] = sample['view_ids'][start_idx:]
417
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
418
+
419
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
420
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
421
+
422
+ # - generate rays
423
+ if ('val' in self.split) or ('test' in self.split):
424
+ sample_rays = gen_rays_from_single_image(
425
+ img_wh[1], img_wh[0],
426
+ sample['query_image'],
427
+ sample['query_intrinsic'],
428
+ sample['query_c2w'],
429
+ depth=sample['query_depth'],
430
+ mask=sample['query_mask'] if self.clean_image else None)
431
+ else:
432
+ sample_rays = gen_random_rays_from_single_image(
433
+ img_wh[1], img_wh[0],
434
+ self.N_rays,
435
+ sample['query_image'],
436
+ sample['query_intrinsic'],
437
+ sample['query_c2w'],
438
+ depth=sample['query_depth'],
439
+ mask=sample['query_mask'] if self.clean_image else None,
440
+ dilated_mask=mask_dilated,
441
+ importance_sample=self.importance_sample)
442
+
443
+
444
+ sample['rays'] = sample_rays
445
+
446
+ return sample
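After `cal_scale_mat()` rescales the scene to a unit sphere, each view's depth range is rebuilt from the camera-to-origin distance with a 5% margin, as in the loop above. The snippet below is a sketch of that computation on a hypothetical pose.

```python
# Sketch of the per-view near/far recomputation used after cal_scale_mat():
# with the object normalized into a unit sphere, the depth range is the
# camera distance +/- 1, padded by 5%. The example pose is hypothetical.
import numpy as np

def near_far_from_c2w(c2w: np.ndarray) -> list:
    dist = float(np.linalg.norm(c2w[:3, 3]))  # camera center distance to origin
    near, far = dist - 1.0, dist + 1.0
    return [0.95 * near, 1.05 * far]

c2w = np.eye(4)
c2w[:3, 3] = [0.0, 0.0, 1.5]
print(near_far_from_c2w(c2w))  # approx [0.475, 2.625]
```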
SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_mix.py ADDED
@@ -0,0 +1,439 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # ? why need transpose here
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ self.root_dir = root_dir
70
+ self.split = split
71
+ self.imgs_per_instance = 16
72
+ self.n_views = n_views
73
+ self.N_rays = N_rays
74
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
75
+
76
+ self.clean_image = clean_image
77
+ self.importance_sample = importance_sample
78
+ self.test_ref_views = test_ref_views # used for testing
79
+ self.scale_factor = 1.0
80
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
81
+
82
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
83
+ with open(lvis_json_path, 'r') as f:
84
+ lvis_paths = json.load(f)
85
+ if self.split == 'train':
86
+ self.lvis_paths = lvis_paths['train']
87
+ else:
88
+ self.lvis_paths = lvis_paths['val']
89
+ if img_wh is not None:
90
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
91
+ 'img_wh must both be multiples of 32!'
92
+
93
+
94
+ pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
95
+ with open(pose_json_path_narrow, 'r') as f:
96
+ narrow_meta = json.load(f)
97
+
98
+ pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
99
+ with open(pose_json_path_two_stage, 'r') as f:
100
+ two_stage_meta = json.load(f)
101
+
102
+
103
+ self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4)
104
+ self.img_wh = (256, 256)
105
+ self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
106
+ intrinsic = np.eye(4)
107
+ assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
108
+ intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
109
+ self.intrinsic = intrinsic
110
+ assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
111
+ self.near_far = np.array(narrow_meta["near_far"])
112
+ self.near_far[1] = 1.8
113
+ self.define_transforms()
114
+ self.blender2opencv = np.array(
115
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
116
+ )
117
+
118
+
119
+ self.c2ws = []
120
+ self.w2cs = []
121
+ self.near_fars = []
122
+ for idx, img_id in enumerate(self.img_ids):
123
+ pose = self.input_poses[idx]
124
+ c2w = pose @ self.blender2opencv
125
+ self.c2ws.append(c2w)
126
+ self.w2cs.append(np.linalg.inv(c2w))
127
+ self.near_fars.append(self.near_far)
128
+
129
+
130
+
131
+ self.c2ws = np.stack(self.c2ws, axis=0)
132
+ self.w2cs = np.stack(self.w2cs, axis=0)
133
+
134
+
135
+ self.all_intrinsics = [] # the cam info of the whole scene
136
+ self.all_extrinsics = []
137
+ self.all_near_fars = []
138
+ self.load_cam_info()
139
+
140
+ # * bounding box for rendering
141
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
142
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
143
+
144
+ # - used for cost volume regularization
145
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
146
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
147
+
148
+
149
+ def define_transforms(self):
150
+ self.transform = T.Compose([T.ToTensor()])
151
+
152
+
153
+
154
+ def load_cam_info(self):
155
+ for vid, img_id in enumerate(self.img_ids):
156
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
157
+ self.all_intrinsics.append(intrinsic)
158
+ self.all_extrinsics.append(extrinsic)
159
+ self.all_near_fars.append(near_far)
160
+
161
+ def read_depth(self, filename):
162
+ pass
163
+
164
+ def read_mask(self, filename):
165
+ mask_h = cv2.imread(filename, 0)
166
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
167
+ interpolation=cv2.INTER_NEAREST)
168
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
169
+ interpolation=cv2.INTER_NEAREST)
170
+
171
+ mask[mask > 0] = 1 # the masks stored in png are not binary
172
+ mask_h[mask_h > 0] = 1
173
+
174
+ return mask, mask_h
175
+
176
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
177
+
178
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
179
+
180
+ radius = radius * factor
181
+ scale_mat = np.diag([radius, radius, radius, 1.0])
182
+ scale_mat[:3, 3] = center.cpu().numpy()
183
+ scale_mat = scale_mat.astype(np.float32)
184
+
185
+ return scale_mat, 1. / radius.cpu().numpy()
186
+
187
+ def __len__(self):
188
+ return self.imgs_per_instance*len(self.lvis_paths)
189
+
190
+
191
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
192
+ pass
193
+
194
+
195
+ def __getitem__(self, idx):
196
+ sample = {}
197
+ origin_idx = idx
198
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
199
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
200
+ idx_original=idx
201
+
202
+ folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
203
+
204
+ folder_id = folder_uid_dict['folder_id']
205
+ uid = folder_uid_dict['uid']
206
+
207
+ if idx % 16 < 8: # gt image as target
208
+ idx = idx % self.imgs_per_instance # [0, 7]
209
+ # target view
210
+ c2w = self.c2ws[idx]
211
+ w2c = np.linalg.inv(c2w)
212
+ w2c_ref = w2c
213
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
214
+
215
+ w2cs.append(w2c @ w2c_ref_inv)
216
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
217
+
218
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
219
+
220
+ depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
221
+
222
+
223
+ img = Image.open(img_filename)
224
+
225
+ img = self.transform(img) # (4, h, w)
226
+
227
+
228
+ if img.shape[0] == 4:
229
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
230
+ imgs += [img]
231
+
232
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
233
+ mask_h = depth_h > 0
234
+ # print("valid pixels", np.sum(mask_h))
235
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
236
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
237
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
238
+ depth_h = distance
239
+
240
+ else:
241
+ idx = idx % self.imgs_per_instance - 8 # [0, 7]
242
+ c2w = self.c2ws[idx + 40]
243
+ w2c = np.linalg.inv(c2w)
244
+ w2c_ref = w2c
245
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
246
+
247
+ w2cs.append(w2c @ w2c_ref_inv)
248
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
249
+
250
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png')
251
+
252
+
253
+ img = Image.open(img_filename)
254
+ img = self.transform(img) # (4, h, w)
255
+
256
+ # print("img_pre", img.shape)
257
+ if img.shape[0] == 4:
258
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
259
+ # print("img", img.shape)
260
+ imgs += [img]
261
+
262
+ depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
263
+ depth_h = depth_h.fill_(-1.0)
264
+
265
+ mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
266
+ depths_h.append(depth_h)
267
+ masks_h.append(mask_h)
268
+
269
+ intrinsic = self.intrinsic
270
+ intrinsics.append(intrinsic)
271
+
272
+
273
+ near_fars.append(self.near_fars[idx])
274
+ image_perm = 0 # only supervised on reference view
275
+
276
+ mask_dilated = None
277
+ if_use_narrow = []
278
+ if self.split == 'train':
279
+ for i in range(8):
280
+ if np.random.random() > 0.5:
281
+ if_use_narrow.append(True) # use narrow
282
+ else:
283
+ if_use_narrow.append(False) # 2-stage prediction
284
+ if_use_narrow[origin_idx % 8] = True if (origin_idx % 16) < 8 else False
285
+ else:
286
+ for i in range(8):
287
+ if_use_narrow.append( True if (origin_idx % 16) < 8 else False)
288
+ src_views = range(8, 8 + 8 * 4)
289
+ src_views_used = []
290
+ for vid in src_views:
291
+ src_views_used.append(vid)
292
+ cur_view_id = (vid - 8) // 4 # [0, 7]
293
+ # choose narrow
294
+ if if_use_narrow[cur_view_id]:
295
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png')
296
+ else: # choose 2-stage
297
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{cur_view_id}_{(vid) % 4 + 1}.png')
298
+
299
+ img = Image.open(img_filename)
300
+ img_wh = self.img_wh
301
+
302
+ img = self.transform(img)
303
+ if img.shape[0] == 4:
304
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
305
+
306
+ imgs += [img]
307
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
308
+ depths_h.append(depth_h)
309
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
310
+
311
+ near_fars.append(self.all_near_fars[vid])
312
+ intrinsics.append(self.all_intrinsics[vid])
313
+
314
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
315
+
316
+
317
+
318
+
319
+ scale_mat, scale_factor = self.cal_scale_mat(
320
+ img_hw=[img_wh[1], img_wh[0]],
321
+ intrinsics=intrinsics, extrinsics=w2cs,
322
+ near_fars=near_fars, factor=1.1
323
+ )
324
+
325
+
326
+ new_near_fars = []
327
+ new_w2cs = []
328
+ new_c2ws = []
329
+ new_affine_mats = []
330
+ new_depths_h = []
331
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
332
+
333
+ P = intrinsic @ extrinsic @ scale_mat
334
+ P = P[:3, :4]
335
+ # - should use load_K_Rt_from_P() to obtain c2w
336
+ c2w = load_K_Rt_from_P(None, P)[1]
337
+ w2c = np.linalg.inv(c2w)
338
+ new_w2cs.append(w2c)
339
+ new_c2ws.append(c2w)
340
+ affine_mat = np.eye(4)
341
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
342
+ new_affine_mats.append(affine_mat)
343
+
344
+ camera_o = c2w[:3, 3]
345
+ dist = np.sqrt(np.sum(camera_o ** 2))
346
+ near = dist - 1
347
+ far = dist + 1
348
+
349
+ new_near_fars.append([0.95 * near, 1.05 * far])
350
+
351
+ new_depths_h.append(depth * scale_factor)
352
+
353
+ # print(new_near_fars)
354
+ # print("img numeber: ", len(imgs))
355
+ imgs = torch.stack(imgs).float()
356
+ depths_h = np.stack(new_depths_h)
357
+ masks_h = np.stack(masks_h)
358
+
359
+ affine_mats = np.stack(new_affine_mats)
360
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
361
+ new_near_fars)
362
+
363
+ if self.split == 'train':
364
+ start_idx = 0
365
+ else:
366
+ start_idx = 1
367
+
368
+ view_ids = [idx_original % self.imgs_per_instance] + src_views_used
369
+ sample['origin_idx'] = origin_idx
370
+ sample['images'] = imgs # (V, 3, H, W)
371
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
372
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
373
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
374
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
375
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
376
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
377
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
378
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
379
+
380
+ # sample['light_idx'] = torch.tensor(light_idx)
381
+ sample['scan'] = folder_id
382
+
383
+ sample['scale_factor'] = torch.tensor(scale_factor)
384
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
385
+ sample['render_img_idx'] = torch.tensor(image_perm)
386
+ sample['partial_vol_origin'] = self.partial_vol_origin
387
+ if view_ids[0] < 8:
388
+ meta_end = "_narrow"+ "_refview" + str(view_ids[0])
389
+ else:
390
+ meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
391
+ sample['meta'] = str(folder_id) + "_" + str(uid) + meta_end
392
+
393
+
394
+ # - image to render
395
+ sample['query_image'] = sample['images'][0]
396
+ sample['query_c2w'] = sample['c2ws'][0]
397
+ sample['query_w2c'] = sample['w2cs'][0]
398
+ sample['query_intrinsic'] = sample['intrinsics'][0]
399
+ sample['query_depth'] = sample['depths_h'][0]
400
+ sample['query_mask'] = sample['masks_h'][0]
401
+ sample['query_near_far'] = sample['near_fars'][0]
402
+
403
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
404
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
405
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
406
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
407
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
408
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
409
+ sample['view_ids'] = sample['view_ids'][start_idx:]
410
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
411
+
412
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
413
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
414
+
415
+ # - generate rays
416
+ if ('val' in self.split) or ('test' in self.split):
417
+ sample_rays = gen_rays_from_single_image(
418
+ img_wh[1], img_wh[0],
419
+ sample['query_image'],
420
+ sample['query_intrinsic'],
421
+ sample['query_c2w'],
422
+ depth=sample['query_depth'],
423
+ mask=sample['query_mask'] if self.clean_image else None)
424
+ else:
425
+ sample_rays = gen_random_rays_from_single_image(
426
+ img_wh[1], img_wh[0],
427
+ self.N_rays,
428
+ sample['query_image'],
429
+ sample['query_intrinsic'],
430
+ sample['query_c2w'],
431
+ depth=sample['query_depth'],
432
+ mask=sample['query_mask'] if self.clean_image else None,
433
+ dilated_mask=mask_dilated,
434
+ importance_sample=self.importance_sample)
435
+
436
+
437
+ sample['rays'] = sample_rays
438
+
439
+ return sample
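For reference, the helper below is only a sketch (decode_index is not a function in the repo); it restates the flat-index convention that __len__ and __getitem__ above implement with imgs_per_instance = 16, where slots 0-7 use a ground-truth "narrow" render as the query view and slots 8-15 use a first-stage ("2-stage") prediction instead.

# Sketch only: restates the indexing convention of the blend_mix loader above.
def decode_index(idx, imgs_per_instance=16):
    obj_idx = idx // imgs_per_instance      # which (folder_id, uid) entry of lvis_paths
    slot = idx % imgs_per_instance          # which of the 16 candidate reference views
    use_gt_target = slot < 8                # True -> zero12345_narrow GT, False -> zero12345_2stage_8
    return obj_idx, slot % 8, use_gt_target

# Example: decode_index(35) == (2, 3, True), i.e. object index 2 with GT view 3 as the query image.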
SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_mix.py ADDED
@@ -0,0 +1,470 @@
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # ? why need transpose here
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ self.root_dir = root_dir
70
+ self.split = split
71
+ self.imgs_per_instance = 16
72
+ self.n_views = n_views
73
+ self.N_rays = N_rays
74
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
75
+
76
+ self.clean_image = clean_image
77
+ self.importance_sample = importance_sample
78
+ self.test_ref_views = test_ref_views # used for testing
79
+ self.scale_factor = 1.0
80
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
81
+
82
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
83
+ with open(lvis_json_path, 'r') as f:
84
+ lvis_paths = json.load(f)
85
+ if self.split == 'train':
86
+ self.lvis_paths = lvis_paths['train']
87
+ else:
88
+ self.lvis_paths = lvis_paths['val']
89
+ if img_wh is not None:
90
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
91
+ 'img_wh must both be multiples of 32!'
92
+
93
+
94
+ pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
95
+ with open(pose_json_path_narrow, 'r') as f:
96
+ narrow_meta = json.load(f)
97
+
98
+ pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
99
+ with open(pose_json_path_two_stage, 'r') as f:
100
+ two_stage_meta = json.load(f)
101
+
102
+
103
+ self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4)
104
+ self.img_wh = (256, 256)
105
+ self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
106
+ intrinsic = np.eye(4)
107
+ assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
108
+ intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
109
+ self.intrinsic = intrinsic
110
+ assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
111
+ self.near_far = np.array(narrow_meta["near_far"])
112
+ self.near_far[1] = 1.8
113
+ self.define_transforms()
114
+ self.blender2opencv = np.array(
115
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
116
+ )
117
+
118
+
119
+ self.c2ws = []
120
+ self.w2cs = []
121
+ self.near_fars = []
122
+ for idx, img_id in enumerate(self.img_ids):
123
+ pose = self.input_poses[idx]
124
+ c2w = pose @ self.blender2opencv
125
+ self.c2ws.append(c2w)
126
+ self.w2cs.append(np.linalg.inv(c2w))
127
+ self.near_fars.append(self.near_far)
128
+
129
+
130
+
131
+ self.c2ws = np.stack(self.c2ws, axis=0)
132
+ self.w2cs = np.stack(self.w2cs, axis=0)
133
+
134
+
135
+ self.all_intrinsics = [] # the cam info of the whole scene
136
+ self.all_extrinsics = []
137
+ self.all_near_fars = []
138
+ self.load_cam_info()
139
+
140
+ # * bounding box for rendering
141
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
142
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
143
+
144
+ # - used for cost volume regularization
145
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
146
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
147
+
148
+
149
+ def define_transforms(self):
150
+ self.transform = T.Compose([T.ToTensor()])
151
+
152
+
153
+
154
+ def load_cam_info(self):
155
+ for vid, img_id in enumerate(self.img_ids):
156
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
157
+ self.all_intrinsics.append(intrinsic)
158
+ self.all_extrinsics.append(extrinsic)
159
+ self.all_near_fars.append(near_far)
160
+
161
+ def read_depth(self, filename):
162
+ pass
163
+
164
+ def read_mask(self, filename):
165
+ mask_h = cv2.imread(filename, 0)
166
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
167
+ interpolation=cv2.INTER_NEAREST)
168
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
169
+ interpolation=cv2.INTER_NEAREST)
170
+
171
+ mask[mask > 0] = 1 # the masks stored in png are not binary
172
+ mask_h[mask_h > 0] = 1
173
+
174
+ return mask, mask_h
175
+
176
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
177
+
178
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
179
+
180
+ radius = radius * factor
181
+ scale_mat = np.diag([radius, radius, radius, 1.0])
182
+ scale_mat[:3, 3] = center.cpu().numpy()
183
+ scale_mat = scale_mat.astype(np.float32)
184
+
185
+ return scale_mat, 1. / radius.cpu().numpy()
186
+
187
+ def __len__(self):
188
+ return self.imgs_per_instance * len(self.lvis_paths)
189
+
190
+
191
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
192
+ pass
193
+
194
+
195
+ def __getitem__(self, idx):
196
+ sample = {}
197
+ origin_idx = idx
198
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
199
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
200
+ idx_original=idx
201
+
202
+ folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
203
+
204
+ folder_id = folder_uid_dict['folder_id']
205
+ uid = folder_uid_dict['uid']
206
+
207
+ if idx % self.imgs_per_instance < 8:
208
+ idx = idx % self.imgs_per_instance # [0, 7]
209
+ # target view
210
+ c2w = self.c2ws[idx]
211
+ w2c = np.linalg.inv(c2w)
212
+ w2c_ref = w2c
213
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
214
+
215
+ w2cs.append(w2c @ w2c_ref_inv)
216
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
217
+
218
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
219
+
220
+ depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
221
+
222
+
223
+ img = Image.open(img_filename)
224
+
225
+ img = self.transform(img) # (4, h, w)
226
+
227
+
228
+ if img.shape[0] == 4:
229
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
230
+ imgs += [img]
231
+
232
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
233
+ mask_h = depth_h > 0
234
+ # print("valid pixels", np.sum(mask_h))
235
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
236
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
237
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
238
+ depth_h = distance
239
+
240
+
241
+ depths_h.append(depth_h)
242
+ masks_h.append(mask_h)
243
+
244
+ intrinsic = self.intrinsic
245
+ intrinsics.append(intrinsic)
246
+
247
+
248
+ near_fars.append(self.near_fars[idx])
249
+ image_perm = 0 # only supervised on reference view
250
+
251
+ mask_dilated = None
252
+
253
+ # src_views = range(8+idx*4, 8+(idx+1)*4)
254
+
255
+ src_views = range(8, 8 + 8 * 4)
256
+ src_views_used = []
257
+ for vid in src_views:
258
+ src_views_used.append(vid)
259
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
260
+
261
+ img = Image.open(img_filename)
262
+ img_wh = self.img_wh
263
+
264
+ img = self.transform(img)
265
+ if img.shape[0] == 4:
266
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
267
+
268
+ imgs += [img]
269
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
270
+ depths_h.append(depth_h)
271
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
272
+
273
+ near_fars.append(self.all_near_fars[vid])
274
+ intrinsics.append(self.all_intrinsics[vid])
275
+
276
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
277
+
278
+ else:
279
+ idx = idx % self.imgs_per_instance - 8 # [0, 7]
280
+
281
+ c2w = self.c2ws[idx + 40]
282
+ w2c = np.linalg.inv(c2w)
283
+ w2c_ref = w2c
284
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
285
+
286
+ w2cs.append(w2c @ w2c_ref_inv)
287
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
288
+
289
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png')
290
+
291
+
292
+ img = Image.open(img_filename)
293
+
294
+ img = self.transform(img) # (4, h, w)
295
+
296
+ # print("img_pre", img.shape)
297
+ if img.shape[0] == 4:
298
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
299
+ # print("img", img.shape)
300
+ imgs += [img]
301
+
302
+
303
+ depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
304
+ depth_h = depth_h.fill_(-1.0)
305
+ # depth_h = torch.fill((img.shape[1], img.shape[2]), -1.0)
306
+ # print("depth_h", depth_h.shape)
307
+ mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
308
+ depths_h.append(depth_h)
309
+ masks_h.append(mask_h)
310
+
311
+ intrinsic = self.intrinsic
312
+ intrinsics.append(intrinsic)
313
+
314
+
315
+ near_fars.append(self.near_fars[idx])
316
+ image_perm = 0 # only supervised on reference view
317
+
318
+ mask_dilated = None
319
+
320
+
321
+
322
+ src_views = range(40+8, 40+8+32)
323
+ src_views_used = []
324
+ for vid in src_views:
325
+ view_dix_to_use = (vid - 40 - 8) // 4
326
+
327
+ src_views_used.append(vid)
328
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_{(vid-48) % 4 + 1}.png')
329
+
330
+ img = Image.open(img_filename)
331
+ img_wh = self.img_wh
332
+
333
+ img = self.transform(img)
334
+ # print("img shape1: ", img.shape)
335
+ if img.shape[0] == 4:
336
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
337
+ # print("img shape2: ", img.shape)
338
+ imgs += [img]
339
+ depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
340
+ depth_h = depth_h.fill_(-1.0)
341
+ depths_h.append(depth_h)
342
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
343
+
344
+ near_fars.append(self.all_near_fars[vid])
345
+ intrinsics.append(self.all_intrinsics[vid])
346
+
347
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
348
+
349
+
350
+ scale_mat, scale_factor = self.cal_scale_mat(
351
+ img_hw=[img_wh[1], img_wh[0]],
352
+ intrinsics=intrinsics, extrinsics=w2cs,
353
+ near_fars=near_fars, factor=1.1
354
+ )
355
+
356
+
357
+ new_near_fars = []
358
+ new_w2cs = []
359
+ new_c2ws = []
360
+ new_affine_mats = []
361
+ new_depths_h = []
362
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
363
+
364
+ P = intrinsic @ extrinsic @ scale_mat
365
+ P = P[:3, :4]
366
+ # - should use load_K_Rt_from_P() to obtain c2w
367
+ c2w = load_K_Rt_from_P(None, P)[1]
368
+ w2c = np.linalg.inv(c2w)
369
+ new_w2cs.append(w2c)
370
+ new_c2ws.append(c2w)
371
+ affine_mat = np.eye(4)
372
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
373
+ new_affine_mats.append(affine_mat)
374
+
375
+ camera_o = c2w[:3, 3]
376
+ dist = np.sqrt(np.sum(camera_o ** 2))
377
+ near = dist - 1
378
+ far = dist + 1
379
+
380
+ new_near_fars.append([0.95 * near, 1.05 * far])
381
+
382
+ new_depths_h.append(depth * scale_factor)
383
+
384
+ # print(new_near_fars)
385
+ # print("img numeber: ", len(imgs))
386
+ imgs = torch.stack(imgs).float()
387
+ depths_h = np.stack(new_depths_h)
388
+ masks_h = np.stack(masks_h)
389
+
390
+ affine_mats = np.stack(new_affine_mats)
391
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
392
+ new_near_fars)
393
+
394
+ if self.split == 'train':
395
+ start_idx = 0
396
+ else:
397
+ start_idx = 1
398
+
399
+ view_ids = [idx_original % self.imgs_per_instance] + src_views_used
400
+ sample['origin_idx'] = origin_idx
401
+ sample['images'] = imgs # (V, 3, H, W)
402
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
403
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
404
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
405
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
406
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
407
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
408
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
409
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
410
+
411
+ # sample['light_idx'] = torch.tensor(light_idx)
412
+ sample['scan'] = folder_id
413
+
414
+ sample['scale_factor'] = torch.tensor(scale_factor)
415
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
416
+ sample['render_img_idx'] = torch.tensor(image_perm)
417
+ sample['partial_vol_origin'] = self.partial_vol_origin
418
+ if view_ids[0] < 8:
419
+ meta_end = "_narrow"+ "_refview" + str(view_ids[0])
420
+ else:
421
+ meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
422
+ sample['meta'] = str(folder_id) + "_" + str(uid) + meta_end
423
+
424
+
425
+ # - image to render
426
+ sample['query_image'] = sample['images'][0]
427
+ sample['query_c2w'] = sample['c2ws'][0]
428
+ sample['query_w2c'] = sample['w2cs'][0]
429
+ sample['query_intrinsic'] = sample['intrinsics'][0]
430
+ sample['query_depth'] = sample['depths_h'][0]
431
+ sample['query_mask'] = sample['masks_h'][0]
432
+ sample['query_near_far'] = sample['near_fars'][0]
433
+
434
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
435
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
436
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
437
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
438
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
439
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
440
+ sample['view_ids'] = sample['view_ids'][start_idx:]
441
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
442
+
443
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
444
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
445
+
446
+ # - generate rays
447
+ if ('val' in self.split) or ('test' in self.split):
448
+ sample_rays = gen_rays_from_single_image(
449
+ img_wh[1], img_wh[0],
450
+ sample['query_image'],
451
+ sample['query_intrinsic'],
452
+ sample['query_c2w'],
453
+ depth=sample['query_depth'],
454
+ mask=sample['query_mask'] if self.clean_image else None)
455
+ else:
456
+ sample_rays = gen_random_rays_from_single_image(
457
+ img_wh[1], img_wh[0],
458
+ self.N_rays,
459
+ sample['query_image'],
460
+ sample['query_intrinsic'],
461
+ sample['query_c2w'],
462
+ depth=sample['query_depth'],
463
+ mask=sample['query_mask'] if self.clean_image else None,
464
+ dilated_mask=mask_dilated,
465
+ importance_sample=self.importance_sample)
466
+
467
+
468
+ sample['rays'] = sample_rays
469
+
470
+ return sample
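The ground-truth branch of __getitem__ in these loaders reads the 16-bit view_{i}_depth_mm.png (planar z-depth in millimetres), converts it to Euclidean distance along each camera ray via get_ray_directions, and takes depth > 0 as the foreground mask. The sketch below is a self-contained version of that conversion; zdepth_mm_to_ray_distance is a hypothetical helper, not repo code, and it uses a float cast rather than the .astype(np.uint16) cast used above.

# Standalone sketch of the z-depth -> ray-distance conversion used in the GT branch.
import cv2
import numpy as np

def zdepth_mm_to_ray_distance(depth_png_path, fx, fy, cx, cy):
    z = cv2.imread(depth_png_path, cv2.IMREAD_UNCHANGED).astype(np.float32) / 1000.0  # (H, W), metres
    H, W = z.shape
    i, j = np.meshgrid(np.arange(W, dtype=np.float32), np.arange(H, dtype=np.float32))
    dirs = np.stack([(i + 0.5 - cx) / fx, (j + 0.5 - cy) / fy, np.ones_like(i)], axis=-1)  # z component = 1
    distance = np.linalg.norm(dirs * z[..., None], axis=-1)   # (H, W), distance along the ray
    mask = z > 0                                               # background pixels store depth 0
    return distance, mask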
SparseNeuS_demo_v1/data/blender_general_8_wide_from_2_stage.py ADDED
@@ -0,0 +1,395 @@
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # ? why need transpose here
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ self.root_dir = root_dir
70
+ self.split = split
71
+
72
+ self.imgs_per_instance = 8
73
+
74
+ self.n_views = n_views
75
+ self.N_rays = N_rays
76
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
77
+
78
+ self.clean_image = clean_image
79
+ self.importance_sample = importance_sample
80
+ self.test_ref_views = test_ref_views # used for testing
81
+ self.scale_factor = 1.0
82
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
83
+
84
+ lvis_json_path = '/objaverse-processed/zero12345_img/random32_split.json' # folder_id and uid
85
+ with open(lvis_json_path, 'r') as f:
86
+ lvis_paths = json.load(f)
87
+ if self.split == 'train':
88
+ self.lvis_paths = lvis_paths['train']
89
+ else:
90
+ self.lvis_paths = lvis_paths['val']
91
+ if img_wh is not None:
92
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
93
+ 'img_wh must both be multiples of 32!'
94
+
95
+
96
+ pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
97
+ with open(pose_json_path_narrow, 'r') as f:
98
+ narrow_meta = json.load(f)
99
+
100
+ pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
101
+ with open(pose_json_path_two_stage, 'r') as f:
102
+ two_stage_meta = json.load(f)
103
+
104
+
105
+ self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4)
106
+ self.img_wh = (256, 256)
107
+ self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
108
+ intrinsic = np.eye(4)
109
+ assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
110
+ intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
111
+ self.intrinsic = intrinsic
112
+ assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
113
+ self.near_far = np.array(narrow_meta["near_far"])
114
+ self.near_far[1] = 1.8
115
+ self.define_transforms()
116
+ self.blender2opencv = np.array(
117
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
118
+ )
119
+
120
+
121
+ self.c2ws = []
122
+ self.w2cs = []
123
+ self.near_fars = []
124
+ for idx, img_id in enumerate(self.img_ids):
125
+ pose = self.input_poses[idx]
126
+ c2w = pose @ self.blender2opencv
127
+ self.c2ws.append(c2w)
128
+ self.w2cs.append(np.linalg.inv(c2w))
129
+ self.near_fars.append(self.near_far)
130
+
131
+
132
+
133
+ self.c2ws = np.stack(self.c2ws, axis=0)
134
+ self.w2cs = np.stack(self.w2cs, axis=0)
135
+
136
+
137
+ self.all_intrinsics = [] # the cam info of the whole scene
138
+ self.all_extrinsics = []
139
+ self.all_near_fars = []
140
+ self.load_cam_info()
141
+
142
+ # * bounding box for rendering
143
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
144
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
145
+
146
+ # - used for cost volume regularization
147
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
148
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
149
+
150
+
151
+ def define_transforms(self):
152
+ self.transform = T.Compose([T.ToTensor()])
153
+
154
+
155
+
156
+ def load_cam_info(self):
157
+ for vid, img_id in enumerate(self.img_ids):
158
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
159
+ self.all_intrinsics.append(intrinsic)
160
+ self.all_extrinsics.append(extrinsic)
161
+ self.all_near_fars.append(near_far)
162
+
163
+ def read_depth(self, filename):
164
+ pass
165
+
166
+ def read_mask(self, filename):
167
+ mask_h = cv2.imread(filename, 0)
168
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
169
+ interpolation=cv2.INTER_NEAREST)
170
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
171
+ interpolation=cv2.INTER_NEAREST)
172
+
173
+ mask[mask > 0] = 1 # the masks stored in png are not binary
174
+ mask_h[mask_h > 0] = 1
175
+
176
+ return mask, mask_h
177
+
178
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
179
+
180
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
181
+
182
+ radius = radius * factor
183
+ scale_mat = np.diag([radius, radius, radius, 1.0])
184
+ scale_mat[:3, 3] = center.cpu().numpy()
185
+ scale_mat = scale_mat.astype(np.float32)
186
+
187
+ return scale_mat, 1. / radius.cpu().numpy()
188
+
189
+ def __len__(self):
190
+ return self.imgs_per_instance * len(self.lvis_paths)
191
+
192
+
193
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
194
+ pass
195
+
196
+
197
+ def __getitem__(self, idx):
198
+ sample = {}
199
+ origin_idx = idx
200
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
201
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
202
+ idx_original=idx
203
+
204
+ folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance]
205
+
206
+ folder_id = folder_uid_dict['folder_id']
207
+ uid = folder_uid_dict['uid']
208
+
209
+ idx = idx % self.imgs_per_instance # [0, 7]
210
+ # target view
211
+ c2w = self.c2ws[idx]
212
+ w2c = np.linalg.inv(c2w)
213
+ w2c_ref = w2c
214
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
215
+
216
+ w2cs.append(w2c @ w2c_ref_inv)
217
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
218
+
219
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
220
+
221
+ depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
222
+
223
+
224
+ img = Image.open(img_filename)
225
+
226
+ img = self.transform(img) # (4, h, w)
227
+
228
+
229
+ if img.shape[0] == 4:
230
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
231
+ imgs += [img]
232
+
233
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
234
+ mask_h = depth_h > 0
235
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
236
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
237
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
238
+ depth_h = distance
239
+
240
+
241
+ depths_h.append(depth_h)
242
+ masks_h.append(mask_h)
243
+
244
+ intrinsic = self.intrinsic
245
+ intrinsics.append(intrinsic)
246
+
247
+
248
+ near_fars.append(self.near_fars[idx])
249
+ image_perm = 0 # only supervised on reference view
250
+
251
+ mask_dilated = None
252
+
253
+
254
+
255
+ src_views = range(0, 8)
256
+ src_views_used = []
257
+ for vid in src_views:
258
+ src_views_used.append(vid)
259
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{vid}_0.png')
260
+
261
+ img = Image.open(img_filename)
262
+ img_wh = self.img_wh
263
+
264
+ img = self.transform(img)
265
+ if img.shape[0] == 4:
266
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
267
+ imgs += [img]
268
+ depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
269
+ depth_h = depth_h.fill_(-1.0)
270
+ depths_h.append(depth_h)
271
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
272
+
273
+ near_fars.append(self.all_near_fars[vid])
274
+ intrinsics.append(self.all_intrinsics[vid])
275
+
276
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
277
+
278
+
279
+ scale_mat, scale_factor = self.cal_scale_mat(
280
+ img_hw=[img_wh[1], img_wh[0]],
281
+ intrinsics=intrinsics, extrinsics=w2cs,
282
+ near_fars=near_fars, factor=1.1
283
+ )
284
+
285
+
286
+ new_near_fars = []
287
+ new_w2cs = []
288
+ new_c2ws = []
289
+ new_affine_mats = []
290
+ new_depths_h = []
291
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
292
+
293
+ P = intrinsic @ extrinsic @ scale_mat
294
+ P = P[:3, :4]
295
+ # - should use load_K_Rt_from_P() to obtain c2w
296
+ c2w = load_K_Rt_from_P(None, P)[1]
297
+ w2c = np.linalg.inv(c2w)
298
+ new_w2cs.append(w2c)
299
+ new_c2ws.append(c2w)
300
+ affine_mat = np.eye(4)
301
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
302
+ new_affine_mats.append(affine_mat)
303
+
304
+ camera_o = c2w[:3, 3]
305
+ dist = np.sqrt(np.sum(camera_o ** 2))
306
+ near = dist - 1
307
+ far = dist + 1
308
+
309
+ new_near_fars.append([0.95 * near, 1.05 * far])
310
+
311
+ new_depths_h.append(depth * scale_factor)
312
+
313
+
314
+ imgs = torch.stack(imgs).float()
315
+ depths_h = np.stack(new_depths_h)
316
+ masks_h = np.stack(masks_h)
317
+
318
+ affine_mats = np.stack(new_affine_mats)
319
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
320
+ new_near_fars)
321
+
322
+ if self.split == 'train':
323
+ start_idx = 0
324
+ else:
325
+ start_idx = 1
326
+
327
+ view_ids = [idx_original % self.imgs_per_instance] + src_views_used
328
+ sample['origin_idx'] = origin_idx
329
+ sample['images'] = imgs # (V, 3, H, W)
330
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
331
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
332
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
333
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
334
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
335
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
336
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
337
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
338
+
339
+ # sample['light_idx'] = torch.tensor(light_idx)
340
+ sample['scan'] = folder_id
341
+
342
+ sample['scale_factor'] = torch.tensor(scale_factor)
343
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
344
+ sample['render_img_idx'] = torch.tensor(image_perm)
345
+ sample['partial_vol_origin'] = self.partial_vol_origin
346
+ meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
347
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end
348
+
349
+
350
+ # - image to render
351
+ sample['query_image'] = sample['images'][0]
352
+ sample['query_c2w'] = sample['c2ws'][0]
353
+ sample['query_w2c'] = sample['w2cs'][0]
354
+ sample['query_intrinsic'] = sample['intrinsics'][0]
355
+ sample['query_depth'] = sample['depths_h'][0]
356
+ sample['query_mask'] = sample['masks_h'][0]
357
+ sample['query_near_far'] = sample['near_fars'][0]
358
+
359
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
360
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
361
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
362
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
363
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
364
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
365
+ sample['view_ids'] = sample['view_ids'][start_idx:]
366
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
367
+
368
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
369
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
370
+
371
+ # - generate rays
372
+ if ('val' in self.split) or ('test' in self.split):
373
+ sample_rays = gen_rays_from_single_image(
374
+ img_wh[1], img_wh[0],
375
+ sample['query_image'],
376
+ sample['query_intrinsic'],
377
+ sample['query_c2w'],
378
+ depth=sample['query_depth'],
379
+ mask=sample['query_mask'] if self.clean_image else None)
380
+ else:
381
+ sample_rays = gen_random_rays_from_single_image(
382
+ img_wh[1], img_wh[0],
383
+ self.N_rays,
384
+ sample['query_image'],
385
+ sample['query_intrinsic'],
386
+ sample['query_c2w'],
387
+ depth=sample['query_depth'],
388
+ mask=sample['query_mask'] if self.clean_image else None,
389
+ dilated_mask=mask_dilated,
390
+ importance_sample=self.importance_sample)
391
+
392
+
393
+ sample['rays'] = sample_rays
394
+
395
+ return sample
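After cal_scale_mat(), every loader re-scales each projection matrix so the object fits a unit sphere, recovers the pose with cv2.decomposeProjectionMatrix (the same idea as load_K_Rt_from_P above), and re-derives near/far from the camera's distance to the origin with 0.95 / 1.05 margins. The function below is only a sketch of that per-view normalisation; normalise_view is a hypothetical helper, not part of the repo.

# Sketch of the per-view normalisation loop that follows cal_scale_mat().
import cv2
import numpy as np

def normalise_view(intrinsic_4x4, w2c_4x4, scale_mat_4x4):
    P = (intrinsic_4x4 @ w2c_4x4 @ scale_mat_4x4)[:3, :4]
    _, R, t = cv2.decomposeProjectionMatrix(P)[:3]
    c2w = np.eye(4, dtype=np.float32)
    c2w[:3, :3] = R.transpose()
    c2w[:3, 3] = (t[:3] / t[3])[:, 0]
    dist = np.linalg.norm(c2w[:3, 3])                  # camera distance to the normalised origin
    near_far = [0.95 * (dist - 1.0), 1.05 * (dist + 1.0)]
    return np.linalg.inv(c2w), c2w, near_far           # (w2c, c2w, [near, far])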
SparseNeuS_demo_v1/data/blender_general_narrow_4_1_eval_new_data.py ADDED
@@ -0,0 +1,418 @@
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+
18
+
19
+ def get_ray_directions(H, W, focal, center=None):
20
+ """
21
+ Get ray directions for all pixels in camera coordinate.
22
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
23
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
24
+ Inputs:
25
+ H, W, focal: image height, width and focal length
26
+ Outputs:
27
+ directions: (H, W, 3), the direction of the rays in camera coordinate
28
+ """
29
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
30
+
31
+ i, j = grid.unbind(-1)
32
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
33
+ # see https://github.com/bmild/nerf/issues/24
34
+ cent = center if center is not None else [W / 2, H / 2]
35
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
36
+
37
+ return directions
38
+
39
+ def load_K_Rt_from_P(filename, P=None):
40
+ if P is None:
41
+ lines = open(filename).read().splitlines()
42
+ if len(lines) == 4:
43
+ lines = lines[1:]
44
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
45
+ P = np.asarray(lines).astype(np.float32).squeeze()
46
+
47
+ out = cv2.decomposeProjectionMatrix(P)
48
+ K = out[0]
49
+ R = out[1]
50
+ t = out[2]
51
+
52
+ K = K / K[2, 2]
53
+ intrinsics = np.eye(4)
54
+ intrinsics[:3, :3] = K
55
+
56
+ pose = np.eye(4, dtype=np.float32)
57
+ pose[:3, :3] = R.transpose() # ? why need transpose here
58
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
59
+
60
+ return intrinsics, pose # ! return cam2world matrix here
61
+
62
+
63
+ # ! load one ref-image with multiple src-images in camera coordinate system
64
+ class BlenderPerView(Dataset):
65
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
66
+ split_filepath=None, pair_filepath=None,
67
+ N_rays=512,
68
+ vol_dims=[128, 128, 128], batch_size=1,
69
+ clean_image=False, importance_sample=False, test_ref_views=[],
70
+ specific_dataset_name = 'GSO'
71
+ ):
72
+
73
+ # print("root_dir: ", root_dir)
74
+ self.root_dir = root_dir
75
+ self.split = split
76
+ # self.specific_dataset_name = 'Realfusion'
77
+ # self.specific_dataset_name = 'GSO'
78
+ # self.specific_dataset_name = 'Objaverse'
79
+ # self.specific_dataset_name = 'Zero123'
80
+
81
+ self.specific_dataset_name = specific_dataset_name
82
+ self.n_views = n_views
83
+ self.N_rays = N_rays
84
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
85
+
86
+ self.clean_image = clean_image
87
+ self.importance_sample = importance_sample
88
+ self.test_ref_views = test_ref_views # used for testing
89
+ self.scale_factor = 1.0
90
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
91
+ assert self.split in ('val', 'export_mesh'), 'only support val or export_mesh'
92
+ # find all subfolders
93
+ main_folder = os.path.join(root_dir, self.specific_dataset_name)
94
+ self.shape_list = os.listdir(main_folder)
95
+ self.shape_list.sort()
96
+
97
+ # self.shape_list = ['barrel_render']
98
+ # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
99
+
100
+
101
+ self.lvis_paths = []
102
+ for shape_name in self.shape_list:
103
+ self.lvis_paths.append(os.path.join(main_folder, shape_name))
104
+
105
+ # print("lvis_paths: ", self.lvis_paths)
106
+
107
+ if img_wh is not None:
108
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
109
+ 'img_wh must both be multiples of 32!'
110
+
111
+
112
+ # * bounding box for rendering
113
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
114
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
115
+
116
+ # - used for cost volume regularization
117
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
118
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
119
+
120
+
121
+ def define_transforms(self):
122
+ self.transform = T.Compose([T.ToTensor()])
123
+
124
+
125
+
126
+ def load_cam_info(self):
127
+ for vid, img_id in enumerate(self.img_ids):
128
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
129
+ self.all_intrinsics.append(intrinsic)
130
+ self.all_extrinsics.append(extrinsic)
131
+ self.all_near_fars.append(near_far)
132
+
133
+ def read_depth(self, filename):
134
+ pass
135
+
136
+ def read_mask(self, filename):
137
+ mask_h = cv2.imread(filename, 0)
138
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
139
+ interpolation=cv2.INTER_NEAREST)
140
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
141
+ interpolation=cv2.INTER_NEAREST)
142
+
143
+ mask[mask > 0] = 1 # the masks stored in png are not binary
144
+ mask_h[mask_h > 0] = 1
145
+
146
+ return mask, mask_h
147
+
148
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
149
+
150
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
151
+
152
+ radius = radius * factor
153
+ scale_mat = np.diag([radius, radius, radius, 1.0])
154
+ scale_mat[:3, 3] = center.cpu().numpy()
155
+ scale_mat = scale_mat.astype(np.float32)
156
+
157
+ return scale_mat, 1. / radius.cpu().numpy()
158
+
159
+ def __len__(self):
160
+ return 8*len(self.lvis_paths)
161
+ # return len(self.lvis_paths)
162
+
163
+
164
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
165
+ pass
166
+
167
+
168
+ def __getitem__(self, idx):
169
+ sample = {}
170
+ # idx = idx * 8 # to be deleted
171
+ origin_idx = idx
172
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
173
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views
174
+
175
+ folder_path = self.lvis_paths[idx//8]
176
+ idx = idx % 8 # [0, 7]
177
+
178
+ # last subdir name
179
+ shape_name = os.path.split(folder_path)[-1]
180
+
181
+ pose_json_path = os.path.join(folder_path, "pose.json")
182
+ with open(pose_json_path, 'r') as f:
183
+ meta = json.load(f)
184
+
185
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
186
+ self.img_wh = (256, 256)
187
+ self.input_poses = np.array(list(meta["c2ws"].values()))
188
+ intrinsic = np.eye(4)
189
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
190
+ self.intrinsic = intrinsic
191
+ self.near_far = np.array(meta["near_far"])
192
+ self.near_far[1] = 1.8
193
+ self.define_transforms()
194
+ self.blender2opencv = np.array(
195
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
196
+ )
197
+
198
+ self.c2ws = []
199
+ self.w2cs = []
200
+ self.near_fars = []
201
+ # self.root_dir = root_dir
202
+ for image_dix, img_id in enumerate(self.img_ids):
203
+ pose = self.input_poses[image_dix]
204
+ c2w = pose @ self.blender2opencv
205
+ self.c2ws.append(c2w)
206
+ self.w2cs.append(np.linalg.inv(c2w))
207
+ self.near_fars.append(self.near_far)
208
+ self.c2ws = np.stack(self.c2ws, axis=0)
209
+ self.w2cs = np.stack(self.w2cs, axis=0)
210
+
211
+
212
+ self.all_intrinsics = [] # the cam info of the whole scene
213
+ self.all_extrinsics = []
214
+ self.all_near_fars = []
215
+ self.load_cam_info()
216
+
217
+
218
+ # target view
219
+ c2w = self.c2ws[idx]
220
+ w2c = np.linalg.inv(c2w)
221
+ w2c_ref = w2c
222
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
223
+
224
+ w2cs.append(w2c @ w2c_ref_inv)
225
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
226
+
227
+ # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
228
+ img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}')
229
+
230
+ img = Image.open(img_filename)
231
+ img = self.transform(img) # (4, h, w)
232
+
233
+
234
+ if img.shape[0] == 4:
235
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
236
+ imgs += [img]
237
+
238
+
239
+ depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
240
+ depth_h = depth_h.fill_(-1.0)
241
+ mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
242
+
243
+
244
+ depths_h.append(depth_h)
245
+ masks_h.append(mask_h)
246
+
247
+ intrinsic = self.intrinsic
248
+ intrinsics.append(intrinsic)
249
+
250
+
251
+ near_fars.append(self.near_fars[idx])
252
+ image_perm = 0 # only supervised on reference view
253
+
254
+ mask_dilated = None
255
+
256
+
257
+ # src_views = range(8, 8 + 8 * 4)
258
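+ # the four stage2 renderings that belong to reference view idx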
+ src_views = range(8+idx*4, 8+(idx+1)*4)
259
+ for vid in src_views:
260
+
261
+ # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
262
+ img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}')
263
+ img = Image.open(img_filename)
264
+ img_wh = self.img_wh
265
+
266
+ img = self.transform(img)
267
+ if img.shape[0] == 4:
268
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
269
+
270
+ imgs += [img]
271
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
272
+ depths_h.append(depth_h)
273
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
274
+
275
+ near_fars.append(self.all_near_fars[vid])
276
+ intrinsics.append(self.all_intrinsics[vid])
277
+
278
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
279
+
280
+
281
+ # ! estimate scale_mat
282
+ scale_mat, scale_factor = self.cal_scale_mat(
283
+ img_hw=[img_wh[1], img_wh[0]],
284
+ intrinsics=intrinsics, extrinsics=w2cs,
285
+ near_fars=near_fars, factor=1.1
286
+ )
287
+
288
+
289
+ new_near_fars = []
290
+ new_w2cs = []
291
+ new_c2ws = []
292
+ new_affine_mats = []
293
+ new_depths_h = []
294
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
295
+
296
+ P = intrinsic @ extrinsic @ scale_mat
297
+ P = P[:3, :4]
298
+ # - should use load_K_Rt_from_P() to obtain c2w
299
+ c2w = load_K_Rt_from_P(None, P)[1]
300
+ w2c = np.linalg.inv(c2w)
301
+ new_w2cs.append(w2c)
302
+ new_c2ws.append(c2w)
303
+ affine_mat = np.eye(4)
304
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
305
+ new_affine_mats.append(affine_mat)
306
+
307
+ camera_o = c2w[:3, 3]
308
+ dist = np.sqrt(np.sum(camera_o ** 2))
309
+ near = dist - 1
310
+ far = dist + 1
311
+
312
+ new_near_fars.append([0.95 * near, 1.05 * far])
313
+ new_depths_h.append(depth * scale_factor)
314
+
315
+ # print(new_near_fars)
316
+ imgs = torch.stack(imgs).float()
317
+ depths_h = np.stack(new_depths_h)
318
+ masks_h = np.stack(masks_h)
319
+
320
+ affine_mats = np.stack(new_affine_mats)
321
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
322
+ new_near_fars)
323
+
324
+ if self.split == 'train':
325
+ start_idx = 0
326
+ else:
327
+ start_idx = 1
328
+
329
+
330
+
331
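+ # the 8 canonical input-view poses, renormalised below and returned as target_candidate_w2cs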
+ target_w2cs = []
332
+ target_intrinsics = []
333
+ new_target_w2cs = []
334
+ for i_idx in range(8):
335
+ target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
336
+ target_intrinsics.append(self.all_intrinsics[i_idx])
337
+
338
+ for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
339
+
340
+ P = intrinsic @ extrinsic @ scale_mat
341
+ P = P[:3, :4]
342
+ # - should use load_K_Rt_from_P() to obtain c2w
343
+ c2w = load_K_Rt_from_P(None, P)[1]
344
+ w2c = np.linalg.inv(c2w)
345
+ new_target_w2cs.append(w2c)
346
+ target_w2cs = np.stack(new_target_w2cs)
347
+
348
+
349
+
350
+ view_ids = [idx] + list(src_views)
351
+ sample['origin_idx'] = origin_idx
352
+ sample['images'] = imgs # (V, 3, H, W)
353
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
354
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
355
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
356
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
357
+ sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
358
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
359
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
360
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
361
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
362
+
363
+ # sample['light_idx'] = torch.tensor(light_idx)
364
+ sample['scan'] = shape_name
365
+
366
+ sample['scale_factor'] = torch.tensor(scale_factor)
367
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
368
+ sample['render_img_idx'] = torch.tensor(image_perm)
369
+ sample['partial_vol_origin'] = self.partial_vol_origin
370
+ sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
371
+ # print("meta: ", sample['meta'])
372
+
373
+ # - image to render
374
+ sample['query_image'] = sample['images'][0]
375
+ sample['query_c2w'] = sample['c2ws'][0]
376
+ sample['query_w2c'] = sample['w2cs'][0]
377
+ sample['query_intrinsic'] = sample['intrinsics'][0]
378
+ sample['query_depth'] = sample['depths_h'][0]
379
+ sample['query_mask'] = sample['masks_h'][0]
380
+ sample['query_near_far'] = sample['near_fars'][0]
381
+
382
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
383
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
384
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
385
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
386
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
387
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
388
+ sample['view_ids'] = sample['view_ids'][start_idx:]
389
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
390
+
391
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
392
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
393
+
394
+ # - generate rays
395
+ if ('val' in self.split) or ('test' in self.split):
396
+ sample_rays = gen_rays_from_single_image(
397
+ img_wh[1], img_wh[0],
398
+ sample['query_image'],
399
+ sample['query_intrinsic'],
400
+ sample['query_c2w'],
401
+ depth=sample['query_depth'],
402
+ mask=sample['query_mask'] if self.clean_image else None)
403
+ else:
404
+ sample_rays = gen_random_rays_from_single_image(
405
+ img_wh[1], img_wh[0],
406
+ self.N_rays,
407
+ sample['query_image'],
408
+ sample['query_intrinsic'],
409
+ sample['query_c2w'],
410
+ depth=sample['query_depth'],
411
+ mask=sample['query_mask'] if self.clean_image else None,
412
+ dilated_mask=mask_dilated,
413
+ importance_sample=self.importance_sample)
414
+
415
+
416
+ sample['rays'] = sample_rays
417
+
418
+ return sample
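
The pose/near-far renormalisation loop above (rebuilding w2cs, c2ws and affine_mats after cal_scale_mat) is shared by all of the loaders in this directory. A condensed sketch of that step, assuming the load_K_Rt_from_P helper defined in these files; the array inputs are illustrative, not a drop-in replacement:

import numpy as np

def renormalise_views(intrinsics, w2cs, scale_mat, scale_factor, depths_h):
    # Re-express every view after the scene has been scaled to roughly unit radius.
    new_w2cs, new_c2ws, new_affine_mats, new_near_fars, new_depths = [], [], [], [], []
    for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
        # decompose K[R|t] again after applying scale_mat, as done above
        P = (intrinsic @ extrinsic @ scale_mat)[:3, :4]
        c2w = load_K_Rt_from_P(None, P)[1]          # helper defined in these loaders
        w2c = np.linalg.inv(c2w)
        affine_mat = np.eye(4)
        affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
        # the object is now centred at the origin with radius ~1,
        # so near/far is the camera distance -/+ 1, padded by a 5% margin
        dist = np.linalg.norm(c2w[:3, 3])
        new_near_fars.append([0.95 * (dist - 1.0), 1.05 * (dist + 1.0)])
        new_w2cs.append(w2c)
        new_c2ws.append(c2w)
        new_affine_mats.append(affine_mat)
        new_depths.append(depth * scale_factor)      # depths live in the scaled space too
    return (np.stack(new_w2cs), np.stack(new_c2ws), np.stack(new_affine_mats),
            np.stack(new_near_fars), np.stack(new_depths))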
SparseNeuS_demo_v1/data/blender_general_narrow_6.py ADDED
@@ -0,0 +1,399 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # note: the +0.5 above shifts the samples to pixel centers
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # decomposeProjectionMatrix returns the world-to-camera rotation; its transpose is the cam-to-world rotation
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ # print("root_dir: ", root_dir)
70
+ self.root_dir = root_dir
71
+ self.split = split
72
+
73
+ self.n_views = n_views
74
+ self.N_rays = N_rays
75
+ self.batch_size = batch_size # - used to construct new metas for GRU fusion training
76
+
77
+ self.clean_image = clean_image
78
+ self.importance_sample = importance_sample
79
+ self.test_ref_views = test_ref_views # used for testing
80
+ self.scale_factor = 1.0
81
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
+
83
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
+ with open(lvis_json_path, 'r') as f:
85
+ lvis_paths = json.load(f)
86
+ if self.split == 'train':
87
+ self.lvis_paths = lvis_paths['train']
88
+ else:
89
+ self.lvis_paths = lvis_paths['val']
90
+ if img_wh is not None:
91
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
+ 'img_wh must both be multiples of 32!'
93
+
94
+
95
+ pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
+ with open(pose_json_path, 'r') as f:
97
+ meta = json.load(f)
98
+
99
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
+ self.img_wh = (256, 256)
101
+ self.input_poses = np.array(list(meta["c2ws"].values()))
102
+ intrinsic = np.eye(4)
103
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
+ self.intrinsic = intrinsic
105
+ self.near_far = np.array(meta["near_far"])
106
+ self.near_far[1] = 1.8
107
+ self.define_transforms()
108
+ self.blender2opencv = np.array(
109
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
+ )
111
+
112
+
113
+ self.c2ws = []
114
+ self.w2cs = []
115
+ self.near_fars = []
116
+ # self.root_dir = root_dir
117
+ for idx, img_id in enumerate(self.img_ids):
118
+ pose = self.input_poses[idx]
119
+ c2w = pose @ self.blender2opencv
120
+ self.c2ws.append(c2w)
121
+ self.w2cs.append(np.linalg.inv(c2w))
122
+ self.near_fars.append(self.near_far)
123
+ self.c2ws = np.stack(self.c2ws, axis=0)
124
+ self.w2cs = np.stack(self.w2cs, axis=0)
125
+
126
+
127
+ self.all_intrinsics = [] # the cam info of the whole scene
128
+ self.all_extrinsics = []
129
+ self.all_near_fars = []
130
+ self.load_cam_info()
131
+
132
+ # * bounding box for rendering
133
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
135
+
136
+ # - used for cost volume regularization
137
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
+
140
+
141
+ def define_transforms(self):
142
+ self.transform = T.Compose([T.ToTensor()])
143
+
144
+
145
+
146
+ def load_cam_info(self):
147
+ for vid, img_id in enumerate(self.img_ids):
148
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
+ self.all_intrinsics.append(intrinsic)
150
+ self.all_extrinsics.append(extrinsic)
151
+ self.all_near_fars.append(near_far)
152
+
153
+ def read_depth(self, filename):
154
+ pass
155
+
156
+ def read_mask(self, filename):
157
+ mask_h = cv2.imread(filename, 0)
158
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
+ interpolation=cv2.INTER_NEAREST)
160
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
+ interpolation=cv2.INTER_NEAREST)
162
+
163
+ mask[mask > 0] = 1 # the masks stored in png are not binary
164
+ mask_h[mask_h > 0] = 1
165
+
166
+ return mask, mask_h
167
+
168
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
+
170
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
+ # print("center", center)
172
+ # print("radius", radius)
173
+ # print("bounds", bounds)
174
+ # import ipdb; ipdb.set_trace()
175
+ radius = radius * factor
176
+ scale_mat = np.diag([radius, radius, radius, 1.0])
177
+ scale_mat[:3, 3] = center.cpu().numpy()
178
+ scale_mat = scale_mat.astype(np.float32)
179
+
180
+ return scale_mat, 1. / radius.cpu().numpy()
181
+
182
+ def __len__(self):
183
+ if self.split == 'train':
184
+ return 6*len(self.lvis_paths)
185
+ else:
186
+ return 8*len(self.lvis_paths)
187
+
188
+
189
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
190
+ pass
191
+
192
+
193
+ def __getitem__(self, idx):
194
+ sample = {}
195
+ origin_idx = idx
196
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
197
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
198
+
199
+ if self.split == 'train':
200
+ folder_uid_dict = self.lvis_paths[idx//6]
201
+ idx = idx % 6 # [0, 5]
202
+ if idx == 4:
203
+ idx = 5
204
+ elif idx == 5:
205
+ idx = 7
206
+ else:
207
+ folder_uid_dict = self.lvis_paths[idx//8]
208
+ idx = idx % 8 # [0, 7]
209
+
210
+ folder_id = folder_uid_dict['folder_id']
211
+ uid = folder_uid_dict['uid']
212
+
213
+
214
+ # target view
215
+ c2w = self.c2ws[idx]
216
+ w2c = np.linalg.inv(c2w)
217
+ w2c_ref = w2c
218
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
219
+
220
+ w2cs.append(w2c @ w2c_ref_inv)
221
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
222
+
223
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
224
+
225
+ depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
226
+
227
+
228
+ img = Image.open(img_filename)
229
+
230
+ img = self.transform(img) # (4, h, w)
231
+
232
+
233
+ if img.shape[0] == 4:
234
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
235
+ imgs += [img]
236
+
237
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
238
+ mask_h = depth_h > 0
239
+ # print("valid pixels", np.sum(mask_h))
240
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
241
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
242
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
243
+ depth_h = distance
244
+
245
+
246
+ depths_h.append(depth_h)
247
+ masks_h.append(mask_h)
248
+
249
+ intrinsic = self.intrinsic
250
+ intrinsics.append(intrinsic)
251
+
252
+
253
+ near_fars.append(self.near_fars[idx])
254
+ image_perm = 0 # only supervised on reference view
255
+
256
+ mask_dilated = None
257
+
258
+ # src_views = range(8+idx*4, 8+(idx+1)*4)
259
+ src_views = range(8, 8 + 8 * 4)
260
+
261
+ for vid in src_views:
262
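+ # narrow_6 setting: skip the source renderings that belong to reference views 4 and 6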
+ if ((vid - 8) // 4 == 4) or ((vid - 8) // 4 == 6):
263
+ continue
264
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
265
+
266
+ img = Image.open(img_filename)
267
+ img_wh = self.img_wh
268
+
269
+ img = self.transform(img)
270
+ if img.shape[0] == 4:
271
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
272
+
273
+ imgs += [img]
274
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
275
+ depths_h.append(depth_h)
276
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
277
+
278
+ near_fars.append(self.all_near_fars[vid])
279
+ intrinsics.append(self.all_intrinsics[vid])
280
+
281
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
282
+ # print("len(imges)", len(imgs))
283
+
284
+ # ! estimate scale_mat
285
+ scale_mat, scale_factor = self.cal_scale_mat(
286
+ img_hw=[img_wh[1], img_wh[0]],
287
+ intrinsics=intrinsics, extrinsics=w2cs,
288
+ near_fars=near_fars, factor=1.1
289
+ )
290
+
291
+
292
+ new_near_fars = []
293
+ new_w2cs = []
294
+ new_c2ws = []
295
+ new_affine_mats = []
296
+ new_depths_h = []
297
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
298
+
299
+ P = intrinsic @ extrinsic @ scale_mat
300
+ P = P[:3, :4]
301
+ # - should use load_K_Rt_from_P() to obtain c2w
302
+ c2w = load_K_Rt_from_P(None, P)[1]
303
+ w2c = np.linalg.inv(c2w)
304
+ new_w2cs.append(w2c)
305
+ new_c2ws.append(c2w)
306
+ affine_mat = np.eye(4)
307
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
308
+ new_affine_mats.append(affine_mat)
309
+
310
+ camera_o = c2w[:3, 3]
311
+ dist = np.sqrt(np.sum(camera_o ** 2))
312
+ near = dist - 1
313
+ far = dist + 1
314
+
315
+ new_near_fars.append([0.95 * near, 1.05 * far])
316
+ new_depths_h.append(depth * scale_factor)
317
+
318
+ # print(new_near_fars)
319
+ imgs = torch.stack(imgs).float()
320
+ depths_h = np.stack(new_depths_h)
321
+ masks_h = np.stack(masks_h)
322
+
323
+ affine_mats = np.stack(new_affine_mats)
324
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
325
+ new_near_fars)
326
+
327
+ if self.split == 'train':
328
+ start_idx = 0
329
+ else:
330
+ start_idx = 1
331
+
332
+ view_ids = [idx] + list(src_views)
333
+ sample['origin_idx'] = origin_idx
334
+ sample['images'] = imgs # (V, 3, H, W)
335
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
336
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
337
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
338
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
339
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
340
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
341
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
342
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
343
+
344
+ # sample['light_idx'] = torch.tensor(light_idx)
345
+ sample['scan'] = folder_id
346
+
347
+ sample['scale_factor'] = torch.tensor(scale_factor)
348
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
349
+ sample['render_img_idx'] = torch.tensor(image_perm)
350
+ sample['partial_vol_origin'] = self.partial_vol_origin
351
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
352
+
353
+
354
+ # - image to render
355
+ sample['query_image'] = sample['images'][0]
356
+ sample['query_c2w'] = sample['c2ws'][0]
357
+ sample['query_w2c'] = sample['w2cs'][0]
358
+ sample['query_intrinsic'] = sample['intrinsics'][0]
359
+ sample['query_depth'] = sample['depths_h'][0]
360
+ sample['query_mask'] = sample['masks_h'][0]
361
+ sample['query_near_far'] = sample['near_fars'][0]
362
+
363
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
364
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
365
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
366
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
367
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
368
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
369
+ sample['view_ids'] = sample['view_ids'][start_idx:]
370
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
371
+
372
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
373
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
374
+
375
+ # - generate rays
376
+ if ('val' in self.split) or ('test' in self.split):
377
+ sample_rays = gen_rays_from_single_image(
378
+ img_wh[1], img_wh[0],
379
+ sample['query_image'],
380
+ sample['query_intrinsic'],
381
+ sample['query_c2w'],
382
+ depth=sample['query_depth'],
383
+ mask=sample['query_mask'] if self.clean_image else None)
384
+ else:
385
+ sample_rays = gen_random_rays_from_single_image(
386
+ img_wh[1], img_wh[0],
387
+ self.N_rays,
388
+ sample['query_image'],
389
+ sample['query_intrinsic'],
390
+ sample['query_c2w'],
391
+ depth=sample['query_depth'],
392
+ mask=sample['query_mask'] if self.clean_image else None,
393
+ dilated_mask=mask_dilated,
394
+ importance_sample=self.importance_sample)
395
+
396
+
397
+ sample['rays'] = sample_rays
398
+
399
+ return sample
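
A minimal usage sketch for the BlenderPerView class defined above, wrapped in a standard PyTorch DataLoader. The root path, worker count and batch handling here are illustrative assumptions, not values taken from the repository:

import torch
from torch.utils.data import DataLoader

def build_val_loader(root_dir="/objaverse-processed/zero12345_img/zero12345_narrow",
                     num_workers=4):
    # hypothetical root_dir; the class reads its pose/split json from hard-coded paths
    dataset = BlenderPerView(root_dir=root_dir, split="val", n_views=3, img_wh=(256, 256))
    # batch_size=1: each item already bundles one reference view with its source views
    return DataLoader(dataset, batch_size=1, shuffle=False,
                      num_workers=num_workers, pin_memory=True)

if __name__ == "__main__":
    loader = build_val_loader()
    sample = next(iter(loader))
    # images: (1, V, 3, 256, 256); w2cs: (1, V, 4, 4), as assembled in __getitem__
    print(sample["images"].shape, sample["w2cs"].shape, sample["meta"])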
SparseNeuS_demo_v1/data/blender_general_narrow_8_3_fixed.py ADDED
@@ -0,0 +1,393 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # note: the +0.5 above shifts the samples to pixel centers
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # decomposeProjectionMatrix returns the world-to-camera rotation; its transpose is the cam-to-world rotation
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ # print("root_dir: ", root_dir)
70
+ self.root_dir = root_dir
71
+ self.split = split
72
+
73
+ self.n_views = n_views
74
+ self.N_rays = N_rays
75
+ self.batch_size = batch_size # - used to construct new metas for GRU fusion training
76
+
77
+ self.clean_image = clean_image
78
+ self.importance_sample = importance_sample
79
+ self.test_ref_views = test_ref_views # used for testing
80
+ self.scale_factor = 1.0
81
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
+
83
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
+ with open(lvis_json_path, 'r') as f:
85
+ lvis_paths = json.load(f)
86
+ if self.split == 'train':
87
+ self.lvis_paths = lvis_paths['train']
88
+ else:
89
+ self.lvis_paths = lvis_paths['val']
90
+ if img_wh is not None:
91
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
+ 'img_wh must both be multiples of 32!'
93
+
94
+
95
+ pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
+ with open(pose_json_path, 'r') as f:
97
+ meta = json.load(f)
98
+
99
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
+ self.img_wh = (256, 256)
101
+ self.input_poses = np.array(list(meta["c2ws"].values()))
102
+ intrinsic = np.eye(4)
103
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
+ self.intrinsic = intrinsic
105
+ self.near_far = np.array(meta["near_far"])
106
+ self.near_far[1] = 1.8
107
+ self.define_transforms()
108
+ self.blender2opencv = np.array(
109
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
+ )
111
+
112
+
113
+ self.c2ws = []
114
+ self.w2cs = []
115
+ self.near_fars = []
116
+ # self.root_dir = root_dir
117
+ for idx, img_id in enumerate(self.img_ids):
118
+ pose = self.input_poses[idx]
119
+ c2w = pose @ self.blender2opencv
120
+ self.c2ws.append(c2w)
121
+ self.w2cs.append(np.linalg.inv(c2w))
122
+ self.near_fars.append(self.near_far)
123
+ self.c2ws = np.stack(self.c2ws, axis=0)
124
+ self.w2cs = np.stack(self.w2cs, axis=0)
125
+
126
+
127
+ self.all_intrinsics = [] # the cam info of the whole scene
128
+ self.all_extrinsics = []
129
+ self.all_near_fars = []
130
+ self.load_cam_info()
131
+
132
+ # * bounding box for rendering
133
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
135
+
136
+ # - used for cost volume regularization
137
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
+
140
+
141
+ def define_transforms(self):
142
+ self.transform = T.Compose([T.ToTensor()])
143
+
144
+
145
+
146
+ def load_cam_info(self):
147
+ for vid, img_id in enumerate(self.img_ids):
148
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
+ self.all_intrinsics.append(intrinsic)
150
+ self.all_extrinsics.append(extrinsic)
151
+ self.all_near_fars.append(near_far)
152
+
153
+ def read_depth(self, filename):
154
+ pass
155
+
156
+ def read_mask(self, filename):
157
+ mask_h = cv2.imread(filename, 0)
158
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
+ interpolation=cv2.INTER_NEAREST)
160
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
+ interpolation=cv2.INTER_NEAREST)
162
+
163
+ mask[mask > 0] = 1 # the masks stored in png are not binary
164
+ mask_h[mask_h > 0] = 1
165
+
166
+ return mask, mask_h
167
+
168
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
+
170
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
+ # print("center", center)
172
+ # print("radius", radius)
173
+ # print("bounds", bounds)
174
+ # import ipdb; ipdb.set_trace()
175
+ radius = radius * factor
176
+ scale_mat = np.diag([radius, radius, radius, 1.0])
177
+ scale_mat[:3, 3] = center.cpu().numpy()
178
+ scale_mat = scale_mat.astype(np.float32)
179
+
180
+ return scale_mat, 1. / radius.cpu().numpy()
181
+
182
+ def __len__(self):
183
+ return 8*len(self.lvis_paths)
184
+
185
+
186
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
187
+ pass
188
+
189
+
190
+ def __getitem__(self, idx):
191
+ sample = {}
192
+ origin_idx = idx
193
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
194
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
195
+
196
+
197
+ folder_uid_dict = self.lvis_paths[idx//8]
198
+ idx = idx % 8 # [0, 7]
199
+ folder_id = folder_uid_dict['folder_id']
200
+ uid = folder_uid_dict['uid']
201
+
202
+
203
+ # target view
204
+ c2w = self.c2ws[idx]
205
+ w2c = np.linalg.inv(c2w)
206
+ w2c_ref = w2c
207
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
208
+
209
+ w2cs.append(w2c @ w2c_ref_inv)
210
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
211
+
212
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
213
+
214
+ depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
215
+
216
+
217
+ img = Image.open(img_filename)
218
+
219
+ img = self.transform(img) # (4, h, w)
220
+
221
+
222
+ if img.shape[0] == 4:
223
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
224
+ imgs += [img]
225
+
226
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
227
+ mask_h = depth_h > 0
228
+ # print("valid pixels", np.sum(mask_h))
229
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
230
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
231
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
232
+ depth_h = distance
233
+
234
+
235
+ depths_h.append(depth_h)
236
+ masks_h.append(mask_h)
237
+
238
+ intrinsic = self.intrinsic
239
+ intrinsics.append(intrinsic)
240
+
241
+
242
+ near_fars.append(self.near_fars[idx])
243
+ image_perm = 0 # only supervised on reference view
244
+
245
+ mask_dilated = None
246
+
247
+ # src_views = range(8+idx*4, 8+(idx+1)*4)
248
+ src_views = list()
249
+ for i in range(8):
250
+ # fixed selection: always use local renderings 0, 2 and 3 (the random choice below is disabled)
251
+ # local_idxs = np.random.choice(4, 3, replace=False)
252
+ local_idxs = [0, 2, 3]
253
+ # local_idxs = np.random.choice(4, 3, replace=False)
254
+
255
+ local_idxs = [8 + i * 4 + local_idx for local_idx in local_idxs]
256
+ src_views += local_idxs
257
+ for vid in src_views:
258
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
259
+
260
+ img = Image.open(img_filename)
261
+ img_wh = self.img_wh
262
+
263
+ img = self.transform(img)
264
+ if img.shape[0] == 4:
265
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
266
+
267
+ imgs += [img]
268
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
269
+ depths_h.append(depth_h)
270
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
271
+
272
+ near_fars.append(self.all_near_fars[vid])
273
+ intrinsics.append(self.all_intrinsics[vid])
274
+
275
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
276
+
277
+ # print("len(imgs)", len(imgs))
278
+ # ! estimate scale_mat
279
+ scale_mat, scale_factor = self.cal_scale_mat(
280
+ img_hw=[img_wh[1], img_wh[0]],
281
+ intrinsics=intrinsics, extrinsics=w2cs,
282
+ near_fars=near_fars, factor=1.1
283
+ )
284
+
285
+
286
+ new_near_fars = []
287
+ new_w2cs = []
288
+ new_c2ws = []
289
+ new_affine_mats = []
290
+ new_depths_h = []
291
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
292
+
293
+ P = intrinsic @ extrinsic @ scale_mat
294
+ P = P[:3, :4]
295
+ # - should use load_K_Rt_from_P() to obtain c2w
296
+ c2w = load_K_Rt_from_P(None, P)[1]
297
+ w2c = np.linalg.inv(c2w)
298
+ new_w2cs.append(w2c)
299
+ new_c2ws.append(c2w)
300
+ affine_mat = np.eye(4)
301
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
302
+ new_affine_mats.append(affine_mat)
303
+
304
+ camera_o = c2w[:3, 3]
305
+ dist = np.sqrt(np.sum(camera_o ** 2))
306
+ near = dist - 1
307
+ far = dist + 1
308
+
309
+ new_near_fars.append([0.95 * near, 1.05 * far])
310
+ new_depths_h.append(depth * scale_factor)
311
+
312
+ # print(new_near_fars)
313
+ imgs = torch.stack(imgs).float()
314
+ depths_h = np.stack(new_depths_h)
315
+ masks_h = np.stack(masks_h)
316
+
317
+ affine_mats = np.stack(new_affine_mats)
318
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
319
+ new_near_fars)
320
+
321
+ if self.split == 'train':
322
+ start_idx = 0
323
+ else:
324
+ start_idx = 1
325
+
326
+ view_ids = [idx] + list(src_views)
327
+ sample['origin_idx'] = origin_idx
328
+ sample['images'] = imgs # (V, 3, H, W)
329
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
330
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
331
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
332
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
333
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
334
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
335
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
336
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
337
+
338
+ # sample['light_idx'] = torch.tensor(light_idx)
339
+ sample['scan'] = folder_id
340
+
341
+ sample['scale_factor'] = torch.tensor(scale_factor)
342
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
343
+ sample['render_img_idx'] = torch.tensor(image_perm)
344
+ sample['partial_vol_origin'] = self.partial_vol_origin
345
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
346
+
347
+
348
+ # - image to render
349
+ sample['query_image'] = sample['images'][0]
350
+ sample['query_c2w'] = sample['c2ws'][0]
351
+ sample['query_w2c'] = sample['w2cs'][0]
352
+ sample['query_intrinsic'] = sample['intrinsics'][0]
353
+ sample['query_depth'] = sample['depths_h'][0]
354
+ sample['query_mask'] = sample['masks_h'][0]
355
+ sample['query_near_far'] = sample['near_fars'][0]
356
+
357
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
358
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
359
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
360
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
361
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
362
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
363
+ sample['view_ids'] = sample['view_ids'][start_idx:]
364
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
365
+
366
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
367
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
368
+
369
+ # - generate rays
370
+ if ('val' in self.split) or ('test' in self.split):
371
+ sample_rays = gen_rays_from_single_image(
372
+ img_wh[1], img_wh[0],
373
+ sample['query_image'],
374
+ sample['query_intrinsic'],
375
+ sample['query_c2w'],
376
+ depth=sample['query_depth'],
377
+ mask=sample['query_mask'] if self.clean_image else None)
378
+ else:
379
+ sample_rays = gen_random_rays_from_single_image(
380
+ img_wh[1], img_wh[0],
381
+ self.N_rays,
382
+ sample['query_image'],
383
+ sample['query_intrinsic'],
384
+ sample['query_c2w'],
385
+ depth=sample['query_depth'],
386
+ mask=sample['query_mask'] if self.clean_image else None,
387
+ dilated_mask=mask_dilated,
388
+ importance_sample=self.importance_sample)
389
+
390
+
391
+ sample['rays'] = sample_rays
392
+
393
+ return sample
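
In blender_general_narrow_8_3_fixed.py the source views are a fixed subset: for each of the 8 reference views, local renderings 0, 2 and 3 of its four Zero123 outputs are used, and a global view id of the form 8 + i*4 + local maps back to the file view_{i}_{local}_10.png. A small stand-alone sketch of that index bookkeeping; the paths and names are placeholders:

import os

def fixed_src_views(local_idxs=(0, 2, 3)):
    # global ids: 0..7 are the reference views, 8.. are the 4 Zero123 renderings per reference view
    src_views = []
    for i in range(8):
        src_views += [8 + i * 4 + local for local in local_idxs]
    return src_views

def src_view_filename(root_dir, folder_id, uid, vid):
    # inverse mapping used when loading: global id -> (reference view, local rendering)
    ref_view, local = (vid - 8) // 4, vid % 4
    return os.path.join(root_dir, folder_id, uid, f"view_{ref_view}_{local}_10.png")

if __name__ == "__main__":
    views = fixed_src_views()
    assert len(views) == 24
    # e.g. global id 11 -> reference view 0, local rendering 3
    print(views[:6], src_view_filename("<root>", "<folder>", "<uid>", views[2]))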
SparseNeuS_demo_v1/data/blender_general_narrow_8_3_random.py ADDED
@@ -0,0 +1,395 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # note: the +0.5 above shifts the samples to pixel centers
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # decomposeProjectionMatrix returns the world-to-camera rotation; its transpose is the cam-to-world rotation
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ # print("root_dir: ", root_dir)
70
+ self.root_dir = root_dir
71
+ self.split = split
72
+
73
+ self.n_views = n_views
74
+ self.N_rays = N_rays
75
+ self.batch_size = batch_size # - used to construct new metas for GRU fusion training
76
+
77
+ self.clean_image = clean_image
78
+ self.importance_sample = importance_sample
79
+ self.test_ref_views = test_ref_views # used for testing
80
+ self.scale_factor = 1.0
81
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
+
83
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
+ with open(lvis_json_path, 'r') as f:
85
+ lvis_paths = json.load(f)
86
+ if self.split == 'train':
87
+ self.lvis_paths = lvis_paths['train']
88
+ else:
89
+ self.lvis_paths = lvis_paths['val']
90
+ if img_wh is not None:
91
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
+ 'img_wh must both be multiples of 32!'
93
+
94
+
95
+ pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
+ with open(pose_json_path, 'r') as f:
97
+ meta = json.load(f)
98
+
99
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
+ self.img_wh = (256, 256)
101
+ self.input_poses = np.array(list(meta["c2ws"].values()))
102
+ intrinsic = np.eye(4)
103
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
+ self.intrinsic = intrinsic
105
+ self.near_far = np.array(meta["near_far"])
106
+ self.near_far[1] = 1.8
107
+ self.define_transforms()
108
+ self.blender2opencv = np.array(
109
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
+ )
111
+
112
+
113
+ self.c2ws = []
114
+ self.w2cs = []
115
+ self.near_fars = []
116
+ # self.root_dir = root_dir
117
+ for idx, img_id in enumerate(self.img_ids):
118
+ pose = self.input_poses[idx]
119
+ c2w = pose @ self.blender2opencv
120
+ self.c2ws.append(c2w)
121
+ self.w2cs.append(np.linalg.inv(c2w))
122
+ self.near_fars.append(self.near_far)
123
+ self.c2ws = np.stack(self.c2ws, axis=0)
124
+ self.w2cs = np.stack(self.w2cs, axis=0)
125
+
126
+
127
+ self.all_intrinsics = [] # the cam info of the whole scene
128
+ self.all_extrinsics = []
129
+ self.all_near_fars = []
130
+ self.load_cam_info()
131
+
132
+ # * bounding box for rendering
133
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
135
+
136
+ # - used for cost volume regularization
137
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
+
140
+
141
+ def define_transforms(self):
142
+ self.transform = T.Compose([T.ToTensor()])
143
+
144
+
145
+
146
+ def load_cam_info(self):
147
+ for vid, img_id in enumerate(self.img_ids):
148
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
+ self.all_intrinsics.append(intrinsic)
150
+ self.all_extrinsics.append(extrinsic)
151
+ self.all_near_fars.append(near_far)
152
+
153
+ def read_depth(self, filename):
154
+ pass
155
+
156
+ def read_mask(self, filename):
157
+ mask_h = cv2.imread(filename, 0)
158
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
+ interpolation=cv2.INTER_NEAREST)
160
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
+ interpolation=cv2.INTER_NEAREST)
162
+
163
+ mask[mask > 0] = 1 # the masks stored in png are not binary
164
+ mask_h[mask_h > 0] = 1
165
+
166
+ return mask, mask_h
167
+
168
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
+
170
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
+ # print("center", center)
172
+ # print("radius", radius)
173
+ # print("bounds", bounds)
174
+ # import ipdb; ipdb.set_trace()
175
+ radius = radius * factor
176
+ scale_mat = np.diag([radius, radius, radius, 1.0])
177
+ scale_mat[:3, 3] = center.cpu().numpy()
178
+ scale_mat = scale_mat.astype(np.float32)
179
+
180
+ return scale_mat, 1. / radius.cpu().numpy()
181
+
182
+ def __len__(self):
183
+ return 8*len(self.lvis_paths)
184
+
185
+
186
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
187
+ pass
188
+
189
+
190
+ def __getitem__(self, idx):
191
+ sample = {}
192
+ origin_idx = idx
193
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
194
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
195
+
196
+
197
+ folder_uid_dict = self.lvis_paths[idx//8]
198
+ idx = idx % 8 # [0, 7]
199
+ folder_id = folder_uid_dict['folder_id']
200
+ uid = folder_uid_dict['uid']
201
+
202
+
203
+ # target view
204
+ c2w = self.c2ws[idx]
205
+ w2c = np.linalg.inv(c2w)
206
+ w2c_ref = w2c
207
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
208
+
209
+ w2cs.append(w2c @ w2c_ref_inv)
210
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
211
+
212
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
213
+
214
+ depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
215
+
216
+
217
+ img = Image.open(img_filename)
218
+
219
+ img = self.transform(img) # (4, h, w)
220
+
221
+
222
+ if img.shape[0] == 4:
223
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
224
+ imgs += [img]
225
+
226
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
227
+ mask_h = depth_h > 0
228
+ # print("valid pixels", np.sum(mask_h))
229
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
230
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
231
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
232
+ depth_h = distance
233
+
234
+
235
+ depths_h.append(depth_h)
236
+ masks_h.append(mask_h)
237
+
238
+ intrinsic = self.intrinsic
239
+ intrinsics.append(intrinsic)
240
+
241
+
242
+ near_fars.append(self.near_fars[idx])
243
+ image_perm = 0 # only supervised on reference view
244
+
245
+ mask_dilated = None
246
+
247
+ # src_views = range(8+idx*4, 8+(idx+1)*4)
248
+ src_views = list()
249
+ for i in range(8):
250
+
251
+ if self.split == 'train':
252
+ local_idxs = np.random.choice(4, 3, replace=False)
253
+ else:
254
+ local_idxs = [0, 2, 3]
255
+ # local_idxs = np.random.choice(4, 3, replace=False)
256
+
257
+ local_idxs = [8 + i * 4 + local_idx for local_idx in local_idxs]
258
+ src_views += local_idxs
259
+ for vid in src_views:
260
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
261
+
262
+ img = Image.open(img_filename)
263
+ img_wh = self.img_wh
264
+
265
+ img = self.transform(img)
266
+ if img.shape[0] == 4:
267
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
268
+
269
+ imgs += [img]
270
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
271
+ depths_h.append(depth_h)
272
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
273
+
274
+ near_fars.append(self.all_near_fars[vid])
275
+ intrinsics.append(self.all_intrinsics[vid])
276
+
277
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
278
+
279
+ # print("len(imgs)", len(imgs))
280
+ # ! estimate scale_mat
281
+ scale_mat, scale_factor = self.cal_scale_mat(
282
+ img_hw=[img_wh[1], img_wh[0]],
283
+ intrinsics=intrinsics, extrinsics=w2cs,
284
+ near_fars=near_fars, factor=1.1
285
+ )
286
+
287
+
288
+ new_near_fars = []
289
+ new_w2cs = []
290
+ new_c2ws = []
291
+ new_affine_mats = []
292
+ new_depths_h = []
293
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
294
+
295
+ P = intrinsic @ extrinsic @ scale_mat
296
+ P = P[:3, :4]
297
+ # - should use load_K_Rt_from_P() to obtain c2w
298
+ c2w = load_K_Rt_from_P(None, P)[1]
299
+ w2c = np.linalg.inv(c2w)
300
+ new_w2cs.append(w2c)
301
+ new_c2ws.append(c2w)
302
+ affine_mat = np.eye(4)
303
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
304
+ new_affine_mats.append(affine_mat)
305
+
306
+ camera_o = c2w[:3, 3]
307
+ dist = np.sqrt(np.sum(camera_o ** 2))
308
+ near = dist - 1
309
+ far = dist + 1
310
+
311
+ new_near_fars.append([0.95 * near, 1.05 * far])
312
+ new_depths_h.append(depth * scale_factor)
313
+
314
+ # print(new_near_fars)
315
+ imgs = torch.stack(imgs).float()
316
+ depths_h = np.stack(new_depths_h)
317
+ masks_h = np.stack(masks_h)
318
+
319
+ affine_mats = np.stack(new_affine_mats)
320
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
321
+ new_near_fars)
322
+
323
+ if self.split == 'train':
324
+ start_idx = 0
325
+ else:
326
+ start_idx = 1
327
+
328
+ view_ids = [idx] + list(src_views)
329
+ sample['origin_idx'] = origin_idx
330
+ sample['images'] = imgs # (V, 3, H, W)
331
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
332
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
333
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
334
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
335
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
336
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
337
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
338
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
339
+
340
+ # sample['light_idx'] = torch.tensor(light_idx)
341
+ sample['scan'] = folder_id
342
+
343
+ sample['scale_factor'] = torch.tensor(scale_factor)
344
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
345
+ sample['render_img_idx'] = torch.tensor(image_perm)
346
+ sample['partial_vol_origin'] = self.partial_vol_origin
347
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
348
+
349
+
350
+ # - image to render
351
+ sample['query_image'] = sample['images'][0]
352
+ sample['query_c2w'] = sample['c2ws'][0]
353
+ sample['query_w2c'] = sample['w2cs'][0]
354
+ sample['query_intrinsic'] = sample['intrinsics'][0]
355
+ sample['query_depth'] = sample['depths_h'][0]
356
+ sample['query_mask'] = sample['masks_h'][0]
357
+ sample['query_near_far'] = sample['near_fars'][0]
358
+
359
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
360
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
361
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
362
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
363
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
364
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
365
+ sample['view_ids'] = sample['view_ids'][start_idx:]
366
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
367
+
368
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
369
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
370
+
371
+ # - generate rays
372
+ if ('val' in self.split) or ('test' in self.split):
373
+ sample_rays = gen_rays_from_single_image(
374
+ img_wh[1], img_wh[0],
375
+ sample['query_image'],
376
+ sample['query_intrinsic'],
377
+ sample['query_c2w'],
378
+ depth=sample['query_depth'],
379
+ mask=sample['query_mask'] if self.clean_image else None)
380
+ else:
381
+ sample_rays = gen_random_rays_from_single_image(
382
+ img_wh[1], img_wh[0],
383
+ self.N_rays,
384
+ sample['query_image'],
385
+ sample['query_intrinsic'],
386
+ sample['query_c2w'],
387
+ depth=sample['query_depth'],
388
+ mask=sample['query_mask'] if self.clean_image else None,
389
+ dilated_mask=mask_dilated,
390
+ importance_sample=self.importance_sample)
391
+
392
+
393
+ sample['rays'] = sample_rays
394
+
395
+ return sample
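
The reference-view depth handling above converts the stored 16-bit z-depth map (in millimetres) into a per-pixel Euclidean distance along the camera ray before the scene normalisation is applied. A self-contained sketch of that conversion; the file path and intrinsic are illustrative, and get_ray_directions mirrors the helper defined at the top of this file:

import cv2
import numpy as np
import torch
from kornia import create_meshgrid

def get_ray_directions(H, W, focal, center=None):
    # same helper as above: unit-z ray directions in camera coordinates
    grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5  # (H, W, 2)
    i, j = grid.unbind(-1)
    cent = center if center is not None else [W / 2, H / 2]
    return torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1)

def depth_png_to_ray_distance(depth_path, intrinsic):
    # the 16-bit png stores z-depth in millimetres; convert to metres
    depth = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED).astype(np.float32) / 1000.0  # (H, W)
    mask = depth > 0
    H, W = depth.shape
    dirs = get_ray_directions(H, W, [intrinsic[0, 0], intrinsic[1, 1]]).numpy()  # (H, W, 3)
    # z-depth -> Euclidean distance along each pixel's ray
    distance = np.linalg.norm(dirs * depth[..., None], axis=-1)
    return distance, mask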
SparseNeuS_demo_v1/data/blender_general_narrow_8_4_random_shading.py ADDED
@@ -0,0 +1,432 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # ? why need transpose here
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ # print("root_dir: ", root_dir)
70
+ self.root_dir = root_dir
71
+ self.split = split
72
+
73
+ self.n_views = n_views
74
+ self.N_rays = N_rays
75
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
76
+
77
+ self.clean_image = clean_image
78
+ self.importance_sample = importance_sample
79
+ self.test_ref_views = test_ref_views # used for testing
80
+ self.scale_factor = 1.0
81
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
+
83
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
+ with open(lvis_json_path, 'r') as f:
85
+ lvis_paths = json.load(f)
86
+ if self.split == 'train':
87
+ self.lvis_paths = lvis_paths['train']
88
+ else:
89
+ self.lvis_paths = lvis_paths['val']
90
+ if img_wh is not None:
91
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
+ 'img_wh must both be multiples of 32!'
93
+
94
+
95
+ pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
+ with open(pose_json_path, 'r') as f:
97
+ meta = json.load(f)
98
+
99
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
+ self.img_wh = (256, 256)
101
+ self.input_poses = np.array(list(meta["c2ws"].values()))
102
+ intrinsic = np.eye(4)
103
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
+ self.intrinsic = intrinsic
105
+ self.near_far = np.array(meta["near_far"])
106
+ self.near_far[1] = 1.8
107
+ self.define_transforms()
108
+ self.blender2opencv = np.array(
109
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
+ )
111
+
112
+
113
+ self.c2ws = []
114
+ self.w2cs = []
115
+ self.near_fars = []
116
+ # self.root_dir = root_dir
117
+ for idx, img_id in enumerate(self.img_ids):
118
+ pose = self.input_poses[idx]
119
+ c2w = pose @ self.blender2opencv
120
+ self.c2ws.append(c2w)
121
+ self.w2cs.append(np.linalg.inv(c2w))
122
+ self.near_fars.append(self.near_far)
123
+ self.c2ws = np.stack(self.c2ws, axis=0)
124
+ self.w2cs = np.stack(self.w2cs, axis=0)
125
+
126
+
127
+ self.all_intrinsics = [] # the cam info of the whole scene
128
+ self.all_extrinsics = []
129
+ self.all_near_fars = []
130
+ self.load_cam_info()
131
+
132
+ # * bounding box for rendering
133
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
135
+
136
+ # - used for cost volume regularization
137
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
+
140
+
141
+ def define_transforms(self):
142
+ self.transform = T.Compose([T.ToTensor()])
143
+
144
+
145
+
146
+ def load_cam_info(self):
147
+ for vid, img_id in enumerate(self.img_ids):
148
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
+ self.all_intrinsics.append(intrinsic)
150
+ self.all_extrinsics.append(extrinsic)
151
+ self.all_near_fars.append(near_far)
152
+
153
+ def read_depth(self, filename):
154
+ pass
155
+
156
+ def read_mask(self, filename):
157
+ mask_h = cv2.imread(filename, 0)
158
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
+ interpolation=cv2.INTER_NEAREST)
160
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
+ interpolation=cv2.INTER_NEAREST)
162
+
163
+ mask[mask > 0] = 1 # the masks stored in png are not binary
164
+ mask_h[mask_h > 0] = 1
165
+
166
+ return mask, mask_h
167
+
168
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
+
170
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
+ # print("center", center)
172
+ # print("radius", radius)
173
+ # print("bounds", bounds)
174
+ # import ipdb; ipdb.set_trace()
175
+ radius = radius * factor
176
+ scale_mat = np.diag([radius, radius, radius, 1.0])
177
+ scale_mat[:3, 3] = center.cpu().numpy()
178
+ scale_mat = scale_mat.astype(np.float32)
179
+
180
+ return scale_mat, 1. / radius.cpu().numpy()
181
+
182
+ def __len__(self):
183
+ return 8*len(self.lvis_paths)
184
+
185
+
186
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
187
+ pass
188
+
189
+
190
+ def __getitem__(self, idx):
191
+ sample = {}
192
+ origin_idx = idx
193
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
194
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
195
+
196
+
197
+ folder_uid_dict = self.lvis_paths[idx//8]
198
+ idx = idx % 8 # [0, 7]
199
+ folder_id = folder_uid_dict['folder_id']
200
+ uid = folder_uid_dict['uid']
201
+
202
+
203
+ # target view
204
+ c2w = self.c2ws[idx]
205
+ w2c = np.linalg.inv(c2w)
206
+ w2c_ref = w2c
207
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
208
+
209
+ w2cs.append(w2c @ w2c_ref_inv)
210
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
211
+
212
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
213
+
214
+ depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
215
+
216
+
217
+ img = Image.open(img_filename)
218
+
219
+ img = self.transform(img) # (4, h, w)
220
+
221
+
222
+ if img.shape[0] == 4:
223
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
224
+ imgs += [img]
225
+
226
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
227
+ mask_h = depth_h > 0
228
+ # print("valid pixels", np.sum(mask_h))
229
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
230
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
231
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
232
+ depth_h = distance
233
+
234
+
235
+ depths_h.append(depth_h)
236
+ masks_h.append(mask_h)
237
+
238
+ intrinsic = self.intrinsic
239
+ intrinsics.append(intrinsic)
240
+
241
+
242
+ near_fars.append(self.near_fars[idx])
243
+ image_perm = 0 # only supervised on reference view
244
+
245
+ mask_dilated = None
246
+
247
+ # src_views = range(8+idx*4, 8+(idx+1)*4)
248
+ src_views = range(8, 8 + 8 * 4)
249
+
250
+ for vid in src_views:
251
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
252
+
253
+ img = Image.open(img_filename)
254
+ img_wh = self.img_wh
255
+
256
+ img = self.transform(img)
257
+ if img.shape[0] == 4:
258
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
259
+
260
+ imgs += [img]
261
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
262
+ depths_h.append(depth_h)
263
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
264
+
265
+ near_fars.append(self.all_near_fars[vid])
266
+ intrinsics.append(self.all_intrinsics[vid])
267
+
268
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
269
+
270
+
271
+ # ! estimate scale_mat
272
+ scale_mat, scale_factor = self.cal_scale_mat(
273
+ img_hw=[img_wh[1], img_wh[0]],
274
+ intrinsics=intrinsics, extrinsics=w2cs,
275
+ near_fars=near_fars, factor=1.1
276
+ )
277
+
278
+
279
+ new_near_fars = []
280
+ new_w2cs = []
281
+ new_c2ws = []
282
+ new_affine_mats = []
283
+ new_depths_h = []
284
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
285
+
286
+ P = intrinsic @ extrinsic @ scale_mat
287
+ P = P[:3, :4]
288
+ # - should use load_K_Rt_from_P() to obtain c2w
289
+ c2w = load_K_Rt_from_P(None, P)[1]
290
+ w2c = np.linalg.inv(c2w)
291
+ new_w2cs.append(w2c)
292
+ new_c2ws.append(c2w)
293
+ affine_mat = np.eye(4)
294
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
295
+ new_affine_mats.append(affine_mat)
296
+
297
+ camera_o = c2w[:3, 3]
298
+ dist = np.sqrt(np.sum(camera_o ** 2))
299
+ near = dist - 1
300
+ far = dist + 1
301
+
302
+ new_near_fars.append([0.95 * near, 1.05 * far])
303
+ new_depths_h.append(depth * scale_factor)
304
+
305
+ if self.split == 'train':
306
+ # randomly select one view from eight views as reference view
307
+ idx_to_select = np.random.randint(0, 8)
308
+
309
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx_to_select}.png')
310
+ img = Image.open(img_filename)
311
+ img = self.transform(img) # (4, h, w)
312
+
313
+ if img.shape[0] == 4:
314
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
315
+
316
+ imgs[0] = img
317
+
318
+ w2c_selected = self.all_extrinsics[idx_to_select] @ w2c_ref_inv
319
+ P = self.all_intrinsics[idx_to_select] @ w2c_selected @ scale_mat
320
+ P = P[:3, :4]
321
+
322
+ c2w = load_K_Rt_from_P(None, P)[1]
323
+ w2c = np.linalg.inv(c2w)
324
+ affine_mat = np.eye(4)
325
+ affine_mat[:3, :4] = self.all_intrinsics[idx_to_select][:3, :3] @ w2c[:3, :4]
326
+ new_affine_mats[0] = affine_mat
327
+ camera_o = c2w[:3, 3]
328
+ dist = np.sqrt(np.sum(camera_o ** 2))
329
+ near = dist - 1
330
+ far = dist + 1
331
+ new_near_fars[0] = [0.95 * near, 1.05 * far]
332
+
333
+ new_w2cs[0] = w2c
334
+ new_c2ws[0] = c2w
335
+
336
+ depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx_to_select}_depth_mm.png'))
337
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
338
+ mask_h = depth_h > 0
339
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
340
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
341
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
342
+ depth_h = distance * scale_factor
343
+
344
+ new_depths_h[0] = depth_h
345
+ masks_h[0] = mask_h
346
+
347
+
348
+
349
+ # print(new_near_fars)
350
+ imgs = torch.stack(imgs).float()
351
+ depths_h = np.stack(new_depths_h)
352
+ masks_h = np.stack(masks_h)
353
+
354
+ affine_mats = np.stack(new_affine_mats)
355
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
356
+ new_near_fars)
357
+
358
+ if self.split == 'train':
359
+ start_idx = 0
360
+ else:
361
+ start_idx = 1
362
+
363
+
364
+ view_ids = [idx] + list(src_views)
365
+ sample['origin_idx'] = origin_idx
366
+ sample['images'] = imgs # (V, 3, H, W)
367
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
368
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
369
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
370
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
371
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
372
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
373
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
374
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
375
+
376
+ # sample['light_idx'] = torch.tensor(light_idx)
377
+ sample['scan'] = folder_id
378
+
379
+ sample['scale_factor'] = torch.tensor(scale_factor)
380
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
381
+ sample['render_img_idx'] = torch.tensor(image_perm)
382
+ sample['partial_vol_origin'] = self.partial_vol_origin
383
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
384
+
385
+
386
+ # - image to render
387
+ sample['query_image'] = sample['images'][0]
388
+ sample['query_c2w'] = sample['c2ws'][0]
389
+ sample['query_w2c'] = sample['w2cs'][0]
390
+ sample['query_intrinsic'] = sample['intrinsics'][0]
391
+ sample['query_depth'] = sample['depths_h'][0]
392
+ sample['query_mask'] = sample['masks_h'][0]
393
+ sample['query_near_far'] = sample['near_fars'][0]
394
+
395
+
396
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
397
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
398
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
399
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
400
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
401
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
402
+ sample['view_ids'] = sample['view_ids'][start_idx:]
403
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
404
+
405
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
406
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
407
+
408
+ # - generate rays
409
+ if ('val' in self.split) or ('test' in self.split):
410
+ sample_rays = gen_rays_from_single_image(
411
+ img_wh[1], img_wh[0],
412
+ sample['query_image'],
413
+ sample['query_intrinsic'],
414
+ sample['query_c2w'],
415
+ depth=sample['query_depth'],
416
+ mask=sample['query_mask'] if self.clean_image else None)
417
+ else:
418
+ sample_rays = gen_random_rays_from_single_image(
419
+ img_wh[1], img_wh[0],
420
+ self.N_rays,
421
+ sample['query_image'],
422
+ sample['query_intrinsic'],
423
+ sample['query_c2w'],
424
+ depth=sample['query_depth'],
425
+ mask=sample['query_mask'] if self.clean_image else None,
426
+ dilated_mask=mask_dilated,
427
+ importance_sample=self.importance_sample)
428
+
429
+
430
+ sample['rays'] = sample_rays
431
+
432
+ return sample
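A note on the depth handling shared by the loaders in this commit: the stored view_*_depth_mm.png maps hold z-depth in millimetres, and each __getitem__ converts them to per-pixel ray distances (using intrinsic[0, 0] and intrinsic[1, 1] as focal lengths) before applying the scene scale. The standalone sketch below mirrors that conversion; the function name and arguments are illustrative only and are not part of the commit.

    import numpy as np
    import torch
    from kornia import create_meshgrid

    def zdepth_mm_to_ray_distance(depth_png, fx, fy, cx=None, cy=None):
        # depth_png: (H, W) uint16 array read from a *_depth_mm.png file
        H, W = depth_png.shape
        z = depth_png.astype(np.float32) / 1000.0  # millimetres -> metres (z-depth)
        grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5  # (H, W, 2) pixel centres
        i, j = grid.unbind(-1)                      # (x, y) pixel coordinates, as in the loaders above
        cx = W / 2 if cx is None else cx
        cy = H / 2 if cy is None else cy
        dirs = torch.stack([(i - cx) / fx, (j - cy) / fy, torch.ones_like(i)], -1)  # (H, W, 3), z = 1
        points = dirs.numpy() * z[..., None]        # back-project each pixel into camera space
        return np.linalg.norm(points, axis=-1)      # Euclidean ray distance per pixel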
SparseNeuS_demo_v1/data/blender_general_narrow_all.py ADDED
@@ -0,0 +1,386 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # ? why need transpose here
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ # print("root_dir: ", root_dir)
70
+ self.root_dir = root_dir
71
+ self.split = split
72
+
73
+ self.n_views = n_views
74
+ self.N_rays = N_rays
75
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
76
+
77
+ self.clean_image = clean_image
78
+ self.importance_sample = importance_sample
79
+ self.test_ref_views = test_ref_views # used for testing
80
+ self.scale_factor = 1.0
81
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
+
83
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
+ with open(lvis_json_path, 'r') as f:
85
+ lvis_paths = json.load(f)
86
+ if self.split == 'train':
87
+ self.lvis_paths = lvis_paths['train']
88
+ else:
89
+ self.lvis_paths = lvis_paths['val']
90
+ if img_wh is not None:
91
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
+ 'img_wh must both be multiples of 32!'
93
+
94
+
95
+ pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
+ with open(pose_json_path, 'r') as f:
97
+ meta = json.load(f)
98
+
99
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
+ self.img_wh = (256, 256)
101
+ self.input_poses = np.array(list(meta["c2ws"].values()))
102
+ intrinsic = np.eye(4)
103
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
+ self.intrinsic = intrinsic
105
+ self.near_far = np.array(meta["near_far"])
106
+ self.near_far[1] = 1.8
107
+ self.define_transforms()
108
+ self.blender2opencv = np.array(
109
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
+ )
111
+
112
+
113
+ self.c2ws = []
114
+ self.w2cs = []
115
+ self.near_fars = []
116
+ # self.root_dir = root_dir
117
+ for idx, img_id in enumerate(self.img_ids):
118
+ pose = self.input_poses[idx]
119
+ c2w = pose @ self.blender2opencv
120
+ self.c2ws.append(c2w)
121
+ self.w2cs.append(np.linalg.inv(c2w))
122
+ self.near_fars.append(self.near_far)
123
+ self.c2ws = np.stack(self.c2ws, axis=0)
124
+ self.w2cs = np.stack(self.w2cs, axis=0)
125
+
126
+
127
+ self.all_intrinsics = [] # the cam info of the whole scene
128
+ self.all_extrinsics = []
129
+ self.all_near_fars = []
130
+ self.load_cam_info()
131
+
132
+ # * bounding box for rendering
133
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
135
+
136
+ # - used for cost volume regularization
137
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
+
140
+
141
+ def define_transforms(self):
142
+ self.transform = T.Compose([T.ToTensor()])
143
+
144
+
145
+
146
+ def load_cam_info(self):
147
+ for vid, img_id in enumerate(self.img_ids):
148
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
+ self.all_intrinsics.append(intrinsic)
150
+ self.all_extrinsics.append(extrinsic)
151
+ self.all_near_fars.append(near_far)
152
+
153
+ def read_depth(self, filename):
154
+ pass
155
+
156
+ def read_mask(self, filename):
157
+ mask_h = cv2.imread(filename, 0)
158
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
+ interpolation=cv2.INTER_NEAREST)
160
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
+ interpolation=cv2.INTER_NEAREST)
162
+
163
+ mask[mask > 0] = 1 # the masks stored in png are not binary
164
+ mask_h[mask_h > 0] = 1
165
+
166
+ return mask, mask_h
167
+
168
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
+
170
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
+ # print("center", center)
172
+ # print("radius", radius)
173
+ # print("bounds", bounds)
174
+ # import ipdb; ipdb.set_trace()
175
+ radius = radius * factor
176
+ scale_mat = np.diag([radius, radius, radius, 1.0])
177
+ scale_mat[:3, 3] = center.cpu().numpy()
178
+ scale_mat = scale_mat.astype(np.float32)
179
+
180
+ return scale_mat, 1. / radius.cpu().numpy()
181
+
182
+ def __len__(self):
183
+ return 8*len(self.lvis_paths)
184
+
185
+
186
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
187
+ pass
188
+
189
+
190
+ def __getitem__(self, idx):
191
+ sample = {}
192
+ origin_idx = idx
193
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
194
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
195
+
196
+
197
+ folder_uid_dict = self.lvis_paths[idx//8]
198
+ idx = idx % 8 # [0, 7]
199
+ folder_id = folder_uid_dict['folder_id']
200
+ uid = folder_uid_dict['uid']
201
+
202
+
203
+ # target view
204
+ c2w = self.c2ws[idx]
205
+ w2c = np.linalg.inv(c2w)
206
+ w2c_ref = w2c
207
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
208
+
209
+ w2cs.append(w2c @ w2c_ref_inv)
210
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
211
+
212
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
213
+
214
+ depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
215
+
216
+
217
+ img = Image.open(img_filename)
218
+
219
+ img = self.transform(img) # (4, h, w)
220
+
221
+
222
+ if img.shape[0] == 4:
223
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
224
+ imgs += [img]
225
+
226
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
227
+ mask_h = depth_h > 0
228
+ # print("valid pixels", np.sum(mask_h))
229
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
230
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
231
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
232
+ depth_h = distance
233
+
234
+
235
+ depths_h.append(depth_h)
236
+ masks_h.append(mask_h)
237
+
238
+ intrinsic = self.intrinsic
239
+ intrinsics.append(intrinsic)
240
+
241
+
242
+ near_fars.append(self.near_fars[idx])
243
+ image_perm = 0 # only supervised on reference view
244
+
245
+ mask_dilated = None
246
+
247
+ # src_views = range(8+idx*4, 8+(idx+1)*4)
248
+ src_views = range(8, 8 + 8 * 4)
249
+
250
+ for vid in src_views:
251
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
252
+
253
+ img = Image.open(img_filename)
254
+ img_wh = self.img_wh
255
+
256
+ img = self.transform(img)
257
+ if img.shape[0] == 4:
258
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
259
+
260
+ imgs += [img]
261
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
262
+ depths_h.append(depth_h)
263
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
264
+
265
+ near_fars.append(self.all_near_fars[vid])
266
+ intrinsics.append(self.all_intrinsics[vid])
267
+
268
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
269
+
270
+
271
+ # ! estimate scale_mat
272
+ scale_mat, scale_factor = self.cal_scale_mat(
273
+ img_hw=[img_wh[1], img_wh[0]],
274
+ intrinsics=intrinsics, extrinsics=w2cs,
275
+ near_fars=near_fars, factor=1.1
276
+ )
277
+
278
+
279
+ new_near_fars = []
280
+ new_w2cs = []
281
+ new_c2ws = []
282
+ new_affine_mats = []
283
+ new_depths_h = []
284
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
285
+
286
+ P = intrinsic @ extrinsic @ scale_mat
287
+ P = P[:3, :4]
288
+ # - should use load_K_Rt_from_P() to obtain c2w
289
+ c2w = load_K_Rt_from_P(None, P)[1]
290
+ w2c = np.linalg.inv(c2w)
291
+ new_w2cs.append(w2c)
292
+ new_c2ws.append(c2w)
293
+ affine_mat = np.eye(4)
294
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
295
+ new_affine_mats.append(affine_mat)
296
+
297
+ camera_o = c2w[:3, 3]
298
+ dist = np.sqrt(np.sum(camera_o ** 2))
299
+ near = dist - 1
300
+ far = dist + 1
301
+
302
+ new_near_fars.append([0.95 * near, 1.05 * far])
303
+ new_depths_h.append(depth * scale_factor)
304
+
305
+ # print(new_near_fars)
306
+ imgs = torch.stack(imgs).float()
307
+ depths_h = np.stack(new_depths_h)
308
+ masks_h = np.stack(masks_h)
309
+
310
+ affine_mats = np.stack(new_affine_mats)
311
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
312
+ new_near_fars)
313
+
314
+ if self.split == 'train':
315
+ start_idx = 0
316
+ else:
317
+ start_idx = 1
318
+
319
+ view_ids = [idx] + list(src_views)
320
+ sample['origin_idx'] = origin_idx
321
+ sample['images'] = imgs # (V, 3, H, W)
322
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
323
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
324
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
325
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
326
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
327
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
328
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
329
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
330
+
331
+ # sample['light_idx'] = torch.tensor(light_idx)
332
+ sample['scan'] = folder_id
333
+
334
+ sample['scale_factor'] = torch.tensor(scale_factor)
335
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
336
+ sample['render_img_idx'] = torch.tensor(image_perm)
337
+ sample['partial_vol_origin'] = self.partial_vol_origin
338
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
339
+
340
+
341
+ # - image to render
342
+ sample['query_image'] = sample['images'][0]
343
+ sample['query_c2w'] = sample['c2ws'][0]
344
+ sample['query_w2c'] = sample['w2cs'][0]
345
+ sample['query_intrinsic'] = sample['intrinsics'][0]
346
+ sample['query_depth'] = sample['depths_h'][0]
347
+ sample['query_mask'] = sample['masks_h'][0]
348
+ sample['query_near_far'] = sample['near_fars'][0]
349
+
350
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
351
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
352
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
353
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
354
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
355
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
356
+ sample['view_ids'] = sample['view_ids'][start_idx:]
357
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
358
+
359
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
360
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
361
+
362
+ # - generate rays
363
+ if ('val' in self.split) or ('test' in self.split):
364
+ sample_rays = gen_rays_from_single_image(
365
+ img_wh[1], img_wh[0],
366
+ sample['query_image'],
367
+ sample['query_intrinsic'],
368
+ sample['query_c2w'],
369
+ depth=sample['query_depth'],
370
+ mask=sample['query_mask'] if self.clean_image else None)
371
+ else:
372
+ sample_rays = gen_random_rays_from_single_image(
373
+ img_wh[1], img_wh[0],
374
+ self.N_rays,
375
+ sample['query_image'],
376
+ sample['query_intrinsic'],
377
+ sample['query_c2w'],
378
+ depth=sample['query_depth'],
379
+ mask=sample['query_mask'] if self.clean_image else None,
380
+ dilated_mask=mask_dilated,
381
+ importance_sample=self.importance_sample)
382
+
383
+
384
+ sample['rays'] = sample_rays
385
+
386
+ return sample
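For reference, a minimal sketch of how this loader might be driven, assuming the hard-coded Objaverse paths above are available on disk; the constructor arguments and printed shapes below are illustrative placeholders and assumptions, not values taken from this commit.

    from torch.utils.data import DataLoader
    from data.blender_general_narrow_all import BlenderPerView

    # Hypothetical root directory and settings, for illustration only.
    dataset = BlenderPerView(root_dir='/objaverse-processed/zero12345_img/zero12345_narrow/',
                             split='val', N_rays=512, vol_dims=[128, 128, 128])
    loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=4)

    sample = next(iter(loader))
    print(sample['images'].shape)   # (1, 32, 3, 256, 256): the 8x4 source views; the reference view
                                    # is split off into the query_* keys when split != 'train'
    print(sample['w2cs'].shape)     # (1, 32, 4, 4) extrinsics, normalized by scale_mat
    rays = sample['rays']           # ray bundle generated for the query (reference) view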
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage.py ADDED
@@ -0,0 +1,410 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # ? why need transpose here
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ # print("root_dir: ", root_dir)
70
+ self.root_dir = root_dir
71
+ self.split = split
72
+
73
+ self.n_views = n_views
74
+ self.N_rays = N_rays
75
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
76
+
77
+ self.clean_image = clean_image
78
+ self.importance_sample = importance_sample
79
+ self.test_ref_views = test_ref_views # used for testing
80
+ self.scale_factor = 1.0
81
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
+
83
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
+ with open(lvis_json_path, 'r') as f:
85
+ lvis_paths = json.load(f)
86
+ if self.split == 'train':
87
+ self.lvis_paths = lvis_paths['train']
88
+ else:
89
+ self.lvis_paths = lvis_paths['val']
90
+ if img_wh is not None:
91
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
+ 'img_wh must both be multiples of 32!'
93
+
94
+
95
+ pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
+ with open(pose_json_path, 'r') as f:
97
+ meta = json.load(f)
98
+
99
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
+ self.img_wh = (256, 256)
101
+ self.input_poses = np.array(list(meta["c2ws"].values()))
102
+ intrinsic = np.eye(4)
103
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
+ self.intrinsic = intrinsic
105
+ self.near_far = np.array(meta["near_far"])
106
+ self.near_far[1] = 1.8
107
+ self.define_transforms()
108
+ self.blender2opencv = np.array(
109
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
+ )
111
+
112
+
113
+ self.c2ws = []
114
+ self.w2cs = []
115
+ self.near_fars = []
116
+ # self.root_dir = root_dir
117
+ for idx, img_id in enumerate(self.img_ids):
118
+ pose = self.input_poses[idx]
119
+ c2w = pose @ self.blender2opencv
120
+ self.c2ws.append(c2w)
121
+ self.w2cs.append(np.linalg.inv(c2w))
122
+ self.near_fars.append(self.near_far)
123
+ self.c2ws = np.stack(self.c2ws, axis=0)
124
+ self.w2cs = np.stack(self.w2cs, axis=0)
125
+
126
+
127
+ self.all_intrinsics = [] # the cam info of the whole scene
128
+ self.all_extrinsics = []
129
+ self.all_near_fars = []
130
+ self.load_cam_info()
131
+
132
+ # * bounding box for rendering
133
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
135
+
136
+ # - used for cost volume regularization
137
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
+
140
+
141
+ def define_transforms(self):
142
+ self.transform = T.Compose([T.ToTensor()])
143
+
144
+
145
+
146
+ def load_cam_info(self):
147
+ for vid, img_id in enumerate(self.img_ids):
148
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
+ self.all_intrinsics.append(intrinsic)
150
+ self.all_extrinsics.append(extrinsic)
151
+ self.all_near_fars.append(near_far)
152
+
153
+ def read_depth(self, filename):
154
+ pass
155
+
156
+ def read_mask(self, filename):
157
+ mask_h = cv2.imread(filename, 0)
158
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
+ interpolation=cv2.INTER_NEAREST)
160
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
+ interpolation=cv2.INTER_NEAREST)
162
+
163
+ mask[mask > 0] = 1 # the masks stored in png are not binary
164
+ mask_h[mask_h > 0] = 1
165
+
166
+ return mask, mask_h
167
+
168
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
+
170
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
+ # print("center", center)
172
+ # print("radius", radius)
173
+ # print("bounds", bounds)
174
+ # import ipdb; ipdb.set_trace()
175
+ radius = radius * factor
176
+ scale_mat = np.diag([radius, radius, radius, 1.0])
177
+ scale_mat[:3, 3] = center.cpu().numpy()
178
+ scale_mat = scale_mat.astype(np.float32)
179
+
180
+ return scale_mat, 1. / radius.cpu().numpy()
181
+
182
+ def __len__(self):
183
+ return 8*len(self.lvis_paths)
184
+
185
+
186
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
187
+ pass
188
+
189
+
190
+ def __getitem__(self, idx):
191
+ sample = {}
192
+ origin_idx = idx
193
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
194
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
195
+
196
+
197
+ folder_uid_dict = self.lvis_paths[idx//8]
198
+ idx = idx % 8 # [0, 7]
199
+ folder_id = folder_uid_dict['folder_id']
200
+ uid = folder_uid_dict['uid']
201
+
202
+
203
+ # target view
204
+ c2w = self.c2ws[idx]
205
+ w2c = np.linalg.inv(c2w)
206
+ w2c_ref = w2c
207
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
208
+
209
+ w2cs.append(w2c @ w2c_ref_inv)
210
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
211
+
212
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
213
+
214
+ depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
215
+
216
+
217
+ img = Image.open(img_filename)
218
+
219
+ img = self.transform(img) # (4, h, w)
220
+
221
+ # print("img_pre", img.shape)
222
+ if img.shape[0] == 4:
223
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
224
+ # print("img", img.shape)
225
+ imgs += [img]
226
+
227
+
228
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
229
+ mask_h = depth_h > 0
230
+ # print("valid pixels", np.sum(mask_h))
231
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
232
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
233
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
234
+ depth_h = distance
235
+ # print("depth_h", depth_h.shape)
236
+
237
+ depths_h.append(depth_h)
238
+ masks_h.append(mask_h)
239
+
240
+ intrinsic = self.intrinsic
241
+ intrinsics.append(intrinsic)
242
+
243
+
244
+ near_fars.append(self.near_fars[idx])
245
+ image_perm = 0 # only supervised on reference view
246
+
247
+ mask_dilated = None
248
+
249
+ # src_views = range(8+idx*4, 8+(idx+1)*4)
250
+ src_views = range(8, 8 + 8 * 4)
251
+
252
+ for vid in src_views:
253
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{(vid - 8) // 4}_{vid % 4 + 1}.png')
254
+
255
+ img = Image.open(img_filename)
256
+ img_wh = self.img_wh
257
+
258
+ img = self.transform(img)
259
+ if img.shape[0] == 4:
260
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
261
+
262
+ imgs += [img]
263
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
264
+ depths_h.append(depth_h)
265
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
266
+
267
+ near_fars.append(self.all_near_fars[vid])
268
+ intrinsics.append(self.all_intrinsics[vid])
269
+
270
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
271
+
272
+
273
+ # ! estimate scale_mat
274
+ scale_mat, scale_factor = self.cal_scale_mat(
275
+ img_hw=[img_wh[1], img_wh[0]],
276
+ intrinsics=intrinsics, extrinsics=w2cs,
277
+ near_fars=near_fars, factor=1.1
278
+ )
279
+
280
+
281
+ new_near_fars = []
282
+ new_w2cs = []
283
+ new_c2ws = []
284
+ new_affine_mats = []
285
+ new_depths_h = []
286
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
287
+
288
+ P = intrinsic @ extrinsic @ scale_mat
289
+ P = P[:3, :4]
290
+ # - should use load_K_Rt_from_P() to obtain c2w
291
+ c2w = load_K_Rt_from_P(None, P)[1]
292
+ w2c = np.linalg.inv(c2w)
293
+ new_w2cs.append(w2c)
294
+ new_c2ws.append(c2w)
295
+ affine_mat = np.eye(4)
296
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
297
+ new_affine_mats.append(affine_mat)
298
+
299
+ camera_o = c2w[:3, 3]
300
+ dist = np.sqrt(np.sum(camera_o ** 2))
301
+ near = dist - 1
302
+ far = dist + 1
303
+
304
+ new_near_fars.append([0.95 * near, 1.05 * far])
305
+ new_depths_h.append(depth * scale_factor)
306
+
307
+ # print(new_near_fars)
308
+ imgs = torch.stack(imgs).float()
309
+ depths_h = np.stack(new_depths_h)
310
+ masks_h = np.stack(masks_h)
311
+
312
+ affine_mats = np.stack(new_affine_mats)
313
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
314
+ new_near_fars)
315
+
316
+ if self.split == 'train':
317
+ start_idx = 0
318
+ else:
319
+ start_idx = 1
320
+
321
+
322
+
323
+ target_w2cs = []
324
+ target_intrinsics = []
325
+ new_target_w2cs = []
326
+ for i_idx in range(8):
327
+ target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
328
+ target_intrinsics.append(self.all_intrinsics[i_idx])
329
+
330
+ for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
331
+
332
+ P = intrinsic @ extrinsic @ scale_mat
333
+ P = P[:3, :4]
334
+ # - should use load_K_Rt_from_P() to obtain c2w
335
+ c2w = load_K_Rt_from_P(None, P)[1]
336
+ w2c = np.linalg.inv(c2w)
337
+ new_target_w2cs.append(w2c)
338
+ target_w2cs = np.stack(new_target_w2cs)
339
+
340
+
341
+
342
+ view_ids = [idx] + list(src_views)
343
+ sample['origin_idx'] = origin_idx
344
+ sample['images'] = imgs # (V, 3, H, W)
345
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
346
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
347
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
348
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
349
+ sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
350
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
351
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
352
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
353
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
354
+
355
+ # sample['light_idx'] = torch.tensor(light_idx)
356
+ sample['scan'] = folder_id
357
+
358
+ sample['scale_factor'] = torch.tensor(scale_factor)
359
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
360
+ sample['render_img_idx'] = torch.tensor(image_perm)
361
+ sample['partial_vol_origin'] = self.partial_vol_origin
362
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
363
+
364
+
365
+ # - image to render
366
+ sample['query_image'] = sample['images'][0]
367
+ sample['query_c2w'] = sample['c2ws'][0]
368
+ sample['query_w2c'] = sample['w2cs'][0]
369
+ sample['query_intrinsic'] = sample['intrinsics'][0]
370
+ sample['query_depth'] = sample['depths_h'][0]
371
+ sample['query_mask'] = sample['masks_h'][0]
372
+ sample['query_near_far'] = sample['near_fars'][0]
373
+
374
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
375
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
376
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
377
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
378
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
379
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
380
+ sample['view_ids'] = sample['view_ids'][start_idx:]
381
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
382
+
383
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
384
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
385
+
386
+ # - generate rays
387
+ if ('val' in self.split) or ('test' in self.split):
388
+ sample_rays = gen_rays_from_single_image(
389
+ img_wh[1], img_wh[0],
390
+ sample['query_image'],
391
+ sample['query_intrinsic'],
392
+ sample['query_c2w'],
393
+ depth=sample['query_depth'],
394
+ mask=sample['query_mask'] if self.clean_image else None)
395
+ else:
396
+ sample_rays = gen_random_rays_from_single_image(
397
+ img_wh[1], img_wh[0],
398
+ self.N_rays,
399
+ sample['query_image'],
400
+ sample['query_intrinsic'],
401
+ sample['query_c2w'],
402
+ depth=sample['query_depth'],
403
+ mask=sample['query_mask'] if self.clean_image else None,
404
+ dilated_mask=mask_dilated,
405
+ importance_sample=self.importance_sample)
406
+
407
+
408
+ sample['rays'] = sample_rays
409
+
410
+ return sample
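Compared with the loader above, this evaluation variant reads its 32 source views from the two-stage generations (view_0_{i}_{j+1}.png) and additionally exposes the eight candidate target poses. A small sketch of how those poses could be inspected for a sample fetched directly from __getitem__, assuming the same scale-normalized world space as the other outputs (an assumption made here, not something the commit itself does):

    import torch

    cand_w2cs = sample['target_candidate_w2cs']   # (8, 4, 4) world-to-camera, scale-normalized
    cand_c2ws = torch.linalg.inv(cand_w2cs)       # camera-to-world
    centers = cand_c2ws[:, :3, 3]                 # (8, 3) candidate camera origins
    dists = centers.norm(dim=-1)                  # distance of each candidate camera from the origin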
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage_temp.py ADDED
@@ -0,0 +1,411 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # ? why need transpose here
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ # print("root_dir: ", root_dir)
70
+ self.root_dir = root_dir
71
+ self.split = split
72
+
73
+ self.n_views = n_views
74
+ self.N_rays = N_rays
75
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
76
+
77
+ self.clean_image = clean_image
78
+ self.importance_sample = importance_sample
79
+ self.test_ref_views = test_ref_views # used for testing
80
+ self.scale_factor = 1.0
81
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
+
83
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
+ with open(lvis_json_path, 'r') as f:
85
+ lvis_paths = json.load(f)
86
+ if self.split == 'train':
87
+ self.lvis_paths = lvis_paths['train']
88
+ else:
89
+ self.lvis_paths = lvis_paths['val']
90
+ if img_wh is not None:
91
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
+ 'img_wh must both be multiples of 32!'
93
+
94
+
95
+ pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
+ with open(pose_json_path, 'r') as f:
97
+ meta = json.load(f)
98
+
99
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
+ self.img_wh = (256, 256)
101
+ self.input_poses = np.array(list(meta["c2ws"].values()))
102
+ intrinsic = np.eye(4)
103
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
+ self.intrinsic = intrinsic
105
+ self.near_far = np.array(meta["near_far"])
106
+ self.near_far[1] = 1.8
107
+ self.define_transforms()
108
+ self.blender2opencv = np.array(
109
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
+ )
111
+
112
+
113
+ self.c2ws = []
114
+ self.w2cs = []
115
+ self.near_fars = []
116
+ # self.root_dir = root_dir
117
+ for idx, img_id in enumerate(self.img_ids):
118
+ pose = self.input_poses[idx]
119
+ c2w = pose @ self.blender2opencv
120
+ self.c2ws.append(c2w)
121
+ self.w2cs.append(np.linalg.inv(c2w))
122
+ self.near_fars.append(self.near_far)
123
+ self.c2ws = np.stack(self.c2ws, axis=0)
124
+ self.w2cs = np.stack(self.w2cs, axis=0)
125
+
126
+
127
+ self.all_intrinsics = [] # the cam info of the whole scene
128
+ self.all_extrinsics = []
129
+ self.all_near_fars = []
130
+ self.load_cam_info()
131
+
132
+ # * bounding box for rendering
133
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
135
+
136
+ # - used for cost volume regularization
137
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
+
140
+
141
+ def define_transforms(self):
142
+ self.transform = T.Compose([T.ToTensor()])
143
+
144
+
145
+
146
+ def load_cam_info(self):
147
+ for vid, img_id in enumerate(self.img_ids):
148
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
+ self.all_intrinsics.append(intrinsic)
150
+ self.all_extrinsics.append(extrinsic)
151
+ self.all_near_fars.append(near_far)
152
+
153
+ def read_depth(self, filename):
154
+ pass
155
+
156
+ def read_mask(self, filename):
157
+ mask_h = cv2.imread(filename, 0)
158
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
+ interpolation=cv2.INTER_NEAREST)
160
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
+ interpolation=cv2.INTER_NEAREST)
162
+
163
+ mask[mask > 0] = 1 # the masks stored in png are not binary
164
+ mask_h[mask_h > 0] = 1
165
+
166
+ return mask, mask_h
167
+
168
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
+
170
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
+ # print("center", center)
172
+ # print("radius", radius)
173
+ # print("bounds", bounds)
174
+ # import ipdb; ipdb.set_trace()
175
+ radius = radius * factor
176
+ scale_mat = np.diag([radius, radius, radius, 1.0])
177
+ scale_mat[:3, 3] = center.cpu().numpy()
178
+ scale_mat = scale_mat.astype(np.float32)
179
+
180
+ return scale_mat, 1. / radius.cpu().numpy()
181
+
182
+ def __len__(self):
183
+ return 10
184
+
185
+
186
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
187
+ pass
188
+
189
+
190
+ def __getitem__(self, idx):
191
+ idx = idx * 8
192
+ sample = {}
193
+ origin_idx = idx
194
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
195
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
196
+
197
+
198
+ folder_uid_dict = self.lvis_paths[idx//8]
199
+ idx = idx % 8 # [0, 7]
200
+ folder_id = folder_uid_dict['folder_id']
201
+ uid = folder_uid_dict['uid']
202
+
203
+
204
+ # target view
205
+ c2w = self.c2ws[idx]
206
+ w2c = np.linalg.inv(c2w)
207
+ w2c_ref = w2c
208
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
209
+
210
+ w2cs.append(w2c @ w2c_ref_inv)
211
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
212
+
213
+ img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
214
+
215
+ depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))
216
+
217
+
218
+ img = Image.open(img_filename)
219
+
220
+ img = self.transform(img) # (4, h, w)
221
+
222
+ # print("img_pre", img.shape)
223
+ if img.shape[0] == 4:
224
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
225
+ # print("img", img.shape)
226
+ imgs += [img]
227
+
228
+
229
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
230
+ mask_h = depth_h > 0
231
+ # print("valid pixels", np.sum(mask_h))
232
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
233
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
234
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
235
+ depth_h = distance
236
+ # print("depth_h", depth_h.shape)
237
+
238
+ depths_h.append(depth_h)
239
+ masks_h.append(mask_h)
240
+
241
+ intrinsic = self.intrinsic
242
+ intrinsics.append(intrinsic)
243
+
244
+
245
+ near_fars.append(self.near_fars[idx])
246
+ image_perm = 0 # only supervised on reference view
247
+
248
+ mask_dilated = None
249
+
250
+ # src_views = range(8+idx*4, 8+(idx+1)*4)
251
+ src_views = range(8, 8 + 8 * 4)
252
+
253
+ for vid in src_views:
254
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{(vid - 8) // 4}_{vid % 4 + 1}.png')
255
+
256
+ img = Image.open(img_filename)
257
+ img_wh = self.img_wh
258
+
259
+ img = self.transform(img)
260
+ if img.shape[0] == 4:
261
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
262
+
263
+ imgs += [img]
264
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
265
+ depths_h.append(depth_h)
266
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
267
+
268
+ near_fars.append(self.all_near_fars[vid])
269
+ intrinsics.append(self.all_intrinsics[vid])
270
+
271
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
272
+
273
+
274
+ # ! estimate scale_mat
275
+ scale_mat, scale_factor = self.cal_scale_mat(
276
+ img_hw=[img_wh[1], img_wh[0]],
277
+ intrinsics=intrinsics, extrinsics=w2cs,
278
+ near_fars=near_fars, factor=1.1
279
+ )
280
+
281
+
282
+ new_near_fars = []
283
+ new_w2cs = []
284
+ new_c2ws = []
285
+ new_affine_mats = []
286
+ new_depths_h = []
287
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
288
+
289
+ P = intrinsic @ extrinsic @ scale_mat
290
+ P = P[:3, :4]
291
+ # - should use load_K_Rt_from_P() to obtain c2w
292
+ c2w = load_K_Rt_from_P(None, P)[1]
293
+ w2c = np.linalg.inv(c2w)
294
+ new_w2cs.append(w2c)
295
+ new_c2ws.append(c2w)
296
+ affine_mat = np.eye(4)
297
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
298
+ new_affine_mats.append(affine_mat)
299
+
300
+ camera_o = c2w[:3, 3]
301
+ dist = np.sqrt(np.sum(camera_o ** 2))
302
+ near = dist - 1
303
+ far = dist + 1
304
+
305
+ new_near_fars.append([0.95 * near, 1.05 * far])
306
+ new_depths_h.append(depth * scale_factor)
307
+
308
+ # print(new_near_fars)
309
+ imgs = torch.stack(imgs).float()
310
+ depths_h = np.stack(new_depths_h)
311
+ masks_h = np.stack(masks_h)
312
+
313
+ affine_mats = np.stack(new_affine_mats)
314
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
315
+ new_near_fars)
316
+
317
+ if self.split == 'train':
318
+ start_idx = 0
319
+ else:
320
+ start_idx = 1
321
+
322
+
323
+
324
+ target_w2cs = []
325
+ target_intrinsics = []
326
+ new_target_w2cs = []
327
+ for i_idx in range(8):
328
+ target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
329
+ target_intrinsics.append(self.all_intrinsics[i_idx])
330
+
331
+ for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
332
+
333
+ P = intrinsic @ extrinsic @ scale_mat
334
+ P = P[:3, :4]
335
+ # - should use load_K_Rt_from_P() to obtain c2w
336
+ c2w = load_K_Rt_from_P(None, P)[1]
337
+ w2c = np.linalg.inv(c2w)
338
+ new_target_w2cs.append(w2c)
339
+ target_w2cs = np.stack(new_target_w2cs)
340
+
341
+
342
+
343
+ view_ids = [idx] + list(src_views)
344
+ sample['origin_idx'] = origin_idx
345
+ sample['images'] = imgs # (V, 3, H, W)
346
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
347
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
348
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
349
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
350
+ sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
351
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
352
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
353
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
354
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
355
+
356
+ # sample['light_idx'] = torch.tensor(light_idx)
357
+ sample['scan'] = folder_id
358
+
359
+ sample['scale_factor'] = torch.tensor(scale_factor)
360
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
361
+ sample['render_img_idx'] = torch.tensor(image_perm)
362
+ sample['partial_vol_origin'] = self.partial_vol_origin
363
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
364
+
365
+
366
+ # - image to render
367
+ sample['query_image'] = sample['images'][0]
368
+ sample['query_c2w'] = sample['c2ws'][0]
369
+ sample['query_w2c'] = sample['w2cs'][0]
370
+ sample['query_intrinsic'] = sample['intrinsics'][0]
371
+ sample['query_depth'] = sample['depths_h'][0]
372
+ sample['query_mask'] = sample['masks_h'][0]
373
+ sample['query_near_far'] = sample['near_fars'][0]
374
+
375
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
376
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
377
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
378
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
379
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
380
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
381
+ sample['view_ids'] = sample['view_ids'][start_idx:]
382
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
383
+
384
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
385
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
386
+
387
+ # - generate rays
388
+ if ('val' in self.split) or ('test' in self.split):
389
+ sample_rays = gen_rays_from_single_image(
390
+ img_wh[1], img_wh[0],
391
+ sample['query_image'],
392
+ sample['query_intrinsic'],
393
+ sample['query_c2w'],
394
+ depth=sample['query_depth'],
395
+ mask=sample['query_mask'] if self.clean_image else None)
396
+ else:
397
+ sample_rays = gen_random_rays_from_single_image(
398
+ img_wh[1], img_wh[0],
399
+ self.N_rays,
400
+ sample['query_image'],
401
+ sample['query_intrinsic'],
402
+ sample['query_c2w'],
403
+ depth=sample['query_depth'],
404
+ mask=sample['query_mask'] if self.clean_image else None,
405
+ dilated_mask=mask_dilated,
406
+ importance_sample=self.importance_sample)
407
+
408
+
409
+ sample['rays'] = sample_rays
410
+
411
+ return sample
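The loop above re-derives each candidate pose by composing P = intrinsic @ extrinsic @ scale_mat and decomposing it again with load_K_Rt_from_P(). The snippet below is a minimal stand-alone sketch of that round trip, not code from this commit: the intrinsics and camera position are made-up illustrative values, and the cv2.decomposeProjectionMatrix() logic is rebuilt inline to check that the recovered cam2world matrix matches the original.

```python
# Minimal sketch of the P-matrix round trip: compose P = K @ w2c, decompose with
# OpenCV, and confirm the recovered camera-to-world pose. Values are illustrative.
import cv2
import numpy as np

K = np.eye(3, dtype=np.float32)
K[0, 0] = K[1, 1] = 280.0          # assumed focal length, for illustration only
K[0, 2] = K[1, 2] = 128.0          # principal point of a 256x256 image

c2w = np.eye(4, dtype=np.float32)
c2w[:3, 3] = [0.0, 0.0, 1.5]       # camera 1.5 units along +z (illustrative)
w2c = np.linalg.inv(c2w)

P = K @ w2c[:3, :4]                # 3x4 projection matrix

out = cv2.decomposeProjectionMatrix(P)
K_rec, R, t = out[0], out[1], out[2]
K_rec = K_rec / K_rec[2, 2]

pose = np.eye(4, dtype=np.float32)
pose[:3, :3] = R.transpose()               # R is the world-to-camera rotation
pose[:3, 3] = (t[:3] / t[3])[:, 0]         # camera center in world coordinates

assert np.allclose(K_rec, K, atol=1e-3)    # intrinsics recovered
assert np.allclose(pose, c2w, atol=1e-3)   # recovered cam2world matches the original
```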
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data.py ADDED
@@ -0,0 +1,418 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+
18
+
19
+ def get_ray_directions(H, W, focal, center=None):
20
+ """
21
+ Get ray directions for all pixels in camera coordinate.
22
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
23
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
24
+ Inputs:
25
+ H, W, focal: image height, width and focal length
26
+ Outputs:
27
+ directions: (H, W, 3), the direction of the rays in camera coordinate
28
+ """
29
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
30
+
31
+ i, j = grid.unbind(-1)
32
+ # note: the grid above already applies the +0.5 pixel-center offset
33
+ # see https://github.com/bmild/nerf/issues/24
34
+ cent = center if center is not None else [W / 2, H / 2]
35
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
36
+
37
+ return directions
38
+
39
+ def load_K_Rt_from_P(filename, P=None):
40
+ if P is None:
41
+ lines = open(filename).read().splitlines()
42
+ if len(lines) == 4:
43
+ lines = lines[1:]
44
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
45
+ P = np.asarray(lines).astype(np.float32).squeeze()
46
+
47
+ out = cv2.decomposeProjectionMatrix(P)
48
+ K = out[0]
49
+ R = out[1]
50
+ t = out[2]
51
+
52
+ K = K / K[2, 2]
53
+ intrinsics = np.eye(4)
54
+ intrinsics[:3, :3] = K
55
+
56
+ pose = np.eye(4, dtype=np.float32)
57
+ pose[:3, :3] = R.transpose() # ? why need transpose here
58
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
59
+
60
+ return intrinsics, pose # ! return cam2world matrix here
61
+
62
+
63
+ # ! load one ref-image with multiple src-images in camera coordinate system
64
+ class BlenderPerView(Dataset):
65
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
66
+ split_filepath=None, pair_filepath=None,
67
+ N_rays=512,
68
+ vol_dims=[128, 128, 128], batch_size=1,
69
+ clean_image=False, importance_sample=False, test_ref_views=[],
70
+ specific_dataset_name = 'GSO'
71
+ ):
72
+
73
+ # print("root_dir: ", root_dir)
74
+ self.root_dir = root_dir
75
+ self.split = split
76
+ # self.specific_dataset_name = 'Realfusion'
77
+ # self.specific_dataset_name = 'GSO'
78
+ # self.specific_dataset_name = 'Objaverse'
79
+ # self.specific_dataset_name = 'Zero123'
80
+
81
+ self.specific_dataset_name = specific_dataset_name
82
+ self.n_views = n_views
83
+ self.N_rays = N_rays
84
+ self.batch_size = batch_size # - used to construct new metas for gru fusion training
85
+
86
+ self.clean_image = clean_image
87
+ self.importance_sample = importance_sample
88
+ self.test_ref_views = test_ref_views # used for testing
89
+ self.scale_factor = 1.0
90
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
91
+ assert self.split in ['val', 'export_mesh'], 'only support val or export_mesh'
92
+ # find all subfolders
93
+ main_folder = os.path.join(root_dir, self.specific_dataset_name)
94
+ self.shape_list = [""] # os.listdir(main_folder) # MODIFIED
95
+ self.shape_list.sort()
96
+
97
+ # self.shape_list = ['barrel_render']
98
+ # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
99
+
100
+
101
+ self.lvis_paths = []
102
+ for shape_name in self.shape_list:
103
+ self.lvis_paths.append(os.path.join(main_folder, shape_name))
104
+
105
+ # print("lvis_paths: ", self.lvis_paths)
106
+
107
+ if img_wh is not None:
108
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
109
+ 'img_wh must both be multiples of 32!'
110
+
111
+
112
+ # * bounding box for rendering
113
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
114
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
115
+
116
+ # - used for cost volume regularization
117
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
118
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
119
+
120
+
121
+ def define_transforms(self):
122
+ self.transform = T.Compose([T.ToTensor()])
123
+
124
+
125
+
126
+ def load_cam_info(self):
127
+ for vid, img_id in enumerate(self.img_ids):
128
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
129
+ self.all_intrinsics.append(intrinsic)
130
+ self.all_extrinsics.append(extrinsic)
131
+ self.all_near_fars.append(near_far)
132
+
133
+ def read_depth(self, filename):
134
+ pass
135
+
136
+ def read_mask(self, filename):
137
+ mask_h = cv2.imread(filename, 0)
138
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
139
+ interpolation=cv2.INTER_NEAREST)
140
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
141
+ interpolation=cv2.INTER_NEAREST)
142
+
143
+ mask[mask > 0] = 1 # the masks stored in png are not binary
144
+ mask_h[mask_h > 0] = 1
145
+
146
+ return mask, mask_h
147
+
148
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
149
+
150
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
151
+
152
+ radius = radius * factor
153
+ scale_mat = np.diag([radius, radius, radius, 1.0])
154
+ scale_mat[:3, 3] = center.cpu().numpy()
155
+ scale_mat = scale_mat.astype(np.float32)
156
+
157
+ return scale_mat, 1. / radius.cpu().numpy()
158
+
159
+ def __len__(self):
160
+ # return 8*len(self.lvis_paths)
161
+ return len(self.lvis_paths)
162
+
163
+
164
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
165
+ pass
166
+
167
+
168
+ def __getitem__(self, idx):
169
+ sample = {}
170
+ idx = idx * 8 # to be deleted
171
+ origin_idx = idx
172
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
173
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views
174
+
175
+ folder_path = self.lvis_paths[idx//8]
176
+ idx = idx % 8 # [0, 7]
177
+
178
+ # last subdir name
179
+ shape_name = os.path.split(folder_path)[-1]
180
+
181
+ pose_json_path = os.path.join(folder_path, "pose.json")
182
+ with open(pose_json_path, 'r') as f:
183
+ meta = json.load(f)
184
+
185
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
186
+ self.img_wh = (256, 256)
187
+ self.input_poses = np.array(list(meta["c2ws"].values()))
188
+ intrinsic = np.eye(4)
189
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
190
+ self.intrinsic = intrinsic
191
+ self.near_far = np.array(meta["near_far"])
192
+ self.near_far[1] = 1.8
193
+ self.define_transforms()
194
+ self.blender2opencv = np.array(
195
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
196
+ )
197
+
198
+ self.c2ws = []
199
+ self.w2cs = []
200
+ self.near_fars = []
201
+ # self.root_dir = root_dir
202
+ for image_dix, img_id in enumerate(self.img_ids):
203
+ pose = self.input_poses[image_dix]
204
+ c2w = pose @ self.blender2opencv
205
+ self.c2ws.append(c2w)
206
+ self.w2cs.append(np.linalg.inv(c2w))
207
+ self.near_fars.append(self.near_far)
208
+ self.c2ws = np.stack(self.c2ws, axis=0)
209
+ self.w2cs = np.stack(self.w2cs, axis=0)
210
+
211
+
212
+ self.all_intrinsics = [] # the cam info of the whole scene
213
+ self.all_extrinsics = []
214
+ self.all_near_fars = []
215
+ self.load_cam_info()
216
+
217
+
218
+ # target view
219
+ c2w = self.c2ws[idx]
220
+ w2c = np.linalg.inv(c2w)
221
+ w2c_ref = w2c
222
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
223
+
224
+ w2cs.append(w2c @ w2c_ref_inv)
225
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
226
+
227
+ # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
228
+ img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}')
229
+
230
+ img = Image.open(img_filename)
231
+ img = self.transform(img) # (4, h, w)
232
+
233
+
234
+ if img.shape[0] == 4:
235
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
236
+ imgs += [img]
237
+
238
+
239
+ depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
240
+ depth_h = depth_h.fill_(-1.0)
241
+ mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
242
+
243
+
244
+ depths_h.append(depth_h)
245
+ masks_h.append(mask_h)
246
+
247
+ intrinsic = self.intrinsic
248
+ intrinsics.append(intrinsic)
249
+
250
+
251
+ near_fars.append(self.near_fars[idx])
252
+ image_perm = 0 # only supervised on reference view
253
+
254
+ mask_dilated = None
255
+
256
+
257
+ src_views = range(8, 8 + 8 * 4)
258
+
259
+ for vid in src_views:
260
+
261
+ # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
262
+ img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}')
263
+ img = Image.open(img_filename)
264
+ img_wh = self.img_wh
265
+
266
+ img = self.transform(img)
267
+ if img.shape[0] == 4:
268
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
269
+
270
+ imgs += [img]
271
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
272
+ depths_h.append(depth_h)
273
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
274
+
275
+ near_fars.append(self.all_near_fars[vid])
276
+ intrinsics.append(self.all_intrinsics[vid])
277
+
278
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
279
+
280
+
281
+ # ! estimate scale_mat
282
+ scale_mat, scale_factor = self.cal_scale_mat(
283
+ img_hw=[img_wh[1], img_wh[0]],
284
+ intrinsics=intrinsics, extrinsics=w2cs,
285
+ near_fars=near_fars, factor=1.1
286
+ )
287
+
288
+
289
+ new_near_fars = []
290
+ new_w2cs = []
291
+ new_c2ws = []
292
+ new_affine_mats = []
293
+ new_depths_h = []
294
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
295
+
296
+ P = intrinsic @ extrinsic @ scale_mat
297
+ P = P[:3, :4]
298
+ # - should use load_K_Rt_from_P() to obtain c2w
299
+ c2w = load_K_Rt_from_P(None, P)[1]
300
+ w2c = np.linalg.inv(c2w)
301
+ new_w2cs.append(w2c)
302
+ new_c2ws.append(c2w)
303
+ affine_mat = np.eye(4)
304
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
305
+ new_affine_mats.append(affine_mat)
306
+
307
+ camera_o = c2w[:3, 3]
308
+ dist = np.sqrt(np.sum(camera_o ** 2))
309
+ near = dist - 1
310
+ far = dist + 1
311
+
312
+ new_near_fars.append([0.95 * near, 1.05 * far])
313
+ new_depths_h.append(depth * scale_factor)
314
+
315
+ # print(new_near_fars)
316
+ imgs = torch.stack(imgs).float()
317
+ depths_h = np.stack(new_depths_h)
318
+ masks_h = np.stack(masks_h)
319
+
320
+ affine_mats = np.stack(new_affine_mats)
321
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
322
+ new_near_fars)
323
+
324
+ if self.split == 'train':
325
+ start_idx = 0
326
+ else:
327
+ start_idx = 1
328
+
329
+
330
+
331
+ target_w2cs = []
332
+ target_intrinsics = []
333
+ new_target_w2cs = []
334
+ for i_idx in range(8):
335
+ target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
336
+ target_intrinsics.append(self.all_intrinsics[i_idx])
337
+
338
+ for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
339
+
340
+ P = intrinsic @ extrinsic @ scale_mat
341
+ P = P[:3, :4]
342
+ # - should use load_K_Rt_from_P() to obtain c2w
343
+ c2w = load_K_Rt_from_P(None, P)[1]
344
+ w2c = np.linalg.inv(c2w)
345
+ new_target_w2cs.append(w2c)
346
+ target_w2cs = np.stack(new_target_w2cs)
347
+
348
+
349
+
350
+ view_ids = [idx] + list(src_views)
351
+ sample['origin_idx'] = origin_idx
352
+ sample['images'] = imgs # (V, 3, H, W)
353
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
354
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
355
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
356
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
357
+ sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
358
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
359
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
360
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
361
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
362
+
363
+ # sample['light_idx'] = torch.tensor(light_idx)
364
+ sample['scan'] = shape_name
365
+
366
+ sample['scale_factor'] = torch.tensor(scale_factor)
367
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
368
+ sample['render_img_idx'] = torch.tensor(image_perm)
369
+ sample['partial_vol_origin'] = self.partial_vol_origin
370
+ sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
371
+ # print("meta: ", sample['meta'])
372
+
373
+ # - image to render
374
+ sample['query_image'] = sample['images'][0]
375
+ sample['query_c2w'] = sample['c2ws'][0]
376
+ sample['query_w2c'] = sample['w2cs'][0]
377
+ sample['query_intrinsic'] = sample['intrinsics'][0]
378
+ sample['query_depth'] = sample['depths_h'][0]
379
+ sample['query_mask'] = sample['masks_h'][0]
380
+ sample['query_near_far'] = sample['near_fars'][0]
381
+
382
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
383
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
384
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
385
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
386
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
387
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
388
+ sample['view_ids'] = sample['view_ids'][start_idx:]
389
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
390
+
391
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
392
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
393
+
394
+ # - generate rays
395
+ if ('val' in self.split) or ('test' in self.split):
396
+ sample_rays = gen_rays_from_single_image(
397
+ img_wh[1], img_wh[0],
398
+ sample['query_image'],
399
+ sample['query_intrinsic'],
400
+ sample['query_c2w'],
401
+ depth=sample['query_depth'],
402
+ mask=sample['query_mask'] if self.clean_image else None)
403
+ else:
404
+ sample_rays = gen_random_rays_from_single_image(
405
+ img_wh[1], img_wh[0],
406
+ self.N_rays,
407
+ sample['query_image'],
408
+ sample['query_intrinsic'],
409
+ sample['query_c2w'],
410
+ depth=sample['query_depth'],
411
+ mask=sample['query_mask'] if self.clean_image else None,
412
+ dilated_mask=mask_dilated,
413
+ importance_sample=self.importance_sample)
414
+
415
+
416
+ sample['rays'] = sample_rays
417
+
418
+ return sample
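For reference, this loader can be exercised on its own roughly as follows. This is a sketch rather than code from the commit: the root directory is a placeholder, and with the shape_list = [""] modification above, root_dir/<specific_dataset_name> itself is expected to contain pose.json plus the stage1_8/ and stage2_8/ renders.

```python
# Minimal usage sketch (paths are placeholders; the repo's data/ and models/ packages
# must be importable for the module-level imports to resolve).
from torch.utils.data import DataLoader
from data.blender_general_narrow_all_eval_new_data import BlenderPerView

dataset = BlenderPerView(root_dir="/path/to/renders", split="val",
                         img_wh=(256, 256), N_rays=512,
                         clean_image=True, specific_dataset_name="GSO")
loader = DataLoader(dataset, batch_size=1, shuffle=False)

sample = next(iter(loader))
print(sample['meta'])                          # dataset/shape/ref-view identifier (collated as a list of strings)
print(sample['query_image'].shape)             # (1, 3, 256, 256) reference view to be rendered
print(sample['images'].shape)                  # (1, V, 3, 256, 256) source views; the reference is dropped for 'val'
print(sample['target_candidate_w2cs'].shape)   # (1, 8, 4, 4) stage-1 poses in the normalized space
```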
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data3_1.py ADDED
@@ -0,0 +1,414 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # note: the grid above already applies the +0.5 pixel-center offset
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # ? why need transpose here
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ # print("root_dir: ", root_dir)
70
+ self.root_dir = root_dir
71
+ self.split = split
72
+ # self.specific_dataset_name = 'Realfusion'
73
+ self.specific_dataset_name = 'Objaverse'
74
+ self.n_views = n_views
75
+ self.N_rays = N_rays
76
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
77
+
78
+ self.clean_image = clean_image
79
+ self.importance_sample = importance_sample
80
+ self.test_ref_views = test_ref_views # used for testing
81
+ self.scale_factor = 1.0
82
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
83
+ assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh'
84
+ # find all subfolders
85
+ main_folder = os.path.join(root_dir, self.specific_dataset_name)
86
+ self.shape_list = os.listdir(main_folder)
87
+ self.shape_list.sort()
88
+
89
+ # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
90
+
91
+
92
+ self.lvis_paths = []
93
+ for shape_name in self.shape_list:
94
+ self.lvis_paths.append(os.path.join(main_folder, shape_name))
95
+
96
+ # print("lvis_paths: ", self.lvis_paths)
97
+
98
+ if img_wh is not None:
99
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
100
+ 'img_wh must both be multiples of 32!'
101
+
102
+
103
+ # * bounding box for rendering
104
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
105
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
106
+
107
+ # - used for cost volume regularization
108
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
109
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
110
+
111
+
112
+ def define_transforms(self):
113
+ self.transform = T.Compose([T.ToTensor()])
114
+
115
+
116
+
117
+ def load_cam_info(self):
118
+ for vid, img_id in enumerate(self.img_ids):
119
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
120
+ self.all_intrinsics.append(intrinsic)
121
+ self.all_extrinsics.append(extrinsic)
122
+ self.all_near_fars.append(near_far)
123
+
124
+ def read_depth(self, filename):
125
+ pass
126
+
127
+ def read_mask(self, filename):
128
+ mask_h = cv2.imread(filename, 0)
129
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
130
+ interpolation=cv2.INTER_NEAREST)
131
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
132
+ interpolation=cv2.INTER_NEAREST)
133
+
134
+ mask[mask > 0] = 1 # the masks stored in png are not binary
135
+ mask_h[mask_h > 0] = 1
136
+
137
+ return mask, mask_h
138
+
139
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
140
+
141
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
142
+
143
+ radius = radius * factor
144
+ scale_mat = np.diag([radius, radius, radius, 1.0])
145
+ scale_mat[:3, 3] = center.cpu().numpy()
146
+ scale_mat = scale_mat.astype(np.float32)
147
+
148
+ return scale_mat, 1. / radius.cpu().numpy()
149
+
150
+ def __len__(self):
151
+ # return 8*len(self.lvis_paths)
152
+ return len(self.lvis_paths)
153
+
154
+
155
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
156
+ pass
157
+
158
+
159
+ def __getitem__(self, idx):
160
+ sample = {}
161
+ idx = idx * 8 # to be deleted
162
+ origin_idx = idx
163
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
164
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
165
+
166
+
167
+ folder_path = self.lvis_paths[idx//8]
168
+ idx = idx % 8 # [0, 7]
169
+
170
+ # last subdir name
171
+ shape_name = os.path.split(folder_path)[-1]
172
+
173
+
174
+ pose_json_path = os.path.join(folder_path, "pose.json")
175
+ with open(pose_json_path, 'r') as f:
176
+ meta = json.load(f)
177
+
178
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
179
+ self.img_wh = (256, 256)
180
+ self.input_poses = np.array(list(meta["c2ws"].values()))
181
+ intrinsic = np.eye(4)
182
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
183
+ self.intrinsic = intrinsic
184
+ self.near_far = np.array(meta["near_far"])
185
+ self.near_far[1] = 1.8
186
+ self.define_transforms()
187
+ self.blender2opencv = np.array(
188
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
189
+ )
190
+
191
+
192
+ self.c2ws = []
193
+ self.w2cs = []
194
+ self.near_fars = []
195
+ # self.root_dir = root_dir
196
+ for image_dix, img_id in enumerate(self.img_ids):
197
+ pose = self.input_poses[image_dix]
198
+ c2w = pose @ self.blender2opencv
199
+ self.c2ws.append(c2w)
200
+ self.w2cs.append(np.linalg.inv(c2w))
201
+ self.near_fars.append(self.near_far)
202
+ self.c2ws = np.stack(self.c2ws, axis=0)
203
+ self.w2cs = np.stack(self.w2cs, axis=0)
204
+
205
+
206
+ self.all_intrinsics = [] # the cam info of the whole scene
207
+ self.all_extrinsics = []
208
+ self.all_near_fars = []
209
+ self.load_cam_info()
210
+
211
+
212
+ # target view
213
+ c2w = self.c2ws[idx]
214
+ w2c = np.linalg.inv(c2w)
215
+ w2c_ref = w2c
216
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
217
+
218
+ w2cs.append(w2c @ w2c_ref_inv)
219
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
220
+
221
+ img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}')
222
+ # print(self.img_ids)
223
+ img = Image.open(img_filename)
224
+ img = self.transform(img) # (4, h, w)
225
+
226
+
227
+ if img.shape[0] == 4:
228
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
229
+ imgs += [img]
230
+
231
+
232
+ depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
233
+ depth_h = depth_h.fill_(-1.0)
234
+ mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
235
+
236
+
237
+ depths_h.append(depth_h)
238
+ masks_h.append(mask_h)
239
+
240
+ intrinsic = self.intrinsic
241
+ intrinsics.append(intrinsic)
242
+
243
+
244
+ near_fars.append(self.near_fars[idx])
245
+ image_perm = 0 # only supervised on reference view
246
+
247
+ mask_dilated = None
248
+
249
+ # src_views = range(8+idx*4, 8+(idx+1)*4)
250
+ src_views = range(8, 8 + 8 * 4)
251
+
252
+ for vid in src_views:
253
+ if vid % 4 == 0:
254
+ vid = (vid - 8) // 4
255
+ img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[vid]}')
256
+ else:
257
+ img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}')
258
+
259
+ img = Image.open(img_filename)
260
+ img_wh = self.img_wh
261
+
262
+ img = self.transform(img)
263
+ if img.shape[0] == 4:
264
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
265
+
266
+ imgs += [img]
267
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
268
+ depths_h.append(depth_h)
269
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
270
+
271
+ near_fars.append(self.all_near_fars[vid])
272
+ intrinsics.append(self.all_intrinsics[vid])
273
+
274
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
275
+
276
+
277
+ # ! estimate scale_mat
278
+ scale_mat, scale_factor = self.cal_scale_mat(
279
+ img_hw=[img_wh[1], img_wh[0]],
280
+ intrinsics=intrinsics, extrinsics=w2cs,
281
+ near_fars=near_fars, factor=1.1
282
+ )
283
+
284
+
285
+ new_near_fars = []
286
+ new_w2cs = []
287
+ new_c2ws = []
288
+ new_affine_mats = []
289
+ new_depths_h = []
290
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
291
+
292
+ P = intrinsic @ extrinsic @ scale_mat
293
+ P = P[:3, :4]
294
+ # - should use load_K_Rt_from_P() to obtain c2w
295
+ c2w = load_K_Rt_from_P(None, P)[1]
296
+ w2c = np.linalg.inv(c2w)
297
+ new_w2cs.append(w2c)
298
+ new_c2ws.append(c2w)
299
+ affine_mat = np.eye(4)
300
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
301
+ new_affine_mats.append(affine_mat)
302
+
303
+ camera_o = c2w[:3, 3]
304
+ dist = np.sqrt(np.sum(camera_o ** 2))
305
+ near = dist - 1
306
+ far = dist + 1
307
+
308
+ new_near_fars.append([0.95 * near, 1.05 * far])
309
+ new_depths_h.append(depth * scale_factor)
310
+
311
+ # print(new_near_fars)
312
+ imgs = torch.stack(imgs).float()
313
+ depths_h = np.stack(new_depths_h)
314
+ masks_h = np.stack(masks_h)
315
+
316
+ affine_mats = np.stack(new_affine_mats)
317
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
318
+ new_near_fars)
319
+
320
+ if self.split == 'train':
321
+ start_idx = 0
322
+ else:
323
+ start_idx = 1
324
+
325
+
326
+
327
+ target_w2cs = []
328
+ target_intrinsics = []
329
+ new_target_w2cs = []
330
+ for i_idx in range(8):
331
+ target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
332
+ target_intrinsics.append(self.all_intrinsics[i_idx])
333
+
334
+ for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
335
+
336
+ P = intrinsic @ extrinsic @ scale_mat
337
+ P = P[:3, :4]
338
+ # - should use load_K_Rt_from_P() to obtain c2w
339
+ c2w = load_K_Rt_from_P(None, P)[1]
340
+ w2c = np.linalg.inv(c2w)
341
+ new_target_w2cs.append(w2c)
342
+ target_w2cs = np.stack(new_target_w2cs)
343
+
344
+
345
+
346
+ view_ids = [idx] + list(src_views)
347
+ sample['origin_idx'] = origin_idx
348
+ sample['images'] = imgs # (V, 3, H, W)
349
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
350
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
351
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
352
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
353
+ sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
354
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
355
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
356
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
357
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
358
+
359
+ # sample['light_idx'] = torch.tensor(light_idx)
360
+ sample['scan'] = shape_name
361
+
362
+ sample['scale_factor'] = torch.tensor(scale_factor)
363
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
364
+ sample['render_img_idx'] = torch.tensor(image_perm)
365
+ sample['partial_vol_origin'] = self.partial_vol_origin
366
+ sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
367
+ # print("meta: ", sample['meta'])
368
+
369
+ # - image to render
370
+ sample['query_image'] = sample['images'][0]
371
+ sample['query_c2w'] = sample['c2ws'][0]
372
+ sample['query_w2c'] = sample['w2cs'][0]
373
+ sample['query_intrinsic'] = sample['intrinsics'][0]
374
+ sample['query_depth'] = sample['depths_h'][0]
375
+ sample['query_mask'] = sample['masks_h'][0]
376
+ sample['query_near_far'] = sample['near_fars'][0]
377
+
378
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
379
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
380
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
381
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
382
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
383
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
384
+ sample['view_ids'] = sample['view_ids'][start_idx:]
385
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
386
+
387
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
388
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
389
+
390
+ # - generate rays
391
+ if ('val' in self.split) or ('test' in self.split):
392
+ sample_rays = gen_rays_from_single_image(
393
+ img_wh[1], img_wh[0],
394
+ sample['query_image'],
395
+ sample['query_intrinsic'],
396
+ sample['query_c2w'],
397
+ depth=sample['query_depth'],
398
+ mask=sample['query_mask'] if self.clean_image else None)
399
+ else:
400
+ sample_rays = gen_random_rays_from_single_image(
401
+ img_wh[1], img_wh[0],
402
+ self.N_rays,
403
+ sample['query_image'],
404
+ sample['query_intrinsic'],
405
+ sample['query_c2w'],
406
+ depth=sample['query_depth'],
407
+ mask=sample['query_mask'] if self.clean_image else None,
408
+ dilated_mask=mask_dilated,
409
+ importance_sample=self.importance_sample)
410
+
411
+
412
+ sample['rays'] = sample_rays
413
+
414
+ return sample
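The per-view near/far values above follow a simple heuristic: after cal_scale_mat() the object is normalized to roughly the unit sphere, so each camera's valid depth range is its distance to the origin plus or minus one unit, padded by 5%. A minimal sketch of that rule follows; the helper name and the example camera position are assumptions for illustration, not taken from the commit.

```python
# Minimal sketch of the per-view near/far heuristic used in __getitem__ above.
import numpy as np

def near_far_from_c2w(c2w, margin=0.05, radius=1.0):
    camera_o = c2w[:3, 3]              # camera center in the normalized world space
    dist = np.linalg.norm(camera_o)    # distance from the (centered) object
    near, far = dist - radius, dist + radius
    return [(1.0 - margin) * near, (1.0 + margin) * far]

c2w = np.eye(4)
c2w[:3, 3] = [0.0, 0.0, 1.5]           # illustrative camera 1.5 units from the origin
print(near_far_from_c2w(c2w))          # [0.475, 2.625], i.e. [0.95 * near, 1.05 * far]
```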
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_32_wide.py ADDED
@@ -0,0 +1,465 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+
18
+ def calc_pose(phis, thetas, size, radius = 1.2):
19
+ import torch
20
+ def normalize(vectors):
21
+ return vectors / (torch.norm(vectors, dim=-1, keepdim=True) + 1e-10)
22
+ # device = torch.device('cuda')
23
+ thetas = torch.FloatTensor(thetas)
24
+ phis = torch.FloatTensor(phis)
25
+
26
+ centers = torch.stack([
27
+ radius * torch.sin(thetas) * torch.sin(phis),
28
+ -radius * torch.cos(thetas) * torch.sin(phis),
29
+ radius * torch.cos(phis),
30
+ ], dim=-1) # [B, 3]
31
+
32
+ # lookat
33
+ forward_vector = normalize(centers).squeeze(0)
34
+ up_vector = torch.FloatTensor([0, 0, 1]).unsqueeze(0).repeat(size, 1)
35
+ right_vector = normalize(torch.cross(up_vector, forward_vector, dim=-1))
36
+ if right_vector.pow(2).sum() < 0.01:
37
+ right_vector = torch.FloatTensor([0, 1, 0]).unsqueeze(0).repeat(size, 1)
38
+ up_vector = normalize(torch.cross(forward_vector, right_vector, dim=-1))
39
+
40
+ poses = torch.eye(4, dtype=torch.float)[:3].unsqueeze(0).repeat(size, 1, 1)
41
+ poses[:, :3, :3] = torch.stack((right_vector, up_vector, forward_vector), dim=-1)
42
+ poses[:, :3, 3] = centers
43
+ return poses
44
+
45
+ def get_ray_directions(H, W, focal, center=None):
46
+ """
47
+ Get ray directions for all pixels in camera coordinate.
48
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
49
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
50
+ Inputs:
51
+ H, W, focal: image height, width and focal length
52
+ Outputs:
53
+ directions: (H, W, 3), the direction of the rays in camera coordinate
54
+ """
55
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
56
+
57
+ i, j = grid.unbind(-1)
58
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
59
+ # see https://github.com/bmild/nerf/issues/24
60
+ cent = center if center is not None else [W / 2, H / 2]
61
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
62
+
63
+ return directions
64
+
65
+ def load_K_Rt_from_P(filename, P=None):
66
+ if P is None:
67
+ lines = open(filename).read().splitlines()
68
+ if len(lines) == 4:
69
+ lines = lines[1:]
70
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
71
+ P = np.asarray(lines).astype(np.float32).squeeze()
72
+
73
+ out = cv2.decomposeProjectionMatrix(P)
74
+ K = out[0]
75
+ R = out[1]
76
+ t = out[2]
77
+
78
+ K = K / K[2, 2]
79
+ intrinsics = np.eye(4)
80
+ intrinsics[:3, :3] = K
81
+
82
+ pose = np.eye(4, dtype=np.float32)
83
+ pose[:3, :3] = R.transpose() # ? why need transpose here
84
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
85
+
86
+ return intrinsics, pose # ! return cam2world matrix here
87
+
88
+
89
+ # ! load one ref-image with multiple src-images in camera coordinate system
90
+ class BlenderPerView(Dataset):
91
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
92
+ split_filepath=None, pair_filepath=None,
93
+ N_rays=512,
94
+ vol_dims=[128, 128, 128], batch_size=1,
95
+ clean_image=False, importance_sample=False, test_ref_views=[],
96
+ specific_dataset_name = 'GSO'
97
+ ):
98
+
99
+ # print("root_dir: ", root_dir)
100
+ self.root_dir = root_dir
101
+ self.split = split
102
+ # self.specific_dataset_name = 'Realfusion'
103
+ # self.specific_dataset_name = 'GSO'
104
+ # self.specific_dataset_name = 'Objaverse'
105
+ # self.specific_dataset_name = 'Zero123'
106
+
107
+ self.specific_dataset_name = specific_dataset_name
108
+ self.n_views = n_views
109
+ self.N_rays = N_rays
110
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
111
+
112
+ self.clean_image = clean_image
113
+ self.importance_sample = importance_sample
114
+ self.test_ref_views = test_ref_views # used for testing
115
+ self.scale_factor = 1.0
116
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
117
+ assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh'
118
+ # find all subfolders
119
+ main_folder = os.path.join(root_dir)
120
+ self.shape_list = os.listdir(main_folder)
121
+ self.shape_list.sort()
122
+
123
+ # self.shape_list = ['barrel_render']
124
+ # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
125
+
126
+
127
+ self.lvis_paths = []
128
+ for shape_name in self.shape_list:
129
+ self.lvis_paths.append(os.path.join(main_folder, shape_name))
130
+
131
+ # print("lvis_paths: ", self.lvis_paths)
132
+
133
+ if img_wh is not None:
134
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
135
+ 'img_wh must both be multiples of 32!'
136
+
137
+ pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
138
+
139
+ with open(pose_json_path, 'r') as f:
140
+ meta = json.load(f)
141
+ intrinsic = np.eye(4)
142
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
143
+ self.intrinsic = intrinsic
144
+ self.near_far = np.array(meta["near_far"])
145
+ self.near_far[1] = 1.8
146
+
147
+ # * bounding box for rendering
148
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
149
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
150
+
151
+ # - used for cost volume regularization
152
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
153
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
154
+
155
+
156
+ def define_transforms(self):
157
+ self.transform = T.Compose([T.ToTensor()])
158
+
159
+
160
+
161
+ def load_cam_info(self):
162
+ for vid in range(self.input_poses.shape[0]):
163
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
164
+ self.all_intrinsics.append(intrinsic)
165
+ self.all_extrinsics.append(extrinsic)
166
+ self.all_near_fars.append(near_far)
167
+
168
+ def read_depth(self, filename):
169
+ pass
170
+
171
+ def read_mask(self, filename):
172
+ mask_h = cv2.imread(filename, 0)
173
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
174
+ interpolation=cv2.INTER_NEAREST)
175
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
176
+ interpolation=cv2.INTER_NEAREST)
177
+
178
+ mask[mask > 0] = 1 # the masks stored in png are not binary
179
+ mask_h[mask_h > 0] = 1
180
+
181
+ return mask, mask_h
182
+
183
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
184
+
185
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
186
+
187
+ radius = radius * factor
188
+ scale_mat = np.diag([radius, radius, radius, 1.0])
189
+ scale_mat[:3, 3] = center.cpu().numpy()
190
+ scale_mat = scale_mat.astype(np.float32)
191
+
192
+ return scale_mat, 1. / radius.cpu().numpy()
193
+
194
+ def __len__(self):
195
+ # return 8*len(self.lvis_paths)
196
+ return len(self.lvis_paths)
197
+
198
+
199
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
200
+ pass
201
+
202
+
203
+ def __getitem__(self, idx):
204
+ sample = {}
205
+ idx = idx * 8 # to be deleted
206
+ origin_idx = idx
207
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
208
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views
209
+
210
+ folder_path = self.lvis_paths[idx//8]
211
+ idx = idx % 8 # [0, 7]
212
+
213
+ # last subdir name
214
+ shape_name = os.path.split(folder_path)[-1]
215
+
216
+ # pose_json_path = os.path.join(folder_path, "pose.json")
217
+ # with open(pose_json_path, 'r') as f:
218
+ # meta = json.load(f)
219
+
220
+ # self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
221
+ # self.img_wh = (256, 256)
222
+ # self.input_poses = np.array(list(meta["c2ws"].values()))
223
+ # intrinsic = np.eye(4)
224
+ # intrinsic[:3, :3] = np.array(meta["intrinsics"])
225
+ # self.intrinsic = intrinsic
226
+ # self.near_far = np.array(meta["near_far"])
227
+ # self.near_far[1] = 1.8
228
+ # self.define_transforms()
229
+ # self.blender2opencv = np.array(
230
+ # [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
231
+ # )
232
+
233
+ pose_file = os.path.join(folder_path, '32_random', 'views.npz')
234
+ pose_array = np.load(pose_file)
235
+ pose = calc_pose(pose_array['elevations'], pose_array['azimuths'], 32) # [32, 3, 4] c2ws
236
+
237
+ self.img_wh = (256, 256)
238
+ self.input_poses = np.array(pose)
239
+ self.input_poses = np.concatenate([self.input_poses, np.tile(np.array([0, 0, 0, 1], dtype=np.float32)[None, None, :], [self.input_poses.shape[0], 1, 1])], axis=1)
240
+ self.define_transforms()
241
+ self.blender2opencv = np.array(
242
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
243
+ )
244
+
245
+ self.c2ws = []
246
+ self.w2cs = []
247
+ self.near_fars = []
248
+ # self.root_dir = root_dir
249
+ for image_dix in range(pose.shape[0]):
250
+ pose = self.input_poses[image_dix]
251
+ c2w = pose @ self.blender2opencv
252
+ self.c2ws.append(c2w)
253
+ self.w2cs.append(np.linalg.inv(c2w))
254
+ self.near_fars.append(self.near_far)
255
+ self.c2ws = np.stack(self.c2ws, axis=0)
256
+ self.w2cs = np.stack(self.w2cs, axis=0)
257
+
258
+
259
+ self.all_intrinsics = [] # the cam info of the whole scene
260
+ self.all_extrinsics = []
261
+ self.all_near_fars = []
262
+ self.load_cam_info()
263
+
264
+
265
+ # target view
266
+ c2w = self.c2ws[idx]
267
+ w2c = np.linalg.inv(c2w)
268
+ w2c_ref = w2c
269
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
270
+
271
+ w2cs.append(w2c @ w2c_ref_inv)
272
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
273
+
274
+ # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
275
+ img_filename = os.path.join(folder_path, '32_random', f'{idx}.png')
276
+
277
+ img = Image.open(img_filename)
278
+ img = self.transform(img) # (4, h, w)
279
+
280
+
281
+ if img.shape[0] == 4:
282
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
283
+ imgs += [img]
284
+
285
+
286
+ depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
287
+ depth_h = depth_h.fill_(-1.0)
288
+ mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
289
+
290
+
291
+ depths_h.append(depth_h)
292
+ masks_h.append(mask_h)
293
+
294
+ intrinsic = self.intrinsic
295
+ intrinsics.append(intrinsic)
296
+
297
+
298
+ near_fars.append(self.near_fars[idx])
299
+ image_perm = 0 # only supervised on reference view
300
+
301
+ mask_dilated = None
302
+
303
+
304
+ src_views = range(0, 8 * 4)
305
+
306
+ for vid in src_views:
307
+
308
+ # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
309
+ img_filename = os.path.join(folder_path, '32_random', f'{vid}.png')
310
+ img = Image.open(img_filename)
311
+ img_wh = self.img_wh
312
+
313
+ img = self.transform(img)
314
+ if img.shape[0] == 4:
315
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
316
+
317
+ imgs += [img]
318
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
319
+ depths_h.append(depth_h)
320
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
321
+
322
+ near_fars.append(self.all_near_fars[vid])
323
+ intrinsics.append(self.all_intrinsics[vid])
324
+
325
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
326
+
327
+
328
+ # ! estimate scale_mat
329
+ scale_mat, scale_factor = self.cal_scale_mat(
330
+ img_hw=[img_wh[1], img_wh[0]],
331
+ intrinsics=intrinsics, extrinsics=w2cs,
332
+ near_fars=near_fars, factor=1.1
333
+ )
334
+
335
+
336
+ new_near_fars = []
337
+ new_w2cs = []
338
+ new_c2ws = []
339
+ new_affine_mats = []
340
+ new_depths_h = []
341
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
342
+
343
+ P = intrinsic @ extrinsic @ scale_mat
344
+ P = P[:3, :4]
345
+ # - should use load_K_Rt_from_P() to obtain c2w
346
+ c2w = load_K_Rt_from_P(None, P)[1]
347
+ w2c = np.linalg.inv(c2w)
348
+ new_w2cs.append(w2c)
349
+ new_c2ws.append(c2w)
350
+ affine_mat = np.eye(4)
351
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
352
+ new_affine_mats.append(affine_mat)
353
+
354
+ camera_o = c2w[:3, 3]
355
+ dist = np.sqrt(np.sum(camera_o ** 2))
356
+ near = dist - 1
357
+ far = dist + 1
358
+
359
+ new_near_fars.append([0.95 * near, 1.05 * far])
360
+ new_depths_h.append(depth * scale_factor)
361
+
362
+ # print(new_near_fars)
363
+ imgs = torch.stack(imgs).float()
364
+ depths_h = np.stack(new_depths_h)
365
+ masks_h = np.stack(masks_h)
366
+
367
+ affine_mats = np.stack(new_affine_mats)
368
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
369
+ new_near_fars)
370
+
371
+ if self.split == 'train':
372
+ start_idx = 0
373
+ else:
374
+ start_idx = 1
375
+
376
+
377
+
378
+ target_w2cs = []
379
+ target_intrinsics = []
380
+ new_target_w2cs = []
381
+ for i_idx in range(8):
382
+ target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
383
+ target_intrinsics.append(self.all_intrinsics[i_idx])
384
+
385
+ for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
386
+
387
+ P = intrinsic @ extrinsic @ scale_mat
388
+ P = P[:3, :4]
389
+ # - should use load_K_Rt_from_P() to obtain c2w
390
+ c2w = load_K_Rt_from_P(None, P)[1]
391
+ w2c = np.linalg.inv(c2w)
392
+ new_target_w2cs.append(w2c)
393
+ target_w2cs = np.stack(new_target_w2cs)
394
+
395
+
396
+
397
+ view_ids = [idx] + list(src_views)
398
+ sample['origin_idx'] = origin_idx
399
+ sample['images'] = imgs # (V, 3, H, W)
400
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
401
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
402
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
403
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
404
+ sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
405
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
406
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
407
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
408
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
409
+
410
+ # sample['light_idx'] = torch.tensor(light_idx)
411
+ sample['scan'] = shape_name
412
+
413
+ sample['scale_factor'] = torch.tensor(scale_factor)
414
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
415
+ sample['render_img_idx'] = torch.tensor(image_perm)
416
+ sample['partial_vol_origin'] = self.partial_vol_origin
417
+ sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
418
+ # print("meta: ", sample['meta'])
419
+
420
+ # - image to render
421
+ sample['query_image'] = sample['images'][0]
422
+ sample['query_c2w'] = sample['c2ws'][0]
423
+ sample['query_w2c'] = sample['w2cs'][0]
424
+ sample['query_intrinsic'] = sample['intrinsics'][0]
425
+ sample['query_depth'] = sample['depths_h'][0]
426
+ sample['query_mask'] = sample['masks_h'][0]
427
+ sample['query_near_far'] = sample['near_fars'][0]
428
+
429
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
430
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
431
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
432
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
433
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
434
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
435
+ sample['view_ids'] = sample['view_ids'][start_idx:]
436
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
437
+
438
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
439
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
440
+
441
+ # - generate rays
442
+ if ('val' in self.split) or ('test' in self.split):
443
+ sample_rays = gen_rays_from_single_image(
444
+ img_wh[1], img_wh[0],
445
+ sample['query_image'],
446
+ sample['query_intrinsic'],
447
+ sample['query_c2w'],
448
+ depth=sample['query_depth'],
449
+ mask=sample['query_mask'] if self.clean_image else None)
450
+ else:
451
+ sample_rays = gen_random_rays_from_single_image(
452
+ img_wh[1], img_wh[0],
453
+ self.N_rays,
454
+ sample['query_image'],
455
+ sample['query_intrinsic'],
456
+ sample['query_c2w'],
457
+ depth=sample['query_depth'],
458
+ mask=sample['query_mask'] if self.clean_image else None,
459
+ dilated_mask=mask_dilated,
460
+ importance_sample=self.importance_sample)
461
+
462
+
463
+ sample['rays'] = sample_rays
464
+
465
+ return sample
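Unlike the other loaders, this one builds its camera poses from the elevation/azimuth angles stored in 32_random/views.npz via calc_pose(). The sketch below shows that conversion in isolation, assuming the SparseNeuS_demo_v1 package is importable; the angle values are placeholders rather than values from an actual views.npz.

```python
# Minimal sketch: spherical angles -> homogeneous c2w matrices, mirroring __getitem__ above.
import numpy as np
from data.blender_general_narrow_all_eval_new_data_32_wide import calc_pose

elevations = np.linspace(0.3, 1.2, 4)                       # placeholder elevation angles (radians)
azimuths = np.linspace(0.0, 2 * np.pi, 4, endpoint=False)   # placeholder azimuth angles (radians)

poses = calc_pose(elevations, azimuths, size=4)             # (4, 3, 4) c2w matrices without the last row
poses = np.array(poses)
bottom = np.tile(np.array([0, 0, 0, 1], dtype=np.float32)[None, None, :], [poses.shape[0], 1, 1])
c2ws_blender = np.concatenate([poses, bottom], axis=1)      # (4, 4, 4) homogeneous c2w

blender2opencv = np.array([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
c2ws = c2ws_blender @ blender2opencv                        # flip y/z into the OpenCV camera convention
print(c2ws.shape)                                           # (4, 4, 4)
print(np.linalg.norm(c2ws[0, :3, 3]))                       # ~1.2: cameras lie on the default radius-1.2 sphere
```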
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_4_4.py ADDED
@@ -0,0 +1,419 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+
18
+
19
+ def get_ray_directions(H, W, focal, center=None):
20
+ """
21
+ Get ray directions for all pixels in camera coordinate.
22
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
23
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
24
+ Inputs:
25
+ H, W, focal: image height, width and focal length
26
+ Outputs:
27
+ directions: (H, W, 3), the direction of the rays in camera coordinate
28
+ """
29
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
30
+
31
+ i, j = grid.unbind(-1)
32
+ # note: the grid above already applies the +0.5 pixel-center offset
33
+ # see https://github.com/bmild/nerf/issues/24
34
+ cent = center if center is not None else [W / 2, H / 2]
35
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
36
+
37
+ return directions
38
+
39
+ def load_K_Rt_from_P(filename, P=None):
40
+ if P is None:
41
+ lines = open(filename).read().splitlines()
42
+ if len(lines) == 4:
43
+ lines = lines[1:]
44
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
45
+ P = np.asarray(lines).astype(np.float32).squeeze()
46
+
47
+ out = cv2.decomposeProjectionMatrix(P)
48
+ K = out[0]
49
+ R = out[1]
50
+ t = out[2]
51
+
52
+ K = K / K[2, 2]
53
+ intrinsics = np.eye(4)
54
+ intrinsics[:3, :3] = K
55
+
56
+ pose = np.eye(4, dtype=np.float32)
57
+ pose[:3, :3] = R.transpose() # ? why need transpose here
58
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
59
+
60
+ return intrinsics, pose # ! return cam2world matrix here
61
+
62
+
63
+ # ! load one ref-image with multiple src-images in camera coordinate system
64
+ class BlenderPerView(Dataset):
65
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
66
+ split_filepath=None, pair_filepath=None,
67
+ N_rays=512,
68
+ vol_dims=[128, 128, 128], batch_size=1,
69
+ clean_image=False, importance_sample=False, test_ref_views=[],
70
+ specific_dataset_name = 'GSO'
71
+ ):
72
+
73
+ # print("root_dir: ", root_dir)
74
+ self.root_dir = root_dir
75
+ self.split = split
76
+ # self.specific_dataset_name = 'Realfusion'
77
+ # self.specific_dataset_name = 'GSO'
78
+ # self.specific_dataset_name = 'Objaverse'
79
+ # self.specific_dataset_name = 'Zero123'
80
+
81
+ self.specific_dataset_name = specific_dataset_name
82
+ self.n_views = n_views
83
+ self.N_rays = N_rays
84
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
85
+
86
+ self.clean_image = clean_image
87
+ self.importance_sample = importance_sample
88
+ self.test_ref_views = test_ref_views # used for testing
89
+ self.scale_factor = 1.0
90
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
91
+ assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh'
92
+ # find all subfolders
93
+ main_folder = os.path.join(root_dir, self.specific_dataset_name)
94
+ self.shape_list = os.listdir(main_folder)
95
+ self.shape_list.sort()
96
+
97
+ # self.shape_list = ['barrel_render']
98
+ # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
99
+
100
+
101
+ self.lvis_paths = []
102
+ for shape_name in self.shape_list:
103
+ self.lvis_paths.append(os.path.join(main_folder, shape_name))
104
+
105
+ # print("lvis_paths: ", self.lvis_paths)
106
+
107
+ if img_wh is not None:
108
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
109
+ 'img_wh must both be multiples of 32!'
110
+
111
+
112
+ # * bounding box for rendering
113
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
114
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
115
+
116
+ # - used for cost volume regularization
117
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
118
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
119
+
120
+
121
+ def define_transforms(self):
122
+ self.transform = T.Compose([T.ToTensor()])
123
+
124
+
125
+
126
+ def load_cam_info(self):
127
+ for vid, img_id in enumerate(self.img_ids):
128
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
129
+ self.all_intrinsics.append(intrinsic)
130
+ self.all_extrinsics.append(extrinsic)
131
+ self.all_near_fars.append(near_far)
132
+
133
+ def read_depth(self, filename):
134
+ pass
135
+
136
+ def read_mask(self, filename):
137
+ mask_h = cv2.imread(filename, 0)
138
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
139
+ interpolation=cv2.INTER_NEAREST)
140
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
141
+ interpolation=cv2.INTER_NEAREST)
142
+
143
+ mask[mask > 0] = 1 # the masks stored in png are not binary
144
+ mask_h[mask_h > 0] = 1
145
+
146
+ return mask, mask_h
147
+
148
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
149
+
150
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
151
+
152
+ radius = radius * factor
153
+ scale_mat = np.diag([radius, radius, radius, 1.0])
154
+ scale_mat[:3, 3] = center.cpu().numpy()
155
+ scale_mat = scale_mat.astype(np.float32)
156
+
157
+ return scale_mat, 1. / radius.cpu().numpy()
158
+
159
+ def __len__(self):
160
+ # return 8*len(self.lvis_paths)
161
+ return len(self.lvis_paths)
162
+
163
+
164
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
165
+ pass
166
+
167
+
168
+ def __getitem__(self, idx):
169
+ sample = {}
170
+ idx = idx * 8 # to be deleted
171
+ origin_idx = idx
172
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
173
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views
174
+
175
+ folder_path = self.lvis_paths[idx//8]
176
+ idx = idx % 8 # [0, 7]
177
+
178
+ # last subdir name
179
+ shape_name = os.path.split(folder_path)[-1]
180
+
181
+ pose_json_path = os.path.join(folder_path, "pose.json")
182
+ with open(pose_json_path, 'r') as f:
183
+ meta = json.load(f)
184
+
185
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
186
+ self.img_wh = (256, 256)
187
+ self.input_poses = np.array(list(meta["c2ws"].values()))
188
+ intrinsic = np.eye(4)
189
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
190
+ self.intrinsic = intrinsic
191
+ self.near_far = np.array(meta["near_far"])
192
+ self.near_far[1] = 1.8
193
+ self.define_transforms()
194
+ self.blender2opencv = np.array(
195
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
196
+ )
197
+
198
+ self.c2ws = []
199
+ self.w2cs = []
200
+ self.near_fars = []
201
+ # self.root_dir = root_dir
202
+ for image_idx, img_id in enumerate(self.img_ids):
203
+ pose = self.input_poses[image_idx]
204
+ c2w = pose @ self.blender2opencv
205
+ self.c2ws.append(c2w)
206
+ self.w2cs.append(np.linalg.inv(c2w))
207
+ self.near_fars.append(self.near_far)
208
+ self.c2ws = np.stack(self.c2ws, axis=0)
209
+ self.w2cs = np.stack(self.w2cs, axis=0)
210
+
211
+
212
+ self.all_intrinsics = [] # the cam info of the whole scene
213
+ self.all_extrinsics = []
214
+ self.all_near_fars = []
215
+ self.load_cam_info()
216
+
217
+
218
+ # target view
219
+ c2w = self.c2ws[idx]
220
+ w2c = np.linalg.inv(c2w)
221
+ w2c_ref = w2c
222
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
223
+
224
+ w2cs.append(w2c @ w2c_ref_inv)
225
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
226
+
227
+ # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
228
+ img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}')
229
+
230
+ img = Image.open(img_filename)
231
+ img = self.transform(img) # (4, h, w)
232
+
233
+
234
+ if img.shape[0] == 4:
235
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
236
+ imgs += [img]
237
+
238
+
239
+ depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
240
+ depth_h = depth_h.fill_(-1.0)
241
+ mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
242
+
243
+
244
+ depths_h.append(depth_h)
245
+ masks_h.append(mask_h)
246
+
247
+ intrinsic = self.intrinsic
248
+ intrinsics.append(intrinsic)
249
+
250
+
251
+ near_fars.append(self.near_fars[idx])
252
+ image_perm = 0 # only supervised on reference view
253
+
254
+ mask_dilated = None
255
+
256
+
257
+ src_views = range(8, 8 + 8 * 4)
258
+
259
+ for vid in src_views:
260
+ if (vid // 4) % 2 != 0:
261
+ continue
262
+ # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
263
+ img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}')
264
+ img = Image.open(img_filename)
265
+ img_wh = self.img_wh
266
+
267
+ img = self.transform(img)
268
+ if img.shape[0] == 4:
269
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
270
+
271
+ imgs += [img]
272
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
273
+ depths_h.append(depth_h)
274
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
275
+
276
+ near_fars.append(self.all_near_fars[vid])
277
+ intrinsics.append(self.all_intrinsics[vid])
278
+
279
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
280
+
281
+
282
+ # ! estimate scale_mat
283
+ scale_mat, scale_factor = self.cal_scale_mat(
284
+ img_hw=[img_wh[1], img_wh[0]],
285
+ intrinsics=intrinsics, extrinsics=w2cs,
286
+ near_fars=near_fars, factor=1.1
287
+ )
288
+
289
+
290
+ new_near_fars = []
291
+ new_w2cs = []
292
+ new_c2ws = []
293
+ new_affine_mats = []
294
+ new_depths_h = []
295
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
296
+
297
+ P = intrinsic @ extrinsic @ scale_mat
298
+ P = P[:3, :4]
299
+ # - should use load_K_Rt_from_P() to obtain c2w
300
+ c2w = load_K_Rt_from_P(None, P)[1]
301
+ w2c = np.linalg.inv(c2w)
302
+ new_w2cs.append(w2c)
303
+ new_c2ws.append(c2w)
304
+ affine_mat = np.eye(4)
305
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
306
+ new_affine_mats.append(affine_mat)
307
+
308
+ camera_o = c2w[:3, 3]
309
+ dist = np.sqrt(np.sum(camera_o ** 2))
310
+ near = dist - 1
311
+ far = dist + 1
312
+
313
+ new_near_fars.append([0.95 * near, 1.05 * far])
314
+ new_depths_h.append(depth * scale_factor)
315
+
316
+ # print(new_near_fars)
317
+ imgs = torch.stack(imgs).float()
318
+ depths_h = np.stack(new_depths_h)
319
+ masks_h = np.stack(masks_h)
320
+
321
+ affine_mats = np.stack(new_affine_mats)
322
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
323
+ new_near_fars)
324
+
325
+ if self.split == 'train':
326
+ start_idx = 0
327
+ else:
328
+ start_idx = 1
329
+
330
+
331
+
332
+ target_w2cs = []
333
+ target_intrinsics = []
334
+ new_target_w2cs = []
335
+ for i_idx in range(8):
336
+ target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
337
+ target_intrinsics.append(self.all_intrinsics[i_idx])
338
+
339
+ for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
340
+
341
+ P = intrinsic @ extrinsic @ scale_mat
342
+ P = P[:3, :4]
343
+ # - should use load_K_Rt_from_P() to obtain c2w
344
+ c2w = load_K_Rt_from_P(None, P)[1]
345
+ w2c = np.linalg.inv(c2w)
346
+ new_target_w2cs.append(w2c)
347
+ target_w2cs = np.stack(new_target_w2cs)
348
+
349
+
350
+
351
+ view_ids = [idx] + list(src_views)
352
+ sample['origin_idx'] = origin_idx
353
+ sample['images'] = imgs # (V, 3, H, W)
354
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
355
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
356
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
357
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
358
+ sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
359
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
360
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
361
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
362
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
363
+
364
+ # sample['light_idx'] = torch.tensor(light_idx)
365
+ sample['scan'] = shape_name
366
+
367
+ sample['scale_factor'] = torch.tensor(scale_factor)
368
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
369
+ sample['render_img_idx'] = torch.tensor(image_perm)
370
+ sample['partial_vol_origin'] = self.partial_vol_origin
371
+ sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
372
+ # print("meta: ", sample['meta'])
373
+
374
+ # - image to render
375
+ sample['query_image'] = sample['images'][0]
376
+ sample['query_c2w'] = sample['c2ws'][0]
377
+ sample['query_w2c'] = sample['w2cs'][0]
378
+ sample['query_intrinsic'] = sample['intrinsics'][0]
379
+ sample['query_depth'] = sample['depths_h'][0]
380
+ sample['query_mask'] = sample['masks_h'][0]
381
+ sample['query_near_far'] = sample['near_fars'][0]
382
+
383
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
384
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
385
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
386
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
387
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
388
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
389
+ sample['view_ids'] = sample['view_ids'][start_idx:]
390
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
391
+
392
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
393
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
394
+
395
+ # - generate rays
396
+ if ('val' in self.split) or ('test' in self.split):
397
+ sample_rays = gen_rays_from_single_image(
398
+ img_wh[1], img_wh[0],
399
+ sample['query_image'],
400
+ sample['query_intrinsic'],
401
+ sample['query_c2w'],
402
+ depth=sample['query_depth'],
403
+ mask=sample['query_mask'] if self.clean_image else None)
404
+ else:
405
+ sample_rays = gen_random_rays_from_single_image(
406
+ img_wh[1], img_wh[0],
407
+ self.N_rays,
408
+ sample['query_image'],
409
+ sample['query_intrinsic'],
410
+ sample['query_c2w'],
411
+ depth=sample['query_depth'],
412
+ mask=sample['query_mask'] if self.clean_image else None,
413
+ dilated_mask=mask_dilated,
414
+ importance_sample=self.importance_sample)
415
+
416
+
417
+ sample['rays'] = sample_rays
418
+
419
+ return sample
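
After normalization by scale_mat, each of these loaders re-derives the per-view near/far bounds from the camera-to-origin distance (near = dist - 1, far = dist + 1, padded by 5%). A minimal standalone sketch of that step, using a hypothetical camera center rather than values from the commit:

import numpy as np

c2w = np.eye(4)
c2w[:3, 3] = [0.0, 0.0, 1.5]              # hypothetical normalized camera center
dist = float(np.linalg.norm(c2w[:3, 3]))  # camera-to-origin distance
near, far = dist - 1, dist + 1
near_far = [0.95 * near, 1.05 * far]      # -> [0.475, 2.625] for this camera
print(near_far)
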
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_6_4.py ADDED
@@ -0,0 +1,420 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+
18
+
19
+ def get_ray_directions(H, W, focal, center=None):
20
+ """
21
+ Get ray directions for all pixels in camera coordinate.
22
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
23
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
24
+ Inputs:
25
+ H, W, focal: image height, width and focal length
26
+ Outputs:
27
+ directions: (H, W, 3), the direction of the rays in camera coordinate
28
+ """
29
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
30
+
31
+ i, j = grid.unbind(-1)
32
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
33
+ # see https://github.com/bmild/nerf/issues/24
34
+ cent = center if center is not None else [W / 2, H / 2]
35
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
36
+
37
+ return directions
38
+
39
+ def load_K_Rt_from_P(filename, P=None):
40
+ if P is None:
41
+ lines = open(filename).read().splitlines()
42
+ if len(lines) == 4:
43
+ lines = lines[1:]
44
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
45
+ P = np.asarray(lines).astype(np.float32).squeeze()
46
+
47
+ out = cv2.decomposeProjectionMatrix(P)
48
+ K = out[0]
49
+ R = out[1]
50
+ t = out[2]
51
+
52
+ K = K / K[2, 2]
53
+ intrinsics = np.eye(4)
54
+ intrinsics[:3, :3] = K
55
+
56
+ pose = np.eye(4, dtype=np.float32)
57
+ pose[:3, :3] = R.transpose() # ? why need transpose here
58
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
59
+
60
+ return intrinsics, pose # ! return cam2world matrix here
61
+
62
+
63
+ # ! load one ref-image with multiple src-images in camera coordinate system
64
+ class BlenderPerView(Dataset):
65
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
66
+ split_filepath=None, pair_filepath=None,
67
+ N_rays=512,
68
+ vol_dims=[128, 128, 128], batch_size=1,
69
+ clean_image=False, importance_sample=False, test_ref_views=[],
70
+ specific_dataset_name = 'GSO'
71
+ ):
72
+
73
+ # print("root_dir: ", root_dir)
74
+ self.root_dir = root_dir
75
+ self.split = split
76
+ # self.specific_dataset_name = 'Realfusion'
77
+ # self.specific_dataset_name = 'GSO'
78
+ # self.specific_dataset_name = 'Objaverse'
79
+ # self.specific_dataset_name = 'Zero123'
80
+
81
+ self.specific_dataset_name = specific_dataset_name
82
+ self.n_views = n_views
83
+ self.N_rays = N_rays
84
+ self.batch_size = batch_size # - used for constructing new metas for GRU fusion training
85
+
86
+ self.clean_image = clean_image
87
+ self.importance_sample = importance_sample
88
+ self.test_ref_views = test_ref_views # used for testing
89
+ self.scale_factor = 1.0
90
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
91
+ assert self.split in ['val', 'export_mesh'], 'only support val or export_mesh'
92
+ # find all subfolders
93
+ main_folder = os.path.join(root_dir, self.specific_dataset_name)
94
+ self.shape_list = os.listdir(main_folder)
95
+ self.shape_list.sort()
96
+
97
+ # self.shape_list = ['barrel_render']
98
+ # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
99
+
100
+
101
+ self.lvis_paths = []
102
+ for shape_name in self.shape_list:
103
+ self.lvis_paths.append(os.path.join(main_folder, shape_name))
104
+
105
+ # print("lvis_paths: ", self.lvis_paths)
106
+
107
+ if img_wh is not None:
108
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
109
+ 'img_wh must both be multiples of 32!'
110
+
111
+
112
+ # * bounding box for rendering
113
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
114
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
115
+
116
+ # - used for cost volume regularization
117
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
118
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
119
+
120
+
121
+ def define_transforms(self):
122
+ self.transform = T.Compose([T.ToTensor()])
123
+
124
+
125
+
126
+ def load_cam_info(self):
127
+ for vid, img_id in enumerate(self.img_ids):
128
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
129
+ self.all_intrinsics.append(intrinsic)
130
+ self.all_extrinsics.append(extrinsic)
131
+ self.all_near_fars.append(near_far)
132
+
133
+ def read_depth(self, filename):
134
+ pass
135
+
136
+ def read_mask(self, filename):
137
+ mask_h = cv2.imread(filename, 0)
138
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
139
+ interpolation=cv2.INTER_NEAREST)
140
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
141
+ interpolation=cv2.INTER_NEAREST)
142
+
143
+ mask[mask > 0] = 1 # the masks stored in png are not binary
144
+ mask_h[mask_h > 0] = 1
145
+
146
+ return mask, mask_h
147
+
148
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
149
+
150
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
151
+
152
+ radius = radius * factor
153
+ scale_mat = np.diag([radius, radius, radius, 1.0])
154
+ scale_mat[:3, 3] = center.cpu().numpy()
155
+ scale_mat = scale_mat.astype(np.float32)
156
+
157
+ return scale_mat, 1. / radius.cpu().numpy()
158
+
159
+ def __len__(self):
160
+ # return 8*len(self.lvis_paths)
161
+ return len(self.lvis_paths)
162
+
163
+
164
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
165
+ pass
166
+
167
+
168
+ def __getitem__(self, idx):
169
+ sample = {}
170
+ idx = idx * 8 # to be deleted
171
+ origin_idx = idx
172
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
173
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views
174
+
175
+ folder_path = self.lvis_paths[idx//8]
176
+ idx = idx % 8 # [0, 7]
177
+
178
+ # last subdir name
179
+ shape_name = os.path.split(folder_path)[-1]
180
+
181
+ pose_json_path = os.path.join(folder_path, "pose.json")
182
+ with open(pose_json_path, 'r') as f:
183
+ meta = json.load(f)
184
+
185
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
186
+ self.img_wh = (256, 256)
187
+ self.input_poses = np.array(list(meta["c2ws"].values()))
188
+ intrinsic = np.eye(4)
189
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
190
+ self.intrinsic = intrinsic
191
+ self.near_far = np.array(meta["near_far"])
192
+ self.near_far[1] = 1.8
193
+ self.define_transforms()
194
+ self.blender2opencv = np.array(
195
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
196
+ )
197
+
198
+ self.c2ws = []
199
+ self.w2cs = []
200
+ self.near_fars = []
201
+ # self.root_dir = root_dir
202
+ for image_idx, img_id in enumerate(self.img_ids):
203
+ pose = self.input_poses[image_idx]
204
+ c2w = pose @ self.blender2opencv
205
+ self.c2ws.append(c2w)
206
+ self.w2cs.append(np.linalg.inv(c2w))
207
+ self.near_fars.append(self.near_far)
208
+ self.c2ws = np.stack(self.c2ws, axis=0)
209
+ self.w2cs = np.stack(self.w2cs, axis=0)
210
+
211
+
212
+ self.all_intrinsics = [] # the cam info of the whole scene
213
+ self.all_extrinsics = []
214
+ self.all_near_fars = []
215
+ self.load_cam_info()
216
+
217
+
218
+ # target view
219
+ c2w = self.c2ws[idx]
220
+ w2c = np.linalg.inv(c2w)
221
+ w2c_ref = w2c
222
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
223
+
224
+ w2cs.append(w2c @ w2c_ref_inv)
225
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
226
+
227
+ # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
228
+ img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}')
229
+
230
+ img = Image.open(img_filename)
231
+ img = self.transform(img) # (4, h, w)
232
+
233
+
234
+ if img.shape[0] == 4:
235
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
236
+ imgs += [img]
237
+
238
+
239
+ depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
240
+ depth_h = depth_h.fill_(-1.0)
241
+ mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
242
+
243
+
244
+ depths_h.append(depth_h)
245
+ masks_h.append(mask_h)
246
+
247
+ intrinsic = self.intrinsic
248
+ intrinsics.append(intrinsic)
249
+
250
+
251
+ near_fars.append(self.near_fars[idx])
252
+ image_perm = 0 # only supervised on reference view
253
+
254
+ mask_dilated = None
255
+
256
+
257
+ src_views = range(8, 8 + 8 * 4)
258
+
259
+ for vid in src_views:
260
+ if ((vid - 8) // 4 == 4) or ((vid - 8) // 4 == 6):
261
+ continue
262
+
263
+ # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
264
+ img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}')
265
+ img = Image.open(img_filename)
266
+ img_wh = self.img_wh
267
+
268
+ img = self.transform(img)
269
+ if img.shape[0] == 4:
270
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
271
+
272
+ imgs += [img]
273
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
274
+ depths_h.append(depth_h)
275
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
276
+
277
+ near_fars.append(self.all_near_fars[vid])
278
+ intrinsics.append(self.all_intrinsics[vid])
279
+
280
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
281
+
282
+
283
+ # ! estimate scale_mat
284
+ scale_mat, scale_factor = self.cal_scale_mat(
285
+ img_hw=[img_wh[1], img_wh[0]],
286
+ intrinsics=intrinsics, extrinsics=w2cs,
287
+ near_fars=near_fars, factor=1.1
288
+ )
289
+
290
+
291
+ new_near_fars = []
292
+ new_w2cs = []
293
+ new_c2ws = []
294
+ new_affine_mats = []
295
+ new_depths_h = []
296
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
297
+
298
+ P = intrinsic @ extrinsic @ scale_mat
299
+ P = P[:3, :4]
300
+ # - should use load_K_Rt_from_P() to obtain c2w
301
+ c2w = load_K_Rt_from_P(None, P)[1]
302
+ w2c = np.linalg.inv(c2w)
303
+ new_w2cs.append(w2c)
304
+ new_c2ws.append(c2w)
305
+ affine_mat = np.eye(4)
306
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
307
+ new_affine_mats.append(affine_mat)
308
+
309
+ camera_o = c2w[:3, 3]
310
+ dist = np.sqrt(np.sum(camera_o ** 2))
311
+ near = dist - 1
312
+ far = dist + 1
313
+
314
+ new_near_fars.append([0.95 * near, 1.05 * far])
315
+ new_depths_h.append(depth * scale_factor)
316
+
317
+ # print(new_near_fars)
318
+ imgs = torch.stack(imgs).float()
319
+ depths_h = np.stack(new_depths_h)
320
+ masks_h = np.stack(masks_h)
321
+
322
+ affine_mats = np.stack(new_affine_mats)
323
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
324
+ new_near_fars)
325
+
326
+ if self.split == 'train':
327
+ start_idx = 0
328
+ else:
329
+ start_idx = 1
330
+
331
+
332
+
333
+ target_w2cs = []
334
+ target_intrinsics = []
335
+ new_target_w2cs = []
336
+ for i_idx in range(8):
337
+ target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
338
+ target_intrinsics.append(self.all_intrinsics[i_idx])
339
+
340
+ for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
341
+
342
+ P = intrinsic @ extrinsic @ scale_mat
343
+ P = P[:3, :4]
344
+ # - should use load_K_Rt_from_P() to obtain c2w
345
+ c2w = load_K_Rt_from_P(None, P)[1]
346
+ w2c = np.linalg.inv(c2w)
347
+ new_target_w2cs.append(w2c)
348
+ target_w2cs = np.stack(new_target_w2cs)
349
+
350
+
351
+
352
+ view_ids = [idx] + list(src_views)
353
+ sample['origin_idx'] = origin_idx
354
+ sample['images'] = imgs # (V, 3, H, W)
355
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
356
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
357
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
358
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
359
+ sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
360
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
361
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
362
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
363
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
364
+
365
+ # sample['light_idx'] = torch.tensor(light_idx)
366
+ sample['scan'] = shape_name
367
+
368
+ sample['scale_factor'] = torch.tensor(scale_factor)
369
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
370
+ sample['render_img_idx'] = torch.tensor(image_perm)
371
+ sample['partial_vol_origin'] = self.partial_vol_origin
372
+ sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
373
+ # print("meta: ", sample['meta'])
374
+
375
+ # - image to render
376
+ sample['query_image'] = sample['images'][0]
377
+ sample['query_c2w'] = sample['c2ws'][0]
378
+ sample['query_w2c'] = sample['w2cs'][0]
379
+ sample['query_intrinsic'] = sample['intrinsics'][0]
380
+ sample['query_depth'] = sample['depths_h'][0]
381
+ sample['query_mask'] = sample['masks_h'][0]
382
+ sample['query_near_far'] = sample['near_fars'][0]
383
+
384
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
385
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
386
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
387
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
388
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
389
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
390
+ sample['view_ids'] = sample['view_ids'][start_idx:]
391
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
392
+
393
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
394
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
395
+
396
+ # - generate rays
397
+ if ('val' in self.split) or ('test' in self.split):
398
+ sample_rays = gen_rays_from_single_image(
399
+ img_wh[1], img_wh[0],
400
+ sample['query_image'],
401
+ sample['query_intrinsic'],
402
+ sample['query_c2w'],
403
+ depth=sample['query_depth'],
404
+ mask=sample['query_mask'] if self.clean_image else None)
405
+ else:
406
+ sample_rays = gen_random_rays_from_single_image(
407
+ img_wh[1], img_wh[0],
408
+ self.N_rays,
409
+ sample['query_image'],
410
+ sample['query_intrinsic'],
411
+ sample['query_c2w'],
412
+ depth=sample['query_depth'],
413
+ mask=sample['query_mask'] if self.clean_image else None,
414
+ dilated_mask=mask_dilated,
415
+ importance_sample=self.importance_sample)
416
+
417
+
418
+ sample['rays'] = sample_rays
419
+
420
+ return sample
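
Relative to the neighbouring loaders, the only change in this file is the group filter in the source-view loop above: the 32 source ids (8..39) form eight groups of four, and groups 4 and 6 are dropped, leaving 24 source views. A standalone sketch of that rule, equivalent to the `continue` condition in the loop:

# Mirrors `((vid - 8) // 4 == 4) or ((vid - 8) // 4 == 6)` from the loop above.
src_views = range(8, 8 + 8 * 4)
kept = [vid for vid in src_views if (vid - 8) // 4 not in (4, 6)]
assert len(kept) == 6 * 4            # six groups of four source views remain
print(kept[:4], kept[-4:])           # [8, 9, 10, 11] [36, 37, 38, 39]
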
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_3.py ADDED
@@ -0,0 +1,428 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+
18
+
19
+ def get_ray_directions(H, W, focal, center=None):
20
+ """
21
+ Get ray directions for all pixels in camera coordinate.
22
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
23
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
24
+ Inputs:
25
+ H, W, focal: image height, width and focal length
26
+ Outputs:
27
+ directions: (H, W, 3), the direction of the rays in camera coordinate
28
+ """
29
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
30
+
31
+ i, j = grid.unbind(-1)
32
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
33
+ # see https://github.com/bmild/nerf/issues/24
34
+ cent = center if center is not None else [W / 2, H / 2]
35
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
36
+
37
+ return directions
38
+
39
+ def load_K_Rt_from_P(filename, P=None):
40
+ if P is None:
41
+ lines = open(filename).read().splitlines()
42
+ if len(lines) == 4:
43
+ lines = lines[1:]
44
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
45
+ P = np.asarray(lines).astype(np.float32).squeeze()
46
+
47
+ out = cv2.decomposeProjectionMatrix(P)
48
+ K = out[0]
49
+ R = out[1]
50
+ t = out[2]
51
+
52
+ K = K / K[2, 2]
53
+ intrinsics = np.eye(4)
54
+ intrinsics[:3, :3] = K
55
+
56
+ pose = np.eye(4, dtype=np.float32)
57
+ pose[:3, :3] = R.transpose() # ? why need transpose here
58
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
59
+
60
+ return intrinsics, pose # ! return cam2world matrix here
61
+
62
+
63
+ # ! load one ref-image with multiple src-images in camera coordinate system
64
+ class BlenderPerView(Dataset):
65
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
66
+ split_filepath=None, pair_filepath=None,
67
+ N_rays=512,
68
+ vol_dims=[128, 128, 128], batch_size=1,
69
+ clean_image=False, importance_sample=False, test_ref_views=[],
70
+ specific_dataset_name = 'GSO'
71
+ ):
72
+
73
+ # print("root_dir: ", root_dir)
74
+ self.root_dir = root_dir
75
+ self.split = split
76
+ # self.specific_dataset_name = 'Realfusion'
77
+ # self.specific_dataset_name = 'GSO'
78
+ # self.specific_dataset_name = 'Objaverse'
79
+ # self.specific_dataset_name = 'Zero123'
80
+
81
+ self.specific_dataset_name = specific_dataset_name
82
+ self.n_views = n_views
83
+ self.N_rays = N_rays
84
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
85
+
86
+ self.clean_image = clean_image
87
+ self.importance_sample = importance_sample
88
+ self.test_ref_views = test_ref_views # used for testing
89
+ self.scale_factor = 1.0
90
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
91
+ assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh'
92
+ # find all subfolders
93
+ main_folder = os.path.join(root_dir, self.specific_dataset_name)
94
+ self.shape_list = os.listdir(main_folder)
95
+ self.shape_list.sort()
96
+
97
+ # self.shape_list = ['barrel_render']
98
+ # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
99
+
100
+
101
+ self.lvis_paths = []
102
+ for shape_name in self.shape_list:
103
+ self.lvis_paths.append(os.path.join(main_folder, shape_name))
104
+
105
+ # print("lvis_paths: ", self.lvis_paths)
106
+
107
+ if img_wh is not None:
108
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
109
+ 'img_wh must both be multiples of 32!'
110
+
111
+
112
+ # * bounding box for rendering
113
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
114
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
115
+
116
+ # - used for cost volume regularization
117
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
118
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
119
+
120
+
121
+ def define_transforms(self):
122
+ self.transform = T.Compose([T.ToTensor()])
123
+
124
+
125
+
126
+ def load_cam_info(self):
127
+ for vid, img_id in enumerate(self.img_ids):
128
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
129
+ self.all_intrinsics.append(intrinsic)
130
+ self.all_extrinsics.append(extrinsic)
131
+ self.all_near_fars.append(near_far)
132
+
133
+ def read_depth(self, filename):
134
+ pass
135
+
136
+ def read_mask(self, filename):
137
+ mask_h = cv2.imread(filename, 0)
138
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
139
+ interpolation=cv2.INTER_NEAREST)
140
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
141
+ interpolation=cv2.INTER_NEAREST)
142
+
143
+ mask[mask > 0] = 1 # the masks stored in png are not binary
144
+ mask_h[mask_h > 0] = 1
145
+
146
+ return mask, mask_h
147
+
148
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
149
+
150
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
151
+
152
+ radius = radius * factor
153
+ scale_mat = np.diag([radius, radius, radius, 1.0])
154
+ scale_mat[:3, 3] = center.cpu().numpy()
155
+ scale_mat = scale_mat.astype(np.float32)
156
+
157
+ return scale_mat, 1. / radius.cpu().numpy()
158
+
159
+ def __len__(self):
160
+ # return 8*len(self.lvis_paths)
161
+ return len(self.lvis_paths)
162
+
163
+
164
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
165
+ pass
166
+
167
+
168
+ def __getitem__(self, idx):
169
+ sample = {}
170
+ idx = idx * 8 # to be deleted
171
+ origin_idx = idx
172
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
173
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views
174
+
175
+ folder_path = self.lvis_paths[idx//8]
176
+ idx = idx % 8 # [0, 7]
177
+
178
+ # last subdir name
179
+ shape_name = os.path.split(folder_path)[-1]
180
+
181
+ pose_json_path = os.path.join(folder_path, "pose.json")
182
+ with open(pose_json_path, 'r') as f:
183
+ meta = json.load(f)
184
+
185
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
186
+ self.img_wh = (256, 256)
187
+ self.input_poses = np.array(list(meta["c2ws"].values()))
188
+ intrinsic = np.eye(4)
189
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
190
+ self.intrinsic = intrinsic
191
+ self.near_far = np.array(meta["near_far"])
192
+ self.near_far[1] = 1.8
193
+ self.define_transforms()
194
+ self.blender2opencv = np.array(
195
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
196
+ )
197
+
198
+ self.c2ws = []
199
+ self.w2cs = []
200
+ self.near_fars = []
201
+ # self.root_dir = root_dir
202
+ for image_idx, img_id in enumerate(self.img_ids):
203
+ pose = self.input_poses[image_idx]
204
+ c2w = pose @ self.blender2opencv
205
+ self.c2ws.append(c2w)
206
+ self.w2cs.append(np.linalg.inv(c2w))
207
+ self.near_fars.append(self.near_far)
208
+ self.c2ws = np.stack(self.c2ws, axis=0)
209
+ self.w2cs = np.stack(self.w2cs, axis=0)
210
+
211
+
212
+ self.all_intrinsics = [] # the cam info of the whole scene
213
+ self.all_extrinsics = []
214
+ self.all_near_fars = []
215
+ self.load_cam_info()
216
+
217
+
218
+ # target view
219
+ c2w = self.c2ws[idx]
220
+ w2c = np.linalg.inv(c2w)
221
+ w2c_ref = w2c
222
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
223
+
224
+ w2cs.append(w2c @ w2c_ref_inv)
225
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
226
+
227
+ # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
228
+ img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}')
229
+
230
+ img = Image.open(img_filename)
231
+ img = self.transform(img) # (4, h, w)
232
+
233
+
234
+ if img.shape[0] == 4:
235
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
236
+ imgs += [img]
237
+
238
+
239
+ depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
240
+ depth_h = depth_h.fill_(-1.0)
241
+ mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
242
+
243
+
244
+ depths_h.append(depth_h)
245
+ masks_h.append(mask_h)
246
+
247
+ intrinsic = self.intrinsic
248
+ intrinsics.append(intrinsic)
249
+
250
+
251
+ near_fars.append(self.near_fars[idx])
252
+ image_perm = 0 # only supervised on reference view
253
+
254
+ mask_dilated = None
255
+
256
+
257
+ # src_views = range(8, 8 + 8 * 4)
258
+
259
+ src_views = list()
260
+ for i in range(8):
261
+ # randomly choose 3 different number from [0,3]
262
+ # local_idxs = np.random.choice(4, 3, replace=False)
263
+ local_idxs = [0, 2, 3]
264
+ # local_idxs = np.random.choice(4, 3, replace=False)
265
+
266
+ local_idxs = [8 + i * 4 + local_idx for local_idx in local_idxs]
267
+ src_views += local_idxs
268
+
269
+ for vid in src_views:
270
+
271
+ # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
272
+ img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}')
273
+ img = Image.open(img_filename)
274
+ img_wh = self.img_wh
275
+
276
+ img = self.transform(img)
277
+ if img.shape[0] == 4:
278
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
279
+
280
+ imgs += [img]
281
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
282
+ depths_h.append(depth_h)
283
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
284
+
285
+ near_fars.append(self.all_near_fars[vid])
286
+ intrinsics.append(self.all_intrinsics[vid])
287
+
288
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
289
+
290
+
291
+ # ! estimate scale_mat
292
+ scale_mat, scale_factor = self.cal_scale_mat(
293
+ img_hw=[img_wh[1], img_wh[0]],
294
+ intrinsics=intrinsics, extrinsics=w2cs,
295
+ near_fars=near_fars, factor=1.1
296
+ )
297
+
298
+
299
+ new_near_fars = []
300
+ new_w2cs = []
301
+ new_c2ws = []
302
+ new_affine_mats = []
303
+ new_depths_h = []
304
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
305
+
306
+ P = intrinsic @ extrinsic @ scale_mat
307
+ P = P[:3, :4]
308
+ # - should use load_K_Rt_from_P() to obtain c2w
309
+ c2w = load_K_Rt_from_P(None, P)[1]
310
+ w2c = np.linalg.inv(c2w)
311
+ new_w2cs.append(w2c)
312
+ new_c2ws.append(c2w)
313
+ affine_mat = np.eye(4)
314
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
315
+ new_affine_mats.append(affine_mat)
316
+
317
+ camera_o = c2w[:3, 3]
318
+ dist = np.sqrt(np.sum(camera_o ** 2))
319
+ near = dist - 1
320
+ far = dist + 1
321
+
322
+ new_near_fars.append([0.95 * near, 1.05 * far])
323
+ new_depths_h.append(depth * scale_factor)
324
+
325
+ # print(new_near_fars)
326
+ imgs = torch.stack(imgs).float()
327
+ depths_h = np.stack(new_depths_h)
328
+ masks_h = np.stack(masks_h)
329
+
330
+ affine_mats = np.stack(new_affine_mats)
331
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
332
+ new_near_fars)
333
+
334
+ if self.split == 'train':
335
+ start_idx = 0
336
+ else:
337
+ start_idx = 1
338
+
339
+
340
+
341
+ target_w2cs = []
342
+ target_intrinsics = []
343
+ new_target_w2cs = []
344
+ for i_idx in range(8):
345
+ target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
346
+ target_intrinsics.append(self.all_intrinsics[i_idx])
347
+
348
+ for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
349
+
350
+ P = intrinsic @ extrinsic @ scale_mat
351
+ P = P[:3, :4]
352
+ # - should use load_K_Rt_from_P() to obtain c2w
353
+ c2w = load_K_Rt_from_P(None, P)[1]
354
+ w2c = np.linalg.inv(c2w)
355
+ new_target_w2cs.append(w2c)
356
+ target_w2cs = np.stack(new_target_w2cs)
357
+
358
+
359
+
360
+ view_ids = [idx] + list(src_views)
361
+ sample['origin_idx'] = origin_idx
362
+ sample['images'] = imgs # (V, 3, H, W)
363
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
364
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
365
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
366
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
367
+ sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
368
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
369
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
370
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
371
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
372
+
373
+ # sample['light_idx'] = torch.tensor(light_idx)
374
+ sample['scan'] = shape_name
375
+
376
+ sample['scale_factor'] = torch.tensor(scale_factor)
377
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
378
+ sample['render_img_idx'] = torch.tensor(image_perm)
379
+ sample['partial_vol_origin'] = self.partial_vol_origin
380
+ sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
381
+ # print("meta: ", sample['meta'])
382
+
383
+ # - image to render
384
+ sample['query_image'] = sample['images'][0]
385
+ sample['query_c2w'] = sample['c2ws'][0]
386
+ sample['query_w2c'] = sample['w2cs'][0]
387
+ sample['query_intrinsic'] = sample['intrinsics'][0]
388
+ sample['query_depth'] = sample['depths_h'][0]
389
+ sample['query_mask'] = sample['masks_h'][0]
390
+ sample['query_near_far'] = sample['near_fars'][0]
391
+
392
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
393
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
394
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
395
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
396
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
397
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
398
+ sample['view_ids'] = sample['view_ids'][start_idx:]
399
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
400
+
401
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
402
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
403
+
404
+ # - generate rays
405
+ if ('val' in self.split) or ('test' in self.split):
406
+ sample_rays = gen_rays_from_single_image(
407
+ img_wh[1], img_wh[0],
408
+ sample['query_image'],
409
+ sample['query_intrinsic'],
410
+ sample['query_c2w'],
411
+ depth=sample['query_depth'],
412
+ mask=sample['query_mask'] if self.clean_image else None)
413
+ else:
414
+ sample_rays = gen_random_rays_from_single_image(
415
+ img_wh[1], img_wh[0],
416
+ self.N_rays,
417
+ sample['query_image'],
418
+ sample['query_intrinsic'],
419
+ sample['query_c2w'],
420
+ depth=sample['query_depth'],
421
+ mask=sample['query_mask'] if self.clean_image else None,
422
+ dilated_mask=mask_dilated,
423
+ importance_sample=self.importance_sample)
424
+
425
+
426
+ sample['rays'] = sample_rays
427
+
428
+ return sample
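
This variant instead keeps a fixed three-of-four subset (local indices 0, 2, 3) from every one of the eight source groups, for 24 source views in total; the commented-out np.random.choice line suggests the subset was once sampled randomly. A standalone sketch of the resulting id list:

# Mirrors the fixed selection above: views 0, 2 and 3 of each 4-view group.
src_views = []
for i in range(8):
    src_views += [8 + i * 4 + local_idx for local_idx in (0, 2, 3)]
assert len(src_views) == 24
print(src_views[:6])                 # [8, 10, 11, 12, 14, 15]
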
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_wide.py ADDED
@@ -0,0 +1,420 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+
18
+
19
+ def get_ray_directions(H, W, focal, center=None):
20
+ """
21
+ Get ray directions for all pixels in camera coordinate.
22
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
23
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
24
+ Inputs:
25
+ H, W, focal: image height, width and focal length
26
+ Outputs:
27
+ directions: (H, W, 3), the direction of the rays in camera coordinate
28
+ """
29
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
30
+
31
+ i, j = grid.unbind(-1)
32
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
33
+ # see https://github.com/bmild/nerf/issues/24
34
+ cent = center if center is not None else [W / 2, H / 2]
35
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
36
+
37
+ return directions
38
+
39
+ def load_K_Rt_from_P(filename, P=None):
40
+ if P is None:
41
+ lines = open(filename).read().splitlines()
42
+ if len(lines) == 4:
43
+ lines = lines[1:]
44
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
45
+ P = np.asarray(lines).astype(np.float32).squeeze()
46
+
47
+ out = cv2.decomposeProjectionMatrix(P)
48
+ K = out[0]
49
+ R = out[1]
50
+ t = out[2]
51
+
52
+ K = K / K[2, 2]
53
+ intrinsics = np.eye(4)
54
+ intrinsics[:3, :3] = K
55
+
56
+ pose = np.eye(4, dtype=np.float32)
57
+ pose[:3, :3] = R.transpose() # ? why need transpose here
58
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
59
+
60
+ return intrinsics, pose # ! return cam2world matrix here
61
+
62
+
63
+ # ! load one ref-image with multiple src-images in camera coordinate system
64
+ class BlenderPerView(Dataset):
65
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
66
+ split_filepath=None, pair_filepath=None,
67
+ N_rays=512,
68
+ vol_dims=[128, 128, 128], batch_size=1,
69
+ clean_image=False, importance_sample=False, test_ref_views=[],
70
+ specific_dataset_name = 'GSO'
71
+ ):
72
+
73
+ # print("root_dir: ", root_dir)
74
+ self.root_dir = root_dir
75
+ self.split = split
76
+ # self.specific_dataset_name = 'Realfusion'
77
+ # self.specific_dataset_name = 'GSO'
78
+ # self.specific_dataset_name = 'Objaverse'
79
+ # self.specific_dataset_name = 'Zero123'
80
+
81
+ self.specific_dataset_name = specific_dataset_name
82
+ self.n_views = n_views
83
+ self.N_rays = N_rays
84
+ self.batch_size = batch_size # - used for constructing new metas for GRU fusion training
85
+
86
+ self.clean_image = clean_image
87
+ self.importance_sample = importance_sample
88
+ self.test_ref_views = test_ref_views # used for testing
89
+ self.scale_factor = 1.0
90
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
91
+ assert self.split in ['val', 'export_mesh'], 'only support val or export_mesh'
92
+ # find all subfolders
93
+ main_folder = os.path.join(root_dir, self.specific_dataset_name)
94
+ self.shape_list = os.listdir(main_folder)
95
+ self.shape_list.sort()
96
+
97
+ # self.shape_list = ['barrel_render']
98
+ # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
99
+
100
+
101
+ self.lvis_paths = []
102
+ for shape_name in self.shape_list:
103
+ self.lvis_paths.append(os.path.join(main_folder, shape_name))
104
+
105
+ # print("lvis_paths: ", self.lvis_paths)
106
+
107
+ if img_wh is not None:
108
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
109
+ 'img_wh must both be multiples of 32!'
110
+
111
+
112
+ # * bounding box for rendering
113
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
114
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
115
+
116
+ # - used for cost volume regularization
117
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
118
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
119
+
120
+
121
+ def define_transforms(self):
122
+ self.transform = T.Compose([T.ToTensor()])
123
+
124
+
125
+
126
+ def load_cam_info(self):
127
+ for vid, img_id in enumerate(self.img_ids):
128
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
129
+ self.all_intrinsics.append(intrinsic)
130
+ self.all_extrinsics.append(extrinsic)
131
+ self.all_near_fars.append(near_far)
132
+
133
+ def read_depth(self, filename):
134
+ pass
135
+
136
+ def read_mask(self, filename):
137
+ mask_h = cv2.imread(filename, 0)
138
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
139
+ interpolation=cv2.INTER_NEAREST)
140
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
141
+ interpolation=cv2.INTER_NEAREST)
142
+
143
+ mask[mask > 0] = 1 # the masks stored in png are not binary
144
+ mask_h[mask_h > 0] = 1
145
+
146
+ return mask, mask_h
147
+
148
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
149
+
150
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
151
+
152
+ radius = radius * factor
153
+ scale_mat = np.diag([radius, radius, radius, 1.0])
154
+ scale_mat[:3, 3] = center.cpu().numpy()
155
+ scale_mat = scale_mat.astype(np.float32)
156
+
157
+ return scale_mat, 1. / radius.cpu().numpy()
158
+
159
+ def __len__(self):
160
+ # return 8*len(self.lvis_paths)
161
+ return len(self.lvis_paths)
162
+
163
+
164
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
165
+ pass
166
+
167
+
168
+ def __getitem__(self, idx):
169
+ sample = {}
170
+ idx = idx * 8 # to be deleted
171
+ origin_idx = idx
172
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
173
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views
174
+
175
+ folder_path = self.lvis_paths[idx//8]
176
+ idx = idx % 8 # [0, 7]
177
+
178
+ # last subdir name
179
+ shape_name = os.path.split(folder_path)[-1]
180
+
181
+ pose_json_path = os.path.join(folder_path, "pose.json")
182
+ with open(pose_json_path, 'r') as f:
183
+ meta = json.load(f)
184
+
185
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
186
+ self.img_wh = (256, 256)
187
+ self.input_poses = np.array(list(meta["c2ws"].values()))
188
+ intrinsic = np.eye(4)
189
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
190
+ self.intrinsic = intrinsic
191
+ self.near_far = np.array(meta["near_far"])
192
+ self.near_far[1] = 1.8
193
+ self.define_transforms()
194
+ self.blender2opencv = np.array(
195
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
196
+ )
197
+
198
+ self.c2ws = []
199
+ self.w2cs = []
200
+ self.near_fars = []
201
+ # self.root_dir = root_dir
202
+ for image_idx, img_id in enumerate(self.img_ids):
203
+ pose = self.input_poses[image_idx]
204
+ c2w = pose @ self.blender2opencv
205
+ self.c2ws.append(c2w)
206
+ self.w2cs.append(np.linalg.inv(c2w))
207
+ self.near_fars.append(self.near_far)
208
+ self.c2ws = np.stack(self.c2ws, axis=0)
209
+ self.w2cs = np.stack(self.w2cs, axis=0)
210
+
211
+
212
+ self.all_intrinsics = [] # the cam info of the whole scene
213
+ self.all_extrinsics = []
214
+ self.all_near_fars = []
215
+ self.load_cam_info()
216
+
217
+
218
+ # target view
219
+ c2w = self.c2ws[idx]
220
+ w2c = np.linalg.inv(c2w)
221
+ w2c_ref = w2c
222
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
223
+
224
+ w2cs.append(w2c @ w2c_ref_inv)
225
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
226
+
227
+ # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
228
+ img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}')
229
+
230
+ img = Image.open(img_filename)
231
+ img = self.transform(img) # (4, h, w)
232
+
233
+
234
+ if img.shape[0] == 4:
235
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
236
+ imgs += [img]
237
+
238
+
239
+ depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
240
+ depth_h = depth_h.fill_(-1.0)
241
+ mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
242
+
243
+
244
+ depths_h.append(depth_h)
245
+ masks_h.append(mask_h)
246
+
247
+ intrinsic = self.intrinsic
248
+ intrinsics.append(intrinsic)
249
+
250
+
251
+ near_fars.append(self.near_fars[idx])
252
+ image_perm = 0 # only supervised on reference view
253
+
254
+ mask_dilated = None
255
+
256
+
257
+ src_views = range(8)
258
+
259
+
260
+ for vid in src_views:
261
+
262
+ # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
263
+ # img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}')
264
+ img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[vid]}')
265
+ img = Image.open(img_filename)
266
+ img_wh = self.img_wh
267
+
268
+ img = self.transform(img)
269
+ if img.shape[0] == 4:
270
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
271
+
272
+ imgs += [img]
273
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
274
+ depths_h.append(depth_h)
275
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
276
+
277
+ near_fars.append(self.all_near_fars[vid])
278
+ intrinsics.append(self.all_intrinsics[vid])
279
+
280
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
281
+
282
+
283
+ # ! estimate scale_mat
284
+ scale_mat, scale_factor = self.cal_scale_mat(
285
+ img_hw=[img_wh[1], img_wh[0]],
286
+ intrinsics=intrinsics, extrinsics=w2cs,
287
+ near_fars=near_fars, factor=1.1
288
+ )
289
+
290
+
291
+ new_near_fars = []
292
+ new_w2cs = []
293
+ new_c2ws = []
294
+ new_affine_mats = []
295
+ new_depths_h = []
296
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
297
+
298
+ P = intrinsic @ extrinsic @ scale_mat
299
+ P = P[:3, :4]
300
+ # - should use load_K_Rt_from_P() to obtain c2w
301
+ c2w = load_K_Rt_from_P(None, P)[1]
302
+ w2c = np.linalg.inv(c2w)
303
+ new_w2cs.append(w2c)
304
+ new_c2ws.append(c2w)
305
+ affine_mat = np.eye(4)
306
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
307
+ new_affine_mats.append(affine_mat)
308
+
309
+ camera_o = c2w[:3, 3]
310
+ dist = np.sqrt(np.sum(camera_o ** 2))
311
+ near = dist - 1
312
+ far = dist + 1
313
+
314
+ new_near_fars.append([0.95 * near, 1.05 * far])
315
+ new_depths_h.append(depth * scale_factor)
316
+
317
+ # print(new_near_fars)
318
+ imgs = torch.stack(imgs).float()
319
+ depths_h = np.stack(new_depths_h)
320
+ masks_h = np.stack(masks_h)
321
+
322
+ affine_mats = np.stack(new_affine_mats)
323
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
324
+ new_near_fars)
325
+
326
+ if self.split == 'train':
327
+ start_idx = 0
328
+ else:
329
+ start_idx = 1
330
+
331
+
332
+
333
+ target_w2cs = []
334
+ target_intrinsics = []
335
+ new_target_w2cs = []
336
+ for i_idx in range(8):
337
+ target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
338
+ target_intrinsics.append(self.all_intrinsics[i_idx])
339
+
340
+ for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
341
+
342
+ P = intrinsic @ extrinsic @ scale_mat
343
+ P = P[:3, :4]
344
+ # - should use load_K_Rt_from_P() to obtain c2w
345
+ c2w = load_K_Rt_from_P(None, P)[1]
346
+ w2c = np.linalg.inv(c2w)
347
+ new_target_w2cs.append(w2c)
348
+ target_w2cs = np.stack(new_target_w2cs)
349
+
350
+
351
+
352
+ view_ids = [idx] + list(src_views)
353
+ sample['origin_idx'] = origin_idx
354
+ sample['images'] = imgs # (V, 3, H, W)
355
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
356
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
357
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
358
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
359
+ sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
360
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
361
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
362
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
363
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
364
+
365
+ # sample['light_idx'] = torch.tensor(light_idx)
366
+ sample['scan'] = shape_name
367
+
368
+ sample['scale_factor'] = torch.tensor(scale_factor)
369
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
370
+ sample['render_img_idx'] = torch.tensor(image_perm)
371
+ sample['partial_vol_origin'] = self.partial_vol_origin
372
+ sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
373
+ # print("meta: ", sample['meta'])
374
+
375
+ # - image to render
376
+ sample['query_image'] = sample['images'][0]
377
+ sample['query_c2w'] = sample['c2ws'][0]
378
+ sample['query_w2c'] = sample['w2cs'][0]
379
+ sample['query_intrinsic'] = sample['intrinsics'][0]
380
+ sample['query_depth'] = sample['depths_h'][0]
381
+ sample['query_mask'] = sample['masks_h'][0]
382
+ sample['query_near_far'] = sample['near_fars'][0]
383
+
384
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
385
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
386
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
387
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
388
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
389
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
390
+ sample['view_ids'] = sample['view_ids'][start_idx:]
391
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
392
+
393
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
394
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
395
+
396
+ # - generate rays
397
+ if ('val' in self.split) or ('test' in self.split):
398
+ sample_rays = gen_rays_from_single_image(
399
+ img_wh[1], img_wh[0],
400
+ sample['query_image'],
401
+ sample['query_intrinsic'],
402
+ sample['query_c2w'],
403
+ depth=sample['query_depth'],
404
+ mask=sample['query_mask'] if self.clean_image else None)
405
+ else:
406
+ sample_rays = gen_random_rays_from_single_image(
407
+ img_wh[1], img_wh[0],
408
+ self.N_rays,
409
+ sample['query_image'],
410
+ sample['query_intrinsic'],
411
+ sample['query_c2w'],
412
+ depth=sample['query_depth'],
413
+ mask=sample['query_mask'] if self.clean_image else None,
414
+ dilated_mask=mask_dilated,
415
+ importance_sample=self.importance_sample)
416
+
417
+
418
+ sample['rays'] = sample_rays
419
+
420
+ return sample
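
For reference, a minimal usage sketch for this loader; it is not part of the commit, the root_dir value is a placeholder, and the import assumes the working directory is SparseNeuS_demo_v1 with a 'GSO/<shape>/pose.json' layout under root_dir:

# Hedged sketch: root_dir, dataset name and on-disk layout are placeholder assumptions.
from torch.utils.data import DataLoader
from data.blender_general_narrow_all_eval_new_data_8_wide import BlenderPerView

dataset = BlenderPerView(root_dir='./eval_data', split='val',
                         img_wh=(256, 256), N_rays=512,
                         clean_image=True, specific_dataset_name='GSO')
loader = DataLoader(dataset, batch_size=1, shuffle=False)
sample = next(iter(loader))
print(sample['images'].shape)        # (1, V, 3, 256, 256) after batching
print(sample['meta'][0])             # e.g. 'GSO_<shape>_refview0'
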
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_temp.py ADDED
@@ -0,0 +1,417 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+
18
+
19
+ def get_ray_directions(H, W, focal, center=None):
20
+ """
21
+ Get ray directions for all pixels in camera coordinate.
22
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
23
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
24
+ Inputs:
25
+ H, W, focal: image height, width and focal length
26
+ Outputs:
27
+ directions: (H, W, 3), the direction of the rays in camera coordinate
28
+ """
29
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
30
+
31
+ i, j = grid.unbind(-1)
32
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
33
+ # see https://github.com/bmild/nerf/issues/24
34
+ cent = center if center is not None else [W / 2, H / 2]
35
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
36
+
37
+ return directions
38
+
39
+ def load_K_Rt_from_P(filename, P=None):
40
+ if P is None:
41
+ lines = open(filename).read().splitlines()
42
+ if len(lines) == 4:
43
+ lines = lines[1:]
44
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
45
+ P = np.asarray(lines).astype(np.float32).squeeze()
46
+
47
+ out = cv2.decomposeProjectionMatrix(P)
48
+ K = out[0]
49
+ R = out[1]
50
+ t = out[2]
51
+
52
+ K = K / K[2, 2]
53
+ intrinsics = np.eye(4)
54
+ intrinsics[:3, :3] = K
55
+
56
+ pose = np.eye(4, dtype=np.float32)
57
+ pose[:3, :3] = R.transpose() # ? why need transpose here
58
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
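+ # note: cv2.decomposeProjectionMatrix returns the world-to-camera rotation and the camera center in homogeneous coordinates, so transposing R and dehomogenizing t give the cam2world pose directly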
59
+
60
+ return intrinsics, pose # ! return cam2world matrix here
61
+
62
+
63
+ # ! load one ref-image with multiple src-images in camera coordinate system
64
+ class BlenderPerView(Dataset):
65
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
66
+ split_filepath=None, pair_filepath=None,
67
+ N_rays=512,
68
+ vol_dims=[128, 128, 128], batch_size=1,
69
+ clean_image=False, importance_sample=False, test_ref_views=[],
70
+ specific_dataset_name = 'GSO'
71
+ ):
72
+
73
+ # print("root_dir: ", root_dir)
74
+ self.root_dir = root_dir
75
+ self.split = split
76
+ # self.specific_dataset_name = 'Realfusion'
77
+ # self.specific_dataset_name = 'GSO'
78
+ # self.specific_dataset_name = 'Objaverse'
79
+ self.specific_dataset_name = 'Objaverse_archived'
80
+
81
+ # self.specific_dataset_name = specific_dataset_name
82
+ self.n_views = n_views
83
+ self.N_rays = N_rays
84
+ self.batch_size = batch_size # - used to construct new metas for GRU fusion training
85
+
86
+ self.clean_image = clean_image
87
+ self.importance_sample = importance_sample
88
+ self.test_ref_views = test_ref_views # used for testing
89
+ self.scale_factor = 1.0
90
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
91
+ assert self.split in ['val', 'export_mesh'], 'only support val or export_mesh'
92
+ # find all subfolders
93
+ main_folder = os.path.join(root_dir, self.specific_dataset_name)
94
+ self.shape_list = os.listdir(main_folder)
95
+ self.shape_list.sort()
96
+
97
+ # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
98
+
99
+
100
+ self.lvis_paths = []
101
+ for shape_name in self.shape_list:
102
+ self.lvis_paths.append(os.path.join(main_folder, shape_name))
103
+
104
+ # print("lvis_paths: ", self.lvis_paths)
105
+
106
+ if img_wh is not None:
107
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
108
+ 'img_wh must both be multiples of 32!'
109
+
110
+
111
+ # * bounding box for rendering
112
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
113
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
114
+
115
+ # - used for cost volume regularization
116
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
117
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
118
+
119
+
120
+ def define_transforms(self):
121
+ self.transform = T.Compose([T.ToTensor()])
122
+
123
+
124
+
125
+ def load_cam_info(self):
126
+ for vid, img_id in enumerate(self.img_ids):
127
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
128
+ self.all_intrinsics.append(intrinsic)
129
+ self.all_extrinsics.append(extrinsic)
130
+ self.all_near_fars.append(near_far)
131
+
132
+ def read_depth(self, filename):
133
+ pass
134
+
135
+ def read_mask(self, filename):
136
+ mask_h = cv2.imread(filename, 0)
137
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
138
+ interpolation=cv2.INTER_NEAREST)
139
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
140
+ interpolation=cv2.INTER_NEAREST)
141
+
142
+ mask[mask > 0] = 1 # the masks stored in png are not binary
143
+ mask_h[mask_h > 0] = 1
144
+
145
+ return mask, mask_h
146
+
147
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
148
+
149
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
150
+
151
+ radius = radius * factor
152
+ scale_mat = np.diag([radius, radius, radius, 1.0])
153
+ scale_mat[:3, 3] = center.cpu().numpy()
154
+ scale_mat = scale_mat.astype(np.float32)
155
+
156
+ return scale_mat, 1. / radius.cpu().numpy()
157
+
158
+ def __len__(self):
159
+ # return 8*len(self.lvis_paths)
160
+ return len(self.lvis_paths)
161
+
162
+
163
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
164
+ pass
165
+
166
+
167
+ def __getitem__(self, idx):
168
+ sample = {}
169
+ idx = idx * 8 # to be deleted
170
+ origin_idx = idx
171
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
172
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views
173
+
174
+ folder_path = self.lvis_paths[idx//8]
175
+ idx = idx % 8 # [0, 7]
176
+
177
+ # last subdir name
178
+ shape_name = os.path.split(folder_path)[-1]
179
+
180
+ pose_json_path = os.path.join('/objaverse-processed/zero12345_img/zero12345_narrow_pose.json')
181
+ with open(pose_json_path, 'r') as f:
182
+ meta = json.load(f)
183
+
184
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
185
+ self.img_wh = (256, 256)
186
+ self.input_poses = np.array(list(meta["c2ws"].values()))
187
+ intrinsic = np.eye(4)
188
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
189
+ self.intrinsic = intrinsic
190
+ self.near_far = np.array(meta["near_far"])
191
+ self.near_far[1] = 1.8
192
+ self.define_transforms()
193
+ self.blender2opencv = np.array(
194
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
195
+ )
196
+
197
+ self.c2ws = []
198
+ self.w2cs = []
199
+ self.near_fars = []
200
+ # self.root_dir = root_dir
201
+ for image_idx, img_id in enumerate(self.img_ids):
202
+ pose = self.input_poses[image_idx]
203
+ c2w = pose @ self.blender2opencv
204
+ self.c2ws.append(c2w)
205
+ self.w2cs.append(np.linalg.inv(c2w))
206
+ self.near_fars.append(self.near_far)
207
+ self.c2ws = np.stack(self.c2ws, axis=0)
208
+ self.w2cs = np.stack(self.w2cs, axis=0)
209
+
210
+
211
+ self.all_intrinsics = [] # the cam info of the whole scene
212
+ self.all_extrinsics = []
213
+ self.all_near_fars = []
214
+ self.load_cam_info()
215
+
216
+
217
+ # target view
218
+ c2w = self.c2ws[idx]
219
+ w2c = np.linalg.inv(c2w)
220
+ w2c_ref = w2c
221
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
222
+
223
+ w2cs.append(w2c @ w2c_ref_inv)
224
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
225
+
226
+ # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
227
+ img_filename = os.path.join(folder_path, 'stage1_8', f'{idx}.png')
228
+
229
+ img = Image.open(img_filename)
230
+ img = self.transform(img) # (4, h, w)
231
+
232
+
233
+ if img.shape[0] == 4:
234
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
235
+ imgs += [img]
236
+
237
+
238
+ depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
239
+ depth_h = depth_h.fill_(-1.0)
240
+ mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
241
+
242
+
243
+ depths_h.append(depth_h)
244
+ masks_h.append(mask_h)
245
+
246
+ intrinsic = self.intrinsic
247
+ intrinsics.append(intrinsic)
248
+
249
+
250
+ near_fars.append(self.near_fars[idx])
251
+ image_perm = 0 # only supervised on reference view
252
+
253
+ mask_dilated = None
254
+
255
+
256
+ src_views = range(8, 8 + 8 * 4)
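+ # views 0-7 are the first-stage reference views (stage1_8); views 8-39 are the 8x4 second-stage predictions read below from stage2_8/{group}_{offset}.png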
257
+
258
+ for vid in src_views:
259
+
260
+ # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
261
+ img_filename = os.path.join(folder_path, 'stage2_8', f'{(vid-8)//4}_{(vid-8)%4}.png')
262
+ img = Image.open(img_filename)
263
+ img_wh = self.img_wh
264
+
265
+ img = self.transform(img)
266
+ if img.shape[0] == 4:
267
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
268
+
269
+ imgs += [img]
270
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
271
+ depths_h.append(depth_h)
272
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
273
+
274
+ near_fars.append(self.all_near_fars[vid])
275
+ intrinsics.append(self.all_intrinsics[vid])
276
+
277
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
278
+
279
+
280
+ # ! estimate scale_mat
281
+ scale_mat, scale_factor = self.cal_scale_mat(
282
+ img_hw=[img_wh[1], img_wh[0]],
283
+ intrinsics=intrinsics, extrinsics=w2cs,
284
+ near_fars=near_fars, factor=1.1
285
+ )
286
+
287
+
288
+ new_near_fars = []
289
+ new_w2cs = []
290
+ new_c2ws = []
291
+ new_affine_mats = []
292
+ new_depths_h = []
293
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
294
+
295
+ P = intrinsic @ extrinsic @ scale_mat
296
+ P = P[:3, :4]
297
+ # - should use load_K_Rt_from_P() to obtain c2w
298
+ c2w = load_K_Rt_from_P(None, P)[1]
299
+ w2c = np.linalg.inv(c2w)
300
+ new_w2cs.append(w2c)
301
+ new_c2ws.append(c2w)
302
+ affine_mat = np.eye(4)
303
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
304
+ new_affine_mats.append(affine_mat)
305
+
306
+ camera_o = c2w[:3, 3]
307
+ dist = np.sqrt(np.sum(camera_o ** 2))
308
+ near = dist - 1
309
+ far = dist + 1
310
+
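+ # after applying scale_mat the object sits in (roughly) a unit sphere at the origin, so the camera-to-origin distance +/- 1 brackets the depth range; the 5% padding below keeps the bounds conservative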
311
+ new_near_fars.append([0.95 * near, 1.05 * far])
312
+ new_depths_h.append(depth * scale_factor)
313
+
314
+ # print(new_near_fars)
315
+ imgs = torch.stack(imgs).float()
316
+ depths_h = np.stack(new_depths_h)
317
+ masks_h = np.stack(masks_h)
318
+
319
+ affine_mats = np.stack(new_affine_mats)
320
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
321
+ new_near_fars)
322
+
323
+ if self.split == 'train':
324
+ start_idx = 0
325
+ else:
326
+ start_idx = 1
327
+
328
+
329
+
330
+ target_w2cs = []
331
+ target_intrinsics = []
332
+ new_target_w2cs = []
333
+ for i_idx in range(8):
334
+ target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
335
+ target_intrinsics.append(self.all_intrinsics[i_idx])
336
+
337
+ for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
338
+
339
+ P = intrinsic @ extrinsic @ scale_mat
340
+ P = P[:3, :4]
341
+ # - should use load_K_Rt_from_P() to obtain c2w
342
+ c2w = load_K_Rt_from_P(None, P)[1]
343
+ w2c = np.linalg.inv(c2w)
344
+ new_target_w2cs.append(w2c)
345
+ target_w2cs = np.stack(new_target_w2cs)
346
+
347
+
348
+
349
+ view_ids = [idx] + list(src_views)
350
+ sample['origin_idx'] = origin_idx
351
+ sample['images'] = imgs # (V, 3, H, W)
352
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
353
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
354
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
355
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
356
+ sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
357
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
358
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
359
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
360
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
361
+
362
+ # sample['light_idx'] = torch.tensor(light_idx)
363
+ sample['scan'] = shape_name
364
+
365
+ sample['scale_factor'] = torch.tensor(scale_factor)
366
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
367
+ sample['render_img_idx'] = torch.tensor(image_perm)
368
+ sample['partial_vol_origin'] = self.partial_vol_origin
369
+ sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
370
+ # print("meta: ", sample['meta'])
371
+
372
+ # - image to render
373
+ sample['query_image'] = sample['images'][0]
374
+ sample['query_c2w'] = sample['c2ws'][0]
375
+ sample['query_w2c'] = sample['w2cs'][0]
376
+ sample['query_intrinsic'] = sample['intrinsics'][0]
377
+ sample['query_depth'] = sample['depths_h'][0]
378
+ sample['query_mask'] = sample['masks_h'][0]
379
+ sample['query_near_far'] = sample['near_fars'][0]
380
+
381
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
382
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
383
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
384
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
385
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
386
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
387
+ sample['view_ids'] = sample['view_ids'][start_idx:]
388
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
389
+
390
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
391
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
392
+
393
+ # - generate rays
394
+ if ('val' in self.split) or ('test' in self.split):
395
+ sample_rays = gen_rays_from_single_image(
396
+ img_wh[1], img_wh[0],
397
+ sample['query_image'],
398
+ sample['query_intrinsic'],
399
+ sample['query_c2w'],
400
+ depth=sample['query_depth'],
401
+ mask=sample['query_mask'] if self.clean_image else None)
402
+ else:
403
+ sample_rays = gen_random_rays_from_single_image(
404
+ img_wh[1], img_wh[0],
405
+ self.N_rays,
406
+ sample['query_image'],
407
+ sample['query_intrinsic'],
408
+ sample['query_c2w'],
409
+ depth=sample['query_depth'],
410
+ mask=sample['query_mask'] if self.clean_image else None,
411
+ dilated_mask=mask_dilated,
412
+ importance_sample=self.importance_sample)
413
+
414
+
415
+ sample['rays'] = sample_rays
416
+
417
+ return sample
SparseNeuS_demo_v1/data/blender_general_narrow_all_no_depth.py ADDED
@@ -0,0 +1,388 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # ? why need transpose here
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ # print("root_dir: ", root_dir)
70
+ self.root_dir = root_dir
71
+ self.split = split
72
+
73
+ self.n_views = n_views
74
+ self.N_rays = N_rays
75
+ self.batch_size = batch_size # - used to construct new metas for GRU fusion training
76
+
77
+ self.clean_image = clean_image
78
+ self.importance_sample = importance_sample
79
+ self.test_ref_views = test_ref_views # used for testing
80
+ self.scale_factor = 1.0
81
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
+
83
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
+ with open(lvis_json_path, 'r') as f:
85
+ lvis_paths = json.load(f)
86
+ if self.split == 'train':
87
+ self.lvis_paths = lvis_paths['train']
88
+ else:
89
+ self.lvis_paths = lvis_paths['val']
90
+ if img_wh is not None:
91
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
+ 'img_wh must both be multiples of 32!'
93
+
94
+
95
+ pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
+ with open(pose_json_path, 'r') as f:
97
+ meta = json.load(f)
98
+
99
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
+ self.img_wh = (256, 256)
101
+ self.input_poses = np.array(list(meta["c2ws"].values()))
102
+ intrinsic = np.eye(4)
103
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
+ self.intrinsic = intrinsic
105
+ self.near_far = np.array(meta["near_far"])
106
+ self.near_far[1] = 1.8
107
+ self.define_transforms()
108
+ self.blender2opencv = np.array(
109
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
+ )
111
+
112
+
113
+ self.c2ws = []
114
+ self.w2cs = []
115
+ self.near_fars = []
116
+ # self.root_dir = root_dir
117
+ for idx, img_id in enumerate(self.img_ids):
118
+ pose = self.input_poses[idx]
119
+ c2w = pose @ self.blender2opencv
120
+ self.c2ws.append(c2w)
121
+ self.w2cs.append(np.linalg.inv(c2w))
122
+ self.near_fars.append(self.near_far)
123
+ self.c2ws = np.stack(self.c2ws, axis=0)
124
+ self.w2cs = np.stack(self.w2cs, axis=0)
125
+
126
+
127
+ self.all_intrinsics = [] # the cam info of the whole scene
128
+ self.all_extrinsics = []
129
+ self.all_near_fars = []
130
+ self.load_cam_info()
131
+
132
+ # * bounding box for rendering
133
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
135
+
136
+ # - used for cost volume regularization
137
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
+
140
+
141
+ def define_transforms(self):
142
+ self.transform = T.Compose([T.ToTensor()])
143
+
144
+
145
+
146
+ def load_cam_info(self):
147
+ for vid, img_id in enumerate(self.img_ids):
148
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
+ self.all_intrinsics.append(intrinsic)
150
+ self.all_extrinsics.append(extrinsic)
151
+ self.all_near_fars.append(near_far)
152
+
153
+ def read_depth(self, filename):
154
+ pass
155
+
156
+ def read_mask(self, filename):
157
+ mask_h = cv2.imread(filename, 0)
158
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
+ interpolation=cv2.INTER_NEAREST)
160
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
+ interpolation=cv2.INTER_NEAREST)
162
+
163
+ mask[mask > 0] = 1 # the masks stored in png are not binary
164
+ mask_h[mask_h > 0] = 1
165
+
166
+ return mask, mask_h
167
+
168
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
+
170
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
+ # print("center", center)
172
+ # print("radius", radius)
173
+ # print("bounds", bounds)
174
+ # import ipdb; ipdb.set_trace()
175
+ radius = radius * factor
176
+ scale_mat = np.diag([radius, radius, radius, 1.0])
177
+ scale_mat[:3, 3] = center.cpu().numpy()
178
+ scale_mat = scale_mat.astype(np.float32)
179
+
180
+ return scale_mat, 1. / radius.cpu().numpy()
181
+
182
+ def __len__(self):
183
+ return 8*len(self.lvis_paths)
184
+
185
+
186
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
187
+ pass
188
+
189
+
190
+ def __getitem__(self, idx):
191
+ sample = {}
192
+ origin_idx = idx
193
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
194
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
195
+
196
+
197
+ folder_uid_dict = self.lvis_paths[idx//8]
198
+ idx = idx % 8 # [0, 7]
199
+ folder_id = folder_uid_dict['folder_id']
200
+ uid = folder_uid_dict['uid']
201
+
202
+
203
+ # target view
204
+ c2w = self.c2ws[idx]
205
+ w2c = np.linalg.inv(c2w)
206
+ w2c_ref = w2c
207
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
208
+
209
+ w2cs.append(w2c @ w2c_ref_inv)
210
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
211
+
212
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
213
+
214
+ depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
215
+
216
+
217
+ img = Image.open(img_filename)
218
+
219
+ img = self.transform(img) # (4, h, w)
220
+
221
+
222
+ if img.shape[0] == 4:
223
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
224
+ imgs += [img]
225
+
226
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
227
+ mask_h = depth_h > 0
228
+ # print("valid pixels", np.sum(mask_h))
229
+ # directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
230
+ # surface_points = directions * depth_h[..., None] # [H, W, 3]
231
+ # distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
232
+ # depth_h = distance
233
+
234
+ depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
235
+ depth_h = depth_h.fill_(-1.0)
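+ # the loaded GT depth is deliberately discarded (reference depth filled with -1) so this "no_depth" variant trains without depth supervision; the foreground mask derived from the depth map above is still kept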
236
+
237
+ depths_h.append(depth_h)
238
+ masks_h.append(mask_h)
239
+
240
+ intrinsic = self.intrinsic
241
+ intrinsics.append(intrinsic)
242
+
243
+
244
+ near_fars.append(self.near_fars[idx])
245
+ image_perm = 0 # only supervised on reference view
246
+
247
+ mask_dilated = None
248
+
249
+ # src_views = range(8+idx*4, 8+(idx+1)*4)
250
+ src_views = range(8, 8 + 8 * 4)
251
+
252
+ for vid in src_views:
253
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
254
+
255
+ img = Image.open(img_filename)
256
+ img_wh = self.img_wh
257
+
258
+ img = self.transform(img)
259
+ if img.shape[0] == 4:
260
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
261
+
262
+ imgs += [img]
263
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
264
+ depths_h.append(depth_h)
265
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
266
+
267
+ near_fars.append(self.all_near_fars[vid])
268
+ intrinsics.append(self.all_intrinsics[vid])
269
+
270
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
271
+
272
+
273
+ # ! estimate scale_mat
274
+ scale_mat, scale_factor = self.cal_scale_mat(
275
+ img_hw=[img_wh[1], img_wh[0]],
276
+ intrinsics=intrinsics, extrinsics=w2cs,
277
+ near_fars=near_fars, factor=1.1
278
+ )
279
+
280
+
281
+ new_near_fars = []
282
+ new_w2cs = []
283
+ new_c2ws = []
284
+ new_affine_mats = []
285
+ new_depths_h = []
286
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
287
+
288
+ P = intrinsic @ extrinsic @ scale_mat
289
+ P = P[:3, :4]
290
+ # - should use load_K_Rt_from_P() to obtain c2w
291
+ c2w = load_K_Rt_from_P(None, P)[1]
292
+ w2c = np.linalg.inv(c2w)
293
+ new_w2cs.append(w2c)
294
+ new_c2ws.append(c2w)
295
+ affine_mat = np.eye(4)
296
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
297
+ new_affine_mats.append(affine_mat)
298
+
299
+ camera_o = c2w[:3, 3]
300
+ dist = np.sqrt(np.sum(camera_o ** 2))
301
+ near = dist - 1
302
+ far = dist + 1
303
+
304
+ new_near_fars.append([0.95 * near, 1.05 * far])
305
+ new_depths_h.append(depth * scale_factor)
306
+
307
+ # print(new_near_fars)
308
+ imgs = torch.stack(imgs).float()
309
+ depths_h = np.stack(new_depths_h)
310
+ masks_h = np.stack(masks_h)
311
+
312
+ affine_mats = np.stack(new_affine_mats)
313
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
314
+ new_near_fars)
315
+
316
+ if self.split == 'train':
317
+ start_idx = 0
318
+ else:
319
+ start_idx = 1
320
+
321
+ view_ids = [idx] + list(src_views)
322
+ sample['origin_idx'] = origin_idx
323
+ sample['images'] = imgs # (V, 3, H, W)
324
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
325
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
326
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
327
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
328
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
329
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
330
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
331
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
332
+
333
+ # sample['light_idx'] = torch.tensor(light_idx)
334
+ sample['scan'] = folder_id
335
+
336
+ sample['scale_factor'] = torch.tensor(scale_factor)
337
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
338
+ sample['render_img_idx'] = torch.tensor(image_perm)
339
+ sample['partial_vol_origin'] = self.partial_vol_origin
340
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
341
+
342
+
343
+ # - image to render
344
+ sample['query_image'] = sample['images'][0]
345
+ sample['query_c2w'] = sample['c2ws'][0]
346
+ sample['query_w2c'] = sample['w2cs'][0]
347
+ sample['query_intrinsic'] = sample['intrinsics'][0]
348
+ sample['query_depth'] = sample['depths_h'][0]
349
+ sample['query_mask'] = sample['masks_h'][0]
350
+ sample['query_near_far'] = sample['near_fars'][0]
351
+
352
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
353
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
354
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
355
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
356
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
357
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
358
+ sample['view_ids'] = sample['view_ids'][start_idx:]
359
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
360
+
361
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
362
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
363
+
364
+ # - generate rays
365
+ if ('val' in self.split) or ('test' in self.split):
366
+ sample_rays = gen_rays_from_single_image(
367
+ img_wh[1], img_wh[0],
368
+ sample['query_image'],
369
+ sample['query_intrinsic'],
370
+ sample['query_c2w'],
371
+ depth=sample['query_depth'],
372
+ mask=sample['query_mask'] if self.clean_image else None)
373
+ else:
374
+ sample_rays = gen_random_rays_from_single_image(
375
+ img_wh[1], img_wh[0],
376
+ self.N_rays,
377
+ sample['query_image'],
378
+ sample['query_intrinsic'],
379
+ sample['query_c2w'],
380
+ depth=sample['query_depth'],
381
+ mask=sample['query_mask'] if self.clean_image else None,
382
+ dilated_mask=mask_dilated,
383
+ importance_sample=self.importance_sample)
384
+
385
+
386
+ sample['rays'] = sample_rays
387
+
388
+ return sample
SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4.py ADDED
@@ -0,0 +1,389 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # ? why need transpose here
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ # print("root_dir: ", root_dir)
70
+ self.root_dir = root_dir
71
+ self.split = split
72
+
73
+ self.n_views = n_views
74
+ self.N_rays = N_rays
75
+ self.batch_size = batch_size # - used to construct new metas for GRU fusion training
76
+
77
+ self.clean_image = clean_image
78
+ self.importance_sample = importance_sample
79
+ self.test_ref_views = test_ref_views # used for testing
80
+ self.scale_factor = 1.0
81
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
+
83
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
+ with open(lvis_json_path, 'r') as f:
85
+ lvis_paths = json.load(f)
86
+ if self.split == 'train':
87
+ self.lvis_paths = lvis_paths['train']
88
+ else:
89
+ self.lvis_paths = lvis_paths['val']
90
+ if img_wh is not None:
91
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
+ 'img_wh must both be multiples of 32!'
93
+
94
+
95
+ pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
+ with open(pose_json_path, 'r') as f:
97
+ meta = json.load(f)
98
+
99
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
+ self.img_wh = (256, 256)
101
+ self.input_poses = np.array(list(meta["c2ws"].values()))
102
+ intrinsic = np.eye(4)
103
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
+ self.intrinsic = intrinsic
105
+ self.near_far = np.array(meta["near_far"])
106
+ self.near_far[1] = 1.8
107
+ self.define_transforms()
108
+ self.blender2opencv = np.array(
109
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
+ )
111
+
112
+
113
+ self.c2ws = []
114
+ self.w2cs = []
115
+ self.near_fars = []
116
+ # self.root_dir = root_dir
117
+ for idx, img_id in enumerate(self.img_ids):
118
+ pose = self.input_poses[idx]
119
+ c2w = pose @ self.blender2opencv
120
+ self.c2ws.append(c2w)
121
+ self.w2cs.append(np.linalg.inv(c2w))
122
+ self.near_fars.append(self.near_far)
123
+ self.c2ws = np.stack(self.c2ws, axis=0)
124
+ self.w2cs = np.stack(self.w2cs, axis=0)
125
+
126
+
127
+ self.all_intrinsics = [] # the cam info of the whole scene
128
+ self.all_extrinsics = []
129
+ self.all_near_fars = []
130
+ self.load_cam_info()
131
+
132
+ # * bounding box for rendering
133
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
135
+
136
+ # - used for cost volume regularization
137
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
+
140
+
141
+ def define_transforms(self):
142
+ self.transform = T.Compose([T.ToTensor()])
143
+
144
+
145
+
146
+ def load_cam_info(self):
147
+ for vid, img_id in enumerate(self.img_ids):
148
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
+ self.all_intrinsics.append(intrinsic)
150
+ self.all_extrinsics.append(extrinsic)
151
+ self.all_near_fars.append(near_far)
152
+
153
+ def read_depth(self, filename):
154
+ pass
155
+
156
+ def read_mask(self, filename):
157
+ mask_h = cv2.imread(filename, 0)
158
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
+ interpolation=cv2.INTER_NEAREST)
160
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
+ interpolation=cv2.INTER_NEAREST)
162
+
163
+ mask[mask > 0] = 1 # the masks stored in png are not binary
164
+ mask_h[mask_h > 0] = 1
165
+
166
+ return mask, mask_h
167
+
168
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
+
170
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
+ # print("center", center)
172
+ # print("radius", radius)
173
+ # print("bounds", bounds)
174
+ # import ipdb; ipdb.set_trace()
175
+ radius = radius * factor
176
+ scale_mat = np.diag([radius, radius, radius, 1.0])
177
+ scale_mat[:3, 3] = center.cpu().numpy()
178
+ scale_mat = scale_mat.astype(np.float32)
179
+
180
+ return scale_mat, 1. / radius.cpu().numpy()
181
+
182
+ def __len__(self):
183
+ return 4*len(self.lvis_paths)
184
+
185
+
186
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
187
+ pass
188
+
189
+
190
+ def __getitem__(self, idx):
191
+ idx = idx * 2
192
+ sample = {}
193
+ origin_idx = idx
194
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
195
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
196
+
197
+
198
+ folder_uid_dict = self.lvis_paths[idx//8]
199
+ idx = idx % 8 # [0, 7]
200
+ folder_id = folder_uid_dict['folder_id']
201
+ uid = folder_uid_dict['uid']
202
+
203
+
204
+ # target view
205
+ c2w = self.c2ws[idx]
206
+ w2c = np.linalg.inv(c2w)
207
+ w2c_ref = w2c
208
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
209
+
210
+ w2cs.append(w2c @ w2c_ref_inv)
211
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
212
+
213
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
214
+
215
+ depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
216
+
217
+
218
+ img = Image.open(img_filename)
219
+
220
+ img = self.transform(img) # (4, h, w)
221
+
222
+
223
+ if img.shape[0] == 4:
224
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
225
+ imgs += [img]
226
+
227
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
228
+ mask_h = depth_h > 0
229
+ # print("valid pixels", np.sum(mask_h))
230
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
231
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
232
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
233
+ depth_h = distance
234
+
235
+
236
+ depths_h.append(depth_h)
237
+ masks_h.append(mask_h)
238
+
239
+ intrinsic = self.intrinsic
240
+ intrinsics.append(intrinsic)
241
+
242
+
243
+ near_fars.append(self.near_fars[idx])
244
+ image_perm = 0 # only supervised on reference view
245
+
246
+ mask_dilated = None
247
+
248
+ # src_views = range(8+idx*4, 8+(idx+1)*4)
249
+ src_views = range(8, 8 + 8 * 4)
250
+
251
+ for vid in src_views:
252
+ if (vid // 4) % 2 != 0:
253
+ continue
254
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
255
+
256
+ img = Image.open(img_filename)
257
+ img_wh = self.img_wh
258
+
259
+ img = self.transform(img)
260
+ if img.shape[0] == 4:
261
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
262
+
263
+ imgs += [img]
264
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
265
+ depths_h.append(depth_h)
266
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
267
+
268
+ near_fars.append(self.all_near_fars[vid])
269
+ intrinsics.append(self.all_intrinsics[vid])
270
+
271
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
272
+
273
+ # print("len(imgs)", len(imgs))
274
+ # ! estimate scale_mat
275
+ scale_mat, scale_factor = self.cal_scale_mat(
276
+ img_hw=[img_wh[1], img_wh[0]],
277
+ intrinsics=intrinsics, extrinsics=w2cs,
278
+ near_fars=near_fars, factor=1.1
279
+ )
280
+
281
+
282
+ new_near_fars = []
283
+ new_w2cs = []
284
+ new_c2ws = []
285
+ new_affine_mats = []
286
+ new_depths_h = []
287
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
288
+
289
+ P = intrinsic @ extrinsic @ scale_mat
290
+ P = P[:3, :4]
291
+ # - should use load_K_Rt_from_P() to obtain c2w
292
+ c2w = load_K_Rt_from_P(None, P)[1]
293
+ w2c = np.linalg.inv(c2w)
294
+ new_w2cs.append(w2c)
295
+ new_c2ws.append(c2w)
296
+ affine_mat = np.eye(4)
297
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
298
+ new_affine_mats.append(affine_mat)
299
+
300
+ camera_o = c2w[:3, 3]
301
+ dist = np.sqrt(np.sum(camera_o ** 2))
302
+ near = dist - 1
303
+ far = dist + 1
304
+
305
+ new_near_fars.append([0.95 * near, 1.05 * far])
306
+ new_depths_h.append(depth * scale_factor)
307
+
308
+ # print(new_near_fars)
309
+ imgs = torch.stack(imgs).float()
310
+ depths_h = np.stack(new_depths_h)
311
+ masks_h = np.stack(masks_h)
312
+
313
+ affine_mats = np.stack(new_affine_mats)
314
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
315
+ new_near_fars)
316
+
317
+ if self.split == 'train':
318
+ start_idx = 0
319
+ else:
320
+ start_idx = 1
321
+
322
+ view_ids = [idx] + list(src_views)
323
+ sample['origin_idx'] = origin_idx
324
+ sample['images'] = imgs # (V, 3, H, W)
325
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
326
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
327
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
328
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
329
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
330
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
331
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
332
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
333
+
334
+ # sample['light_idx'] = torch.tensor(light_idx)
335
+ sample['scan'] = folder_id
336
+
337
+ sample['scale_factor'] = torch.tensor(scale_factor)
338
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
339
+ sample['render_img_idx'] = torch.tensor(image_perm)
340
+ sample['partial_vol_origin'] = self.partial_vol_origin
341
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
342
+
343
+
344
+ # - image to render
345
+ sample['query_image'] = sample['images'][0]
346
+ sample['query_c2w'] = sample['c2ws'][0]
347
+ sample['query_w2c'] = sample['w2cs'][0]
348
+ sample['query_intrinsic'] = sample['intrinsics'][0]
349
+ sample['query_depth'] = sample['depths_h'][0]
350
+ sample['query_mask'] = sample['masks_h'][0]
351
+ sample['query_near_far'] = sample['near_fars'][0]
352
+
353
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
354
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
355
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
356
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
357
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
358
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
359
+ sample['view_ids'] = sample['view_ids'][start_idx:]
360
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
361
+
362
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
363
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
364
+
365
+ # - generate rays
366
+ if ('val' in self.split) or ('test' in self.split):
367
+ sample_rays = gen_rays_from_single_image(
368
+ img_wh[1], img_wh[0],
369
+ sample['query_image'],
370
+ sample['query_intrinsic'],
371
+ sample['query_c2w'],
372
+ depth=sample['query_depth'],
373
+ mask=sample['query_mask'] if self.clean_image else None)
374
+ else:
375
+ sample_rays = gen_random_rays_from_single_image(
376
+ img_wh[1], img_wh[0],
377
+ self.N_rays,
378
+ sample['query_image'],
379
+ sample['query_intrinsic'],
380
+ sample['query_c2w'],
381
+ depth=sample['query_depth'],
382
+ mask=sample['query_mask'] if self.clean_image else None,
383
+ dilated_mask=mask_dilated,
384
+ importance_sample=self.importance_sample)
385
+
386
+
387
+ sample['rays'] = sample_rays
388
+
389
+ return sample
SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4_and_4.py ADDED
@@ -0,0 +1,395 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ def load_K_Rt_from_P(filename, P=None):
38
+ if P is None:
39
+ lines = open(filename).read().splitlines()
40
+ if len(lines) == 4:
41
+ lines = lines[1:]
42
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
43
+ P = np.asarray(lines).astype(np.float32).squeeze()
44
+
45
+ out = cv2.decomposeProjectionMatrix(P)
46
+ K = out[0]
47
+ R = out[1]
48
+ t = out[2]
49
+
50
+ K = K / K[2, 2]
51
+ intrinsics = np.eye(4)
52
+ intrinsics[:3, :3] = K
53
+
54
+ pose = np.eye(4, dtype=np.float32)
55
+ pose[:3, :3] = R.transpose() # ? why need transpose here
56
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
57
+
58
+ return intrinsics, pose # ! return cam2world matrix here
59
+
60
+
61
+ # ! load one ref-image with multiple src-images in camera coordinate system
62
+ class BlenderPerView(Dataset):
63
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
64
+ split_filepath=None, pair_filepath=None,
65
+ N_rays=512,
66
+ vol_dims=[128, 128, 128], batch_size=1,
67
+ clean_image=False, importance_sample=False, test_ref_views=[]):
68
+
69
+ # print("root_dir: ", root_dir)
70
+ self.root_dir = root_dir
71
+ self.split = split
72
+
73
+ self.n_views = n_views
74
+ self.N_rays = N_rays
75
+ self.batch_size = batch_size # - used to construct new metas for GRU fusion training
76
+
77
+ self.clean_image = clean_image
78
+ self.importance_sample = importance_sample
79
+ self.test_ref_views = test_ref_views # used for testing
80
+ self.scale_factor = 1.0
81
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
82
+
83
+ lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
84
+ with open(lvis_json_path, 'r') as f:
85
+ lvis_paths = json.load(f)
86
+ if self.split == 'train':
87
+ self.lvis_paths = lvis_paths['train']
88
+ else:
89
+ self.lvis_paths = lvis_paths['val']
90
+ if img_wh is not None:
91
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
92
+ 'img_wh must both be multiples of 32!'
93
+
94
+
95
+ pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
96
+ with open(pose_json_path, 'r') as f:
97
+ meta = json.load(f)
98
+
99
+ self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
100
+ self.img_wh = (256, 256)
101
+ self.input_poses = np.array(list(meta["c2ws"].values()))
102
+ intrinsic = np.eye(4)
103
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
104
+ self.intrinsic = intrinsic
105
+ self.near_far = np.array(meta["near_far"])
106
+ self.near_far[1] = 1.8
107
+ self.define_transforms()
108
+ self.blender2opencv = np.array(
109
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
110
+ )
111
+
112
+
113
+ self.c2ws = []
114
+ self.w2cs = []
115
+ self.near_fars = []
116
+ # self.root_dir = root_dir
117
+ for idx, img_id in enumerate(self.img_ids):
118
+ pose = self.input_poses[idx]
119
+ c2w = pose @ self.blender2opencv
120
+ self.c2ws.append(c2w)
121
+ self.w2cs.append(np.linalg.inv(c2w))
122
+ self.near_fars.append(self.near_far)
123
+ self.c2ws = np.stack(self.c2ws, axis=0)
124
+ self.w2cs = np.stack(self.w2cs, axis=0)
125
+
126
+
127
+ self.all_intrinsics = [] # the cam info of the whole scene
128
+ self.all_extrinsics = []
129
+ self.all_near_fars = []
130
+ self.load_cam_info()
131
+
132
+ # * bounding box for rendering
133
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
134
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
135
+
136
+ # - used for cost volume regularization
137
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
138
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
139
+
140
+
141
+ def define_transforms(self):
142
+ self.transform = T.Compose([T.ToTensor()])
143
+
144
+
145
+
146
+ def load_cam_info(self):
147
+ for vid, img_id in enumerate(self.img_ids):
148
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
149
+ self.all_intrinsics.append(intrinsic)
150
+ self.all_extrinsics.append(extrinsic)
151
+ self.all_near_fars.append(near_far)
152
+
153
+ def read_depth(self, filename):
154
+ pass
155
+
156
+ def read_mask(self, filename):
157
+ mask_h = cv2.imread(filename, 0)
158
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
159
+ interpolation=cv2.INTER_NEAREST)
160
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
161
+ interpolation=cv2.INTER_NEAREST)
162
+
163
+ mask[mask > 0] = 1 # the masks stored in png are not binary
164
+ mask_h[mask_h > 0] = 1
165
+
166
+ return mask, mask_h
167
+
168
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
169
+
170
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
171
+ # print("center", center)
172
+ # print("radius", radius)
173
+ # print("bounds", bounds)
174
+ # import ipdb; ipdb.set_trace()
175
+ radius = radius * factor
176
+ scale_mat = np.diag([radius, radius, radius, 1.0])
177
+ scale_mat[:3, 3] = center.cpu().numpy()
178
+ scale_mat = scale_mat.astype(np.float32)
179
+
180
+ return scale_mat, 1. / radius.cpu().numpy()
181
+
182
+ def __len__(self):
183
+ return 8*len(self.lvis_paths)
184
+
185
+
186
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
187
+ pass
188
+
189
+
190
+ def __getitem__(self, idx):
191
+ idx = idx
192
+ sample = {}
193
+ origin_idx = idx
194
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
195
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
196
+
197
+
198
+ folder_uid_dict = self.lvis_paths[idx//8]
199
+ idx = idx % 8 # [0, 7]
200
+ folder_id = folder_uid_dict['folder_id']
201
+ uid = folder_uid_dict['uid']
202
+
203
+
204
+ # target view
205
+ c2w = self.c2ws[idx]
206
+ w2c = np.linalg.inv(c2w)
207
+ w2c_ref = w2c
208
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
209
+
210
+ w2cs.append(w2c @ w2c_ref_inv)
211
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
212
+
213
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
214
+
215
+ depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
216
+
217
+
218
+ img = Image.open(img_filename)
219
+
220
+ img = self.transform(img) # (4, h, w)
221
+
222
+
223
+ if img.shape[0] == 4:
224
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
225
+ imgs += [img]
226
+
227
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
228
+ mask_h = depth_h > 0
229
+ # print("valid pixels", np.sum(mask_h))
230
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
231
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
232
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
233
+ depth_h = distance
234
+
235
+
236
+ depths_h.append(depth_h)
237
+ masks_h.append(mask_h)
238
+
239
+ intrinsic = self.intrinsic
240
+ intrinsics.append(intrinsic)
241
+
242
+
243
+ near_fars.append(self.near_fars[idx])
244
+ image_perm = 0 # only supervised on reference view
245
+
246
+ mask_dilated = None
247
+
248
+ # src_views = range(8+idx*4, 8+(idx+1)*4)
249
+
250
+ src_views = range(8, 8 + 8 * 4)
251
+
252
+ vid_list = []
253
+ for vid in src_views:
254
+ if (vid // 4) % 2 != idx % 2:
255
+ continue
256
+ vid_list.append(vid)
257
+ img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
258
+
259
+ img = Image.open(img_filename)
260
+ img_wh = self.img_wh
261
+
262
+ img = self.transform(img)
263
+ if img.shape[0] == 4:
264
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
265
+
266
+ imgs += [img]
267
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
268
+ depths_h.append(depth_h)
269
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
270
+
271
+ near_fars.append(self.all_near_fars[vid])
272
+ intrinsics.append(self.all_intrinsics[vid])
273
+
274
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
275
+
276
+
277
+ # print("idx:", idx)
278
+ # print("len(imgs)", len(imgs))
279
+ # print("vid_list", vid_list)
280
+ # ! estimate scale_mat
281
+ scale_mat, scale_factor = self.cal_scale_mat(
282
+ img_hw=[img_wh[1], img_wh[0]],
283
+ intrinsics=intrinsics, extrinsics=w2cs,
284
+ near_fars=near_fars, factor=1.1
285
+ )
286
+
287
+
288
+ new_near_fars = []
289
+ new_w2cs = []
290
+ new_c2ws = []
291
+ new_affine_mats = []
292
+ new_depths_h = []
293
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
294
+
295
+ P = intrinsic @ extrinsic @ scale_mat
296
+ P = P[:3, :4]
297
+ # - should use load_K_Rt_from_P() to obtain c2w
298
+ c2w = load_K_Rt_from_P(None, P)[1]
299
+ w2c = np.linalg.inv(c2w)
300
+ new_w2cs.append(w2c)
301
+ new_c2ws.append(c2w)
302
+ affine_mat = np.eye(4)
303
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
304
+ new_affine_mats.append(affine_mat)
305
+
306
+ camera_o = c2w[:3, 3]
307
+ dist = np.sqrt(np.sum(camera_o ** 2))
308
+ near = dist - 1
309
+ far = dist + 1
310
+
311
+ new_near_fars.append([0.95 * near, 1.05 * far])
312
+ new_depths_h.append(depth * scale_factor)
313
+
314
+ # print(new_near_fars)
315
+ imgs = torch.stack(imgs).float()
316
+ depths_h = np.stack(new_depths_h)
317
+ masks_h = np.stack(masks_h)
318
+
319
+ affine_mats = np.stack(new_affine_mats)
320
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
321
+ new_near_fars)
322
+
323
+ if self.split == 'train':
324
+ start_idx = 0
325
+ else:
326
+ start_idx = 1
327
+
328
+ view_ids = [idx] + list(src_views)
329
+ sample['origin_idx'] = origin_idx
330
+ sample['images'] = imgs # (V, 3, H, W)
331
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
332
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
333
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
334
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
335
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
336
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
337
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
338
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
339
+
340
+ # sample['light_idx'] = torch.tensor(light_idx)
341
+ sample['scan'] = folder_id
342
+
343
+ sample['scale_factor'] = torch.tensor(scale_factor)
344
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
345
+ sample['render_img_idx'] = torch.tensor(image_perm)
346
+ sample['partial_vol_origin'] = self.partial_vol_origin
347
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
348
+
349
+
350
+ # - image to render
351
+ sample['query_image'] = sample['images'][0]
352
+ sample['query_c2w'] = sample['c2ws'][0]
353
+ sample['query_w2c'] = sample['w2cs'][0]
354
+ sample['query_intrinsic'] = sample['intrinsics'][0]
355
+ sample['query_depth'] = sample['depths_h'][0]
356
+ sample['query_mask'] = sample['masks_h'][0]
357
+ sample['query_near_far'] = sample['near_fars'][0]
358
+
359
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
360
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
361
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
362
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
363
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
364
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
365
+ sample['view_ids'] = sample['view_ids'][start_idx:]
366
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
367
+
368
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
369
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
370
+
371
+ # - generate rays
372
+ if ('val' in self.split) or ('test' in self.split):
373
+ sample_rays = gen_rays_from_single_image(
374
+ img_wh[1], img_wh[0],
375
+ sample['query_image'],
376
+ sample['query_intrinsic'],
377
+ sample['query_c2w'],
378
+ depth=sample['query_depth'],
379
+ mask=sample['query_mask'] if self.clean_image else None)
380
+ else:
381
+ sample_rays = gen_random_rays_from_single_image(
382
+ img_wh[1], img_wh[0],
383
+ self.N_rays,
384
+ sample['query_image'],
385
+ sample['query_intrinsic'],
386
+ sample['query_c2w'],
387
+ depth=sample['query_depth'],
388
+ mask=sample['query_mask'] if self.clean_image else None,
389
+ dilated_mask=mask_dilated,
390
+ importance_sample=self.importance_sample)
391
+
392
+
393
+ sample['rays'] = sample_rays
394
+
395
+ return sample
SparseNeuS_demo_v1/data/blender_gt_32.py ADDED
@@ -0,0 +1,419 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+ import json
13
+ from termcolor import colored
14
+ import imageio
15
+ from kornia import create_meshgrid
16
+ import open3d as o3d
17
+ def get_ray_directions(H, W, focal, center=None):
18
+ """
19
+ Get ray directions for all pixels in camera coordinate.
20
+ Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
21
+ ray-tracing-generating-camera-rays/standard-coordinate-systems
22
+ Inputs:
23
+ H, W, focal: image height, width and focal length
24
+ Outputs:
25
+ directions: (H, W, 3), the direction of the rays in camera coordinate
26
+ """
27
+ grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2
28
+
29
+ i, j = grid.unbind(-1)
30
+ # the direction here is without +0.5 pixel centering as calibration is not so accurate
31
+ # see https://github.com/bmild/nerf/issues/24
32
+ cent = center if center is not None else [W / 2, H / 2]
33
+ directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3)
34
+
35
+ return directions
36
+
37
+ import os, json
38
+ import numpy as np
39
+ def calc_pose(phis, thetas, size, radius = 1.2):
40
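+ # build camera-to-world look-at poses placed on a sphere of the given radius (z-up, cameras facing the origin)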
+ import torch
41
+ def normalize(vectors):
42
+ return vectors / (torch.norm(vectors, dim=-1, keepdim=True) + 1e-10)
43
+ # device = torch.device('cuda')
44
+ thetas = torch.FloatTensor(thetas)
45
+ phis = torch.FloatTensor(phis)
46
+
47
+ centers = torch.stack([
48
+ radius * torch.sin(thetas) * torch.sin(phis),
49
+ -radius * torch.cos(thetas) * torch.sin(phis),
50
+ radius * torch.cos(phis),
51
+ ], dim=-1) # [B, 3]
52
+
53
+ # lookat
54
+ forward_vector = normalize(centers).squeeze(0)
55
+ up_vector = torch.FloatTensor([0, 0, 1]).unsqueeze(0).repeat(size, 1)
56
+ right_vector = normalize(torch.cross(up_vector, forward_vector, dim=-1))
57
+ if right_vector.pow(2).sum() < 0.01:
58
+ right_vector = torch.FloatTensor([0, 1, 0]).unsqueeze(0).repeat(size, 1)
59
+ up_vector = normalize(torch.cross(forward_vector, right_vector, dim=-1))
60
+
61
+ poses = torch.eye(4, dtype=torch.float)[:3].unsqueeze(0).repeat(size, 1, 1)
62
+ poses[:, :3, :3] = torch.stack((right_vector, up_vector, forward_vector), dim=-1)
63
+ poses[:, :3, 3] = centers
64
+ return poses
65
+
66
+ def load_K_Rt_from_P(filename, P=None):
67
+ if P is None:
68
+ lines = open(filename).read().splitlines()
69
+ if len(lines) == 4:
70
+ lines = lines[1:]
71
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
72
+ P = np.asarray(lines).astype(np.float32).squeeze()
73
+
74
+ out = cv2.decomposeProjectionMatrix(P)
75
+ K = out[0]
76
+ R = out[1]
77
+ t = out[2]
78
+
79
+ K = K / K[2, 2]
80
+ intrinsics = np.eye(4)
81
+ intrinsics[:3, :3] = K
82
+
83
+ pose = np.eye(4, dtype=np.float32)
84
+ pose[:3, :3] = R.transpose() # R from decomposeProjectionMatrix is world-to-camera, so its transpose is the camera-to-world rotation
85
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
86
+
87
+ return intrinsics, pose # ! return cam2world matrix here
88
+
89
+
90
+ # ! load one ref-image with multiple src-images in camera coordinate system
91
+ class BlenderPerView(Dataset):
92
+ def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
93
+ split_filepath=None, pair_filepath=None,
94
+ N_rays=512,
95
+ vol_dims=[128, 128, 128], batch_size=1,
96
+ clean_image=False, importance_sample=False, test_ref_views=[]):
97
+
98
+ # print("root_dir: ", root_dir)
99
+ self.root_dir = root_dir
100
+ self.split = split
101
+
102
+ self.n_views = n_views
103
+ self.N_rays = N_rays
104
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
105
+
106
+ self.clean_image = clean_image
107
+ self.importance_sample = importance_sample
108
+ self.test_ref_views = test_ref_views # used for testing
109
+ self.scale_factor = 1.0
110
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
111
+
112
+ lvis_json_path = '/objaverse-processed/zero12345_img/random32_split.json' # folder_id and uid
113
+ with open(lvis_json_path, 'r') as f:
114
+ lvis_paths = json.load(f)
115
+ if self.split == 'train':
116
+ self.lvis_paths = lvis_paths['train']
117
+ else:
118
+ self.lvis_paths = lvis_paths['val']
119
+ if img_wh is not None:
120
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
121
+ 'img_wh must both be multiples of 32!'
122
+
123
+ pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
124
+
125
+ with open(pose_json_path, 'r') as f:
126
+ meta = json.load(f)
127
+ intrinsic = np.eye(4)
128
+ intrinsic[:3, :3] = np.array(meta["intrinsics"])
129
+ self.intrinsic = intrinsic
130
+ self.near_far = np.array(meta["near_far"])
131
+ self.near_far[1] = 1.8
132
+
133
+ # * bounding box for rendering
134
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
135
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
136
+
137
+ # - used for cost volume regularization
138
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
139
+ self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
140
+
141
+
142
+ def define_transforms(self):
143
+ self.transform = T.Compose([T.ToTensor()])
144
+
145
+
146
+
147
+ def load_cam_info(self):
148
+ for vid in range(self.input_poses.shape[0]):
149
+ intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
150
+ self.all_intrinsics.append(intrinsic)
151
+ self.all_extrinsics.append(extrinsic)
152
+ self.all_near_fars.append(near_far)
153
+
154
+ def read_depth(self, filename):
155
+ pass
156
+
157
+ def read_mask(self, filename):
158
+ mask_h = cv2.imread(filename, 0)
159
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
160
+ interpolation=cv2.INTER_NEAREST)
161
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
162
+ interpolation=cv2.INTER_NEAREST)
163
+
164
+ mask[mask > 0] = 1 # the masks stored in png are not binary
165
+ mask_h[mask_h > 0] = 1
166
+
167
+ return mask, mask_h
168
+
169
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
170
+
171
+ center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
172
+ # print("center", center)
173
+ # print("radius", radius)
174
+ # print("bounds", bounds)
175
+ # import ipdb; ipdb.set_trace()
176
+ radius = radius * factor
177
+ scale_mat = np.diag([radius, radius, radius, 1.0])
178
+ scale_mat[:3, 3] = center.cpu().numpy()
179
+ scale_mat = scale_mat.astype(np.float32)
180
+
181
+ return scale_mat, 1. / radius.cpu().numpy()
182
+
183
+ def __len__(self):
184
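+ # one sample per (object, reference view): each object in the split contributes 32 reference views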
+ return 32*len(self.lvis_paths)
185
+
186
+
187
+ def read_depth(self, filename, near_bound, noisy_factor=1.0):
188
+ pass
189
+
190
+
191
+ def __getitem__(self, idx):
192
+ sample = {}
193
+ origin_idx = idx
194
+ imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
195
+ intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
196
+
197
+
198
+ folder_uid_dict = self.lvis_paths[idx//32]
199
+ idx = idx % 32 # [0, 31]
200
+ folder_id = folder_uid_dict['folder_id']
201
+ uid = folder_uid_dict['uid']
202
+
203
+ pose_file = os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, 'views.npz')
204
+ pose_array = np.load(pose_file)
205
+ pose = calc_pose(pose_array['elevations'], pose_array['azimuths'], 32) # [32, 3, 4] c2ws
206
+
207
+ self.img_wh = (256, 256)
208
+ self.input_poses = np.array(pose)
209
+ self.input_poses = np.concatenate([self.input_poses, np.tile(np.array([0, 0, 0, 1], dtype=np.float32)[None, None, :], [self.input_poses.shape[0], 1, 1])], axis=1)
210
+ self.define_transforms()
211
+ self.blender2opencv = np.array(
212
+ [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
213
+ )
214
+
215
+ self.c2ws = []
216
+ self.w2cs = []
217
+ self.near_fars = []
218
+ # self.root_dir = root_dir
219
+ for image_idx in range(self.input_poses.shape[0]):
220
+ pose = self.input_poses[image_idx]
221
+ c2w = pose @ self.blender2opencv
222
+ self.c2ws.append(c2w)
223
+ self.w2cs.append(np.linalg.inv(c2w))
224
+ self.near_fars.append(self.near_far)
225
+ self.c2ws = np.stack(self.c2ws, axis=0)
226
+ self.w2cs = np.stack(self.w2cs, axis=0)
227
+
228
+
229
+ self.all_intrinsics = [] # the cam info of the whole scene
230
+ self.all_extrinsics = []
231
+ self.all_near_fars = []
232
+ self.load_cam_info()
233
+
234
+
235
+
236
+ # target view
237
+ c2w = self.c2ws[idx]
238
+ w2c = np.linalg.inv(c2w)
239
+ w2c_ref = w2c
240
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
241
+
242
+ w2cs.append(w2c @ w2c_ref_inv)
243
+ c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
244
+
245
+ img_filename = os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, f'{idx}.png')
246
+
247
+ depth_filename = os.path.join(os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, f'{idx}_depth_mm.png'))
248
+
249
+
250
+ img = Image.open(img_filename)
251
+
252
+ img = self.transform(img) # (4, h, w)
253
+
254
+
255
+ if img.shape[0] == 4:
256
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
257
+ imgs += [img]
258
+
259
+ depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0  # depth png stores millimeters; convert to meters
260
+ mask_h = depth_h > 0
261
+
262
+ directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
263
+ surface_points = directions * depth_h[..., None] # [H, W, 3]
264
+ distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
265
+ depth_h = distance
266
+
267
+
268
+ depths_h.append(depth_h)
269
+ masks_h.append(mask_h)
270
+
271
+ intrinsic = self.intrinsic
272
+ intrinsics.append(intrinsic)
273
+
274
+
275
+ near_fars.append(self.near_fars[idx])
276
+ image_perm = 0 # only supervised on reference view
277
+
278
+ mask_dilated = None
279
+
280
+ # src_views = range(8+idx*4, 8+(idx+1)*4)
281
+ src_views = range(0, 8 * 4)
282
+
283
+ for vid in src_views:
284
+ img_filename = os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, f'{vid}.png')
285
+
286
+ img = Image.open(img_filename)
287
+ img_wh = self.img_wh
288
+
289
+ img = self.transform(img)
290
+ if img.shape[0] == 4:
291
+ img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
292
+
293
+ imgs += [img]
294
+ depth_h = np.ones(img.shape[1:], dtype=np.float32)
295
+ depths_h.append(depth_h)
296
+ masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
297
+
298
+ near_fars.append(self.all_near_fars[vid])
299
+ intrinsics.append(self.all_intrinsics[vid])
300
+
301
+ w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
302
+
303
+
304
+ # ! estimate scale_mat
305
+ scale_mat, scale_factor = self.cal_scale_mat(
306
+ img_hw=[img_wh[1], img_wh[0]],
307
+ intrinsics=intrinsics, extrinsics=w2cs,
308
+ near_fars=near_fars, factor=1.1
309
+ )
310
+
311
+
312
+ new_near_fars = []
313
+ new_w2cs = []
314
+ new_c2ws = []
315
+ new_affine_mats = []
316
+ new_depths_h = []
317
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
318
+
319
+ P = intrinsic @ extrinsic @ scale_mat
320
+ P = P[:3, :4]
321
+ # - should use load_K_Rt_from_P() to obtain c2w
322
+ c2w = load_K_Rt_from_P(None, P)[1]
323
+ w2c = np.linalg.inv(c2w)
324
+ new_w2cs.append(w2c)
325
+ new_c2ws.append(c2w)
326
+ affine_mat = np.eye(4)
327
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
328
+ new_affine_mats.append(affine_mat)
329
+
330
+ camera_o = c2w[:3, 3]
331
+ dist = np.sqrt(np.sum(camera_o ** 2))
332
+ near = dist - 1
333
+ far = dist + 1
334
+
335
+ new_near_fars.append([0.95 * near, 1.05 * far])
336
+ new_depths_h.append(depth * scale_factor)
337
+
338
+ # print(new_near_fars)
339
+ imgs = torch.stack(imgs).float()
340
+ depths_h = np.stack(new_depths_h)
341
+ masks_h = np.stack(masks_h)
342
+
343
+ affine_mats = np.stack(new_affine_mats)
344
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
345
+ new_near_fars)
346
+
347
+ if self.split == 'train':
348
+ start_idx = 0
349
+ else:
350
+ start_idx = 1
351
+
352
+ view_ids = [idx] + list(src_views)
353
+ sample['origin_idx'] = origin_idx
354
+ sample['images'] = imgs # (V, 3, H, W)
355
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
356
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
357
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
358
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
359
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
360
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
361
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
362
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
363
+
364
+ # sample['light_idx'] = torch.tensor(light_idx)
365
+ sample['scan'] = folder_id
366
+
367
+ sample['scale_factor'] = torch.tensor(scale_factor)
368
+ sample['img_wh'] = torch.from_numpy(np.array(img_wh))
369
+ sample['render_img_idx'] = torch.tensor(image_perm)
370
+ sample['partial_vol_origin'] = self.partial_vol_origin
371
+ sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
372
+
373
+
374
+ # - image to render
375
+ sample['query_image'] = sample['images'][0]
376
+ sample['query_c2w'] = sample['c2ws'][0]
377
+ sample['query_w2c'] = sample['w2cs'][0]
378
+ sample['query_intrinsic'] = sample['intrinsics'][0]
379
+ sample['query_depth'] = sample['depths_h'][0]
380
+ sample['query_mask'] = sample['masks_h'][0]
381
+ sample['query_near_far'] = sample['near_fars'][0]
382
+
383
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
384
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
385
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
386
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
387
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
388
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
389
+ sample['view_ids'] = sample['view_ids'][start_idx:]
390
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
391
+
392
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
393
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
394
+
395
+ # - generate rays
396
+ if ('val' in self.split) or ('test' in self.split):
397
+ sample_rays = gen_rays_from_single_image(
398
+ img_wh[1], img_wh[0],
399
+ sample['query_image'],
400
+ sample['query_intrinsic'],
401
+ sample['query_c2w'],
402
+ depth=sample['query_depth'],
403
+ mask=sample['query_mask'] if self.clean_image else None)
404
+ else:
405
+ sample_rays = gen_random_rays_from_single_image(
406
+ img_wh[1], img_wh[0],
407
+ self.N_rays,
408
+ sample['query_image'],
409
+ sample['query_intrinsic'],
410
+ sample['query_c2w'],
411
+ depth=sample['query_depth'],
412
+ mask=sample['query_mask'] if self.clean_image else None,
413
+ dilated_mask=mask_dilated,
414
+ importance_sample=self.importance_sample)
415
+
416
+
417
+ sample['rays'] = sample_rays
418
+
419
+ return sample
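For orientation, a minimal sketch of driving this loader with a standard PyTorch DataLoader. It assumes the script is run from the SparseNeuS_demo_v1 root (so the module resolves as data.blender_gt_32) and that the hard-coded /objaverse-processed/... JSON and pose files exist; the constructor arguments are illustrative placeholders, not the authors' launch settings.

from torch.utils.data import DataLoader
from data.blender_gt_32 import BlenderPerView

# hypothetical arguments; the class also reads hard-coded /objaverse-processed/... paths internally
dataset = BlenderPerView(root_dir='/objaverse-processed/zero12345_img/random32/',
                         split='val', img_wh=(256, 256), N_rays=512, clean_image=True)
loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0)
sample = next(iter(loader))                            # one collated sample
print(sample['images'].shape, sample['w2cs'].shape)    # (1, V, 3, 256, 256) and (1, V, 4, 4)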
SparseNeuS_demo_v1/data/dtu/dtu_pairs.txt ADDED
@@ -0,0 +1,93 @@
1
+ 46
2
+ 0
3
+ 10 10 2346.410000 1 2036.530000 9 1243.890000 12 1052.870000 11 1000.840000 13 703.583000 2 604.456000 8 439.759000 14 327.419000 27 249.278000
4
+ 1
5
+ 10 9 2850.870000 10 2583.940000 2 2105.590000 0 2052.840000 8 1868.240000 13 1184.230000 14 1017.510000 12 961.966000 7 670.208000 15 657.218000
6
+ 2
7
+ 10 8 2501.240000 1 2106.880000 7 1856.500000 9 1782.340000 3 1141.770000 15 1061.760000 14 815.457000 16 762.153000 6 709.789000 10 699.921000
8
+ 3
9
+ 10 7 1294.390000 6 1159.130000 2 1134.270000 4 905.717000 8 687.320000 5 600.015000 17 496.958000 16 481.969000 1 379.011000 15 307.450000
10
+ 4
11
+ 10 5 1333.740000 6 1145.150000 3 895.254000 7 486.504000 18 446.420000 2 418.517000 17 326.528000 8 161.115000 16 149.154000 1 103.626000
12
+ 5
13
+ 10 6 1676.060000 18 1555.060000 4 1335.550000 17 868.416000 3 593.755000 7 467.816000 20 440.579000 19 428.255000 16 242.327000 21 210.253000
14
+ 6
15
+ 10 17 2332.350000 7 1848.240000 18 1812.740000 5 1696.070000 16 1273.000000 3 1157.990000 4 1155.410000 20 771.624000 21 744.945000 2 700.368000
16
+ 7
17
+ 10 16 2709.460000 8 2439.700000 15 2078.210000 6 1864.160000 2 1846.600000 17 1791.710000 3 1296.860000 22 957.793000 9 879.088000 21 782.277000
18
+ 8
19
+ 10 15 3124.010000 9 3099.920000 14 2756.290000 2 2501.220000 7 2449.320000 1 1875.940000 16 1726.040000 13 1325.760000 23 1177.090000 24 1108.820000
20
+ 9
21
+ 10 13 3355.620000 14 3226.070000 8 3098.800000 10 3097.070000 1 2861.420000 12 1873.630000 2 1785.980000 15 1753.320000 25 1365.450000 0 1261.590000
22
+ 10
23
+ 10 12 3750.700000 9 3085.870000 13 3028.390000 1 2590.550000 0 2369.790000 11 2266.670000 14 1524.160000 26 1448.150000 27 1293.600000 8 1041.840000
24
+ 11
25
+ 10 12 3543.760000 27 3056.050000 10 2248.070000 26 1524.280000 28 1273.330000 13 1265.900000 29 1129.550000 0 998.164000 9 591.176000 30 572.919000
26
+ 12
27
+ 10 27 3889.870000 10 3754.540000 13 3745.210000 11 3584.260000 26 3574.560000 25 1877.110000 9 1866.340000 29 1482.720000 30 1418.510000 14 1341.860000
28
+ 13
29
+ 10 12 3773.140000 26 3699.280000 25 3657.170000 14 3652.040000 9 3356.290000 10 3049.270000 24 2098.910000 27 1900.960000 31 1460.960000 30 1349.620000
30
+ 14
31
+ 10 13 3663.520000 24 3610.690000 9 3232.550000 25 3216.400000 15 3128.840000 8 2758.040000 23 2219.910000 26 1567.450000 10 1536.600000 32 1419.330000
32
+ 15
33
+ 10 23 3194.920000 14 3126.000000 8 3120.430000 16 2897.020000 24 2562.490000 7 2084.050000 22 2041.630000 9 1752.080000 33 1232.290000 13 1137.550000
34
+ 16
35
+ 10 15 2884.140000 7 2713.880000 22 2708.570000 17 2448.500000 21 2173.300000 23 1908.030000 8 1718.790000 6 1281.960000 35 1047.380000 34 980.064000
36
+ 17
37
+ 10 21 2632.480000 16 2428.000000 6 2343.570000 18 2250.230000 20 2149.750000 7 1779.420000 22 1380.250000 36 957.046000 5 878.398000 15 789.068000
38
+ 18
39
+ 9 17 2219.150000 20 2173.020000 6 1802.390000 19 1575.770000 5 1564.810000 21 1160.130000 16 660.317000 7 589.484000 36 559.983000
40
+ 19
41
+ 7 20 1828.970000 18 1564.630000 17 685.249000 36 613.420000 21 572.770000 5 427.597000 6 368.651000
42
+ 20
43
+ 8 21 2569.790000 36 2258.330000 18 2186.710000 17 2130.670000 19 1865.060000 35 996.122000 16 799.808000 40 778.721000
44
+ 21
45
+ 9 36 2704.590000 35 2639.690000 17 2638.190000 20 2605.430000 22 2604.260000 16 2158.250000 34 1239.250000 18 1178.240000 40 1128.570000
46
+ 22
47
+ 10 23 3232.680000 34 3175.150000 35 2831.090000 16 2712.510000 21 2632.190000 15 2033.390000 33 1712.670000 17 1393.860000 36 1290.960000 24 1195.330000
48
+ 23
49
+ 10 24 3710.900000 33 3603.070000 22 3244.200000 15 3190.620000 34 3086.490000 14 2220.110000 32 2100.000000 16 1917.100000 35 1359.790000 25 1356.710000
50
+ 24
51
+ 10 25 3844.600000 32 3750.750000 23 3710.600000 14 3609.090000 33 3091.040000 15 2559.240000 31 2423.710000 13 2109.360000 26 1440.580000 34 1410.030000
52
+ 25
53
+ 10 26 3951.740000 31 3888.570000 24 3833.070000 13 3667.350000 14 3208.210000 32 2993.460000 30 2681.520000 12 1900.230000 45 1484.030000 27 1462.880000
54
+ 26
55
+ 10 30 4033.350000 27 3970.470000 25 3925.250000 13 3686.340000 12 3595.590000 29 2943.870000 31 2917.000000 14 1556.340000 11 1554.750000 46 1503.840000
56
+ 27
57
+ 10 29 4027.840000 26 3929.940000 12 3875.580000 11 3085.030000 28 2908.600000 30 2792.670000 13 1878.420000 25 1438.550000 47 1425.200000 10 1290.250000
58
+ 28
59
+ 10 29 3687.020000 48 3209.130000 27 2872.860000 47 2014.530000 30 1361.950000 11 1273.600000 26 1062.850000 12 840.841000 46 672.985000 31 271.952000
60
+ 29
61
+ 10 27 4029.430000 30 3909.550000 28 3739.930000 47 3695.230000 48 3135.870000 26 2910.970000 46 2229.550000 12 1479.160000 31 1430.260000 11 1144.560000
62
+ 30
63
+ 10 26 4029.860000 29 3953.720000 31 3811.120000 46 3630.460000 47 3105.960000 27 2824.430000 25 2657.890000 45 2347.750000 32 1459.110000 12 1429.620000
64
+ 31
65
+ 10 25 3882.210000 30 3841.880000 32 3808.500000 45 3649.820000 46 3000.670000 26 2939.940000 24 2409.930000 44 2381.300000 13 1467.590000 29 1459.560000
66
+ 32
67
+ 10 31 3826.500000 24 3744.140000 33 3613.240000 44 3552.040000 25 3004.600000 45 2884.590000 43 2393.340000 23 2095.270000 30 1478.600000 14 1420.780000
68
+ 33
69
+ 10 32 3618.110000 23 3598.100000 34 3530.530000 43 3462.370000 24 3091.530000 44 2608.080000 42 2426.000000 22 1717.940000 31 1407.650000 25 1324.780000
70
+ 34
71
+ 10 33 3523.370000 42 3356.550000 35 3210.340000 22 3178.850000 23 3079.030000 43 2396.450000 41 2386.860000 24 1408.020000 32 1301.340000 21 1256.450000
72
+ 35
73
+ 10 34 3187.880000 41 3106.440000 36 2866.040000 22 2817.740000 21 2654.870000 40 2416.980000 42 2137.810000 23 1346.860000 33 1150.330000 16 1044.660000
74
+ 36
75
+ 8 40 2910.700000 35 2832.660000 21 2689.960000 20 2280.460000 41 1787.970000 22 1268.490000 34 981.636000 17 954.229000
76
+ 40
77
+ 7 36 2918.140000 41 2852.620000 35 2392.960000 21 1124.300000 42 1056.480000 34 877.946000 20 788.701000
78
+ 41
79
+ 9 35 3111.050000 42 3049.710000 40 2885.360000 34 2371.020000 36 1813.690000 43 1164.710000 22 1126.900000 21 906.536000 33 903.238000
80
+ 42
81
+ 10 34 3356.980000 43 3183.000000 41 3070.540000 33 2421.770000 35 2155.080000 44 1278.410000 23 1183.520000 22 1147.070000 40 1077.080000 32 899.646000
82
+ 43
83
+ 10 33 3461.240000 44 3380.740000 42 3188.700000 34 2400.600000 32 2399.090000 45 1359.370000 23 1314.080000 41 1176.120000 24 1159.620000 31 901.556000
84
+ 44
85
+ 10 32 3550.810000 45 3510.160000 43 3373.110000 33 2602.330000 31 2395.930000 24 1410.430000 46 1386.310000 42 1279.000000 25 1095.240000 34 968.440000
86
+ 45
87
+ 10 31 3650.090000 46 3555.090000 44 3491.150000 32 2868.390000 30 2373.590000 25 1485.370000 47 1405.280000 43 1349.540000 33 1104.770000 26 1046.810000
88
+ 46
89
+ 10 30 3635.640000 47 3562.170000 45 3524.170000 31 2976.820000 29 2264.040000 26 1508.870000 44 1367.410000 48 1352.100000 32 1211.240000 25 1102.170000
90
+ 47
91
+ 10 29 3705.310000 46 3519.760000 48 3450.480000 30 3074.770000 28 2054.630000 27 1434.570000 45 1377.340000 31 1268.230000 26 1223.830000 25 471.111000
92
+ 48
93
+ 10 47 3401.950000 28 3224.840000 29 3101.160000 46 1317.100000 30 1306.700000 27 1235.070000 26 537.731000 31 291.919000 45 276.869000 11 258.856000
SparseNeuS_demo_v1/data/dtu/lists/test.txt ADDED
@@ -0,0 +1,15 @@
1
+ scan24
2
+ scan37
3
+ scan40
4
+ scan55
5
+ scan63
6
+ scan65
7
+ scan69
8
+ scan83
9
+ scan97
10
+ scan105
11
+ scan106
12
+ scan110
13
+ scan114
14
+ scan118
15
+ scan122
SparseNeuS_demo_v1/data/dtu/lists/train.txt ADDED
@@ -0,0 +1,75 @@
1
+ scan1
2
+ scan4
3
+ scan5
4
+ scan6
5
+ scan8
6
+ scan9
7
+ scan10
8
+ scan11
9
+ scan12
10
+ scan13
11
+ scan14
12
+ scan15
13
+ scan16
14
+ scan17
15
+ scan18
16
+ scan19
17
+ scan20
18
+ scan21
19
+ scan22
20
+ scan23
21
+ scan28
22
+ scan29
23
+ scan30
24
+ scan31
25
+ scan32
26
+ scan33
27
+ scan34
28
+ scan35
29
+ scan36
30
+ scan38
31
+ scan39
32
+ scan41
33
+ scan42
34
+ scan43
35
+ scan44
36
+ scan45
37
+ scan46
38
+ scan47
39
+ scan48
40
+ scan49
41
+ scan50
42
+ scan51
43
+ scan52
44
+ scan59
45
+ scan60
46
+ scan61
47
+ scan62
48
+ scan64
49
+ scan74
50
+ scan75
51
+ scan76
52
+ scan77
53
+ scan84
54
+ scan85
55
+ scan86
56
+ scan87
57
+ scan88
58
+ scan89
59
+ scan90
60
+ scan91
61
+ scan92
62
+ scan93
63
+ scan94
64
+ scan95
65
+ scan96
66
+ scan98
67
+ scan99
68
+ scan100
69
+ scan101
70
+ scan102
71
+ scan103
72
+ scan104
73
+ scan126
74
+ scan127
75
+ scan128
SparseNeuS_demo_v1/data/dtu_fit.py ADDED
@@ -0,0 +1,278 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import cv2 as cv
4
+ import numpy as np
5
+ import re
6
+ import os
7
+ import logging
8
+ from glob import glob
9
+
10
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
11
+
12
+ from data.scene import get_boundingbox
13
+
14
+
15
+ def load_K_Rt_from_P(filename, P=None):
16
+ if P is None:
17
+ lines = open(filename).read().splitlines()
18
+ if len(lines) == 4:
19
+ lines = lines[1:]
20
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
21
+ P = np.asarray(lines).astype(np.float32).squeeze()
22
+
23
+ out = cv.decomposeProjectionMatrix(P)
24
+ K = out[0]
25
+ R = out[1]
26
+ t = out[2]
27
+
28
+ K = K / K[2, 2]
29
+ intrinsics = np.eye(4)
30
+ intrinsics[:3, :3] = K
31
+
32
+ pose = np.eye(4, dtype=np.float32)
33
+ pose[:3, :3] = R.transpose() # ? why need transpose here
34
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
35
+
36
+ return intrinsics, pose # ! return cam2world matrix here
37
+
38
+
39
+ class DtuFit:
40
+ def __init__(self, root_dir, split, scan_id, n_views, train_img_idx=[], test_img_idx=[],
41
+ img_wh=[800, 600], clip_wh=[0, 0], original_img_wh=[1600, 1200],
42
+ N_rays=512, h_patch_size=5, near=425, far=900):
43
+ super(DtuFit, self).__init__()
44
+ logging.info('Load data: Begin')
45
+
46
+ self.root_dir = root_dir
47
+ self.split = split
48
+ self.scan_id = scan_id
49
+ self.n_views = n_views
50
+
51
+ self.near = near
52
+ self.far = far
53
+
54
+ if self.scan_id is not None:
55
+ self.data_dir = os.path.join(self.root_dir, self.scan_id)
56
+ else:
57
+ self.data_dir = self.root_dir
58
+
59
+ self.img_wh = img_wh
60
+ self.clip_wh = clip_wh
61
+
62
+ if len(self.clip_wh) == 2:
63
+ self.clip_wh = self.clip_wh + self.clip_wh
64
+
65
+ self.original_img_wh = original_img_wh
66
+ self.N_rays = N_rays
67
+ self.h_patch_size = h_patch_size # used to extract patch for supervision
68
+ self.train_img_idx = train_img_idx
69
+ self.test_img_idx = test_img_idx
70
+
71
+ camera_dict = np.load(os.path.join(self.data_dir, 'cameras.npz'), allow_pickle=True)
72
+ self.images_list = sorted(glob(os.path.join(self.data_dir, "image/*.png")))
73
+ # world_mat: projection matrix: world to image
74
+ self.world_mats_np = [camera_dict['world_mat_%d' % idx].astype(np.float32) for idx in
75
+ range(len(self.images_list))]
76
+
77
+ self.raw_near_fars = np.stack([np.array([self.near, self.far]) for i in range(len(self.images_list))])
78
+
79
+ # - reference image; transform the world system to the ref-camera system
80
+ self.ref_img_idx = self.train_img_idx[0]
81
+ ref_world_mat = self.world_mats_np[self.ref_img_idx]
82
+ self.ref_w2c = np.linalg.inv(load_K_Rt_from_P(None, ref_world_mat[:3, :4])[1])
83
+
84
+ self.all_images = []
85
+ self.all_intrinsics = []
86
+ self.all_w2cs = []
87
+
88
+ self.load_scene() # load the scene
89
+
90
+ # ! estimate scale_mat
91
+ self.scale_mat, self.scale_factor = self.cal_scale_mat(
92
+ img_hw=[self.img_wh[1], self.img_wh[0]],
93
+ intrinsics=self.all_intrinsics[self.train_img_idx],
94
+ extrinsics=self.all_w2cs[self.train_img_idx],
95
+ near_fars=self.raw_near_fars[self.train_img_idx],
96
+ factor=1.1)
97
+
98
+ # * after scaling and translation, unit bounding box
99
+ self.scaled_intrinsics, self.scaled_w2cs, self.scaled_c2ws, \
100
+ self.scaled_affine_mats, self.scaled_near_fars = self.scale_cam_info()
101
+ # import ipdb; ipdb.set_trace()
102
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
103
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
104
+ self.partial_vol_origin = torch.Tensor([-1., -1., -1.])
105
+
106
+ logging.info('Load data: End')
107
+
108
+ def load_scene(self):
109
+
110
+ scale_x = self.img_wh[0] / self.original_img_wh[0]
111
+ scale_y = self.img_wh[1] / self.original_img_wh[1]
112
+
113
+ for idx in range(len(self.images_list)):
114
+ image = cv.imread(self.images_list[idx])
115
+ image = cv.resize(image, (self.img_wh[0], self.img_wh[1])) / 255.
116
+
117
+ image = image[self.clip_wh[1]:self.img_wh[1] - self.clip_wh[3],
118
+ self.clip_wh[0]:self.img_wh[0] - self.clip_wh[2]]
119
+ self.all_images.append(np.transpose(image[:, :, ::-1], (2, 0, 1))) # append [3,]
120
+
121
+ P = self.world_mats_np[idx]
122
+ P = P[:3, :4]
123
+ intrinsics, c2w = load_K_Rt_from_P(None, P)
124
+ w2c = np.linalg.inv(c2w)
125
+
126
+ intrinsics[:1] *= scale_x
127
+ intrinsics[1:2] *= scale_y
128
+
129
+ intrinsics[0, 2] -= self.clip_wh[0]
130
+ intrinsics[1, 2] -= self.clip_wh[1]
131
+
132
+ self.all_intrinsics.append(intrinsics)
133
+ # - transform from world system to ref-camera system
134
+ self.all_w2cs.append(w2c @ np.linalg.inv(self.ref_w2c))
135
+
136
+
137
+ self.all_images = torch.from_numpy(np.stack(self.all_images)).to(torch.float32)
138
+ self.all_intrinsics = torch.from_numpy(np.stack(self.all_intrinsics)).to(torch.float32)
139
+ self.all_w2cs = torch.from_numpy(np.stack(self.all_w2cs)).to(torch.float32)
140
+ self.img_wh = [self.img_wh[0] - self.clip_wh[0] - self.clip_wh[2],
141
+ self.img_wh[1] - self.clip_wh[1] - self.clip_wh[3]]
142
+
143
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
144
+ center, radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
145
+ radius = radius * factor
146
+ scale_mat = np.diag([radius, radius, radius, 1.0])
147
+ scale_mat[:3, 3] = center.cpu().numpy()
148
+ scale_mat = scale_mat.astype(np.float32)
149
+
150
+ return scale_mat, 1. / radius.cpu().numpy()
151
+
152
+ def scale_cam_info(self):
153
+ new_intrinsics = []
154
+ new_near_fars = []
155
+ new_w2cs = []
156
+ new_c2ws = []
157
+ new_affine_mats = []
158
+ for idx in range(len(self.all_images)):
159
+ intrinsics = self.all_intrinsics[idx]
160
+ P = intrinsics @ self.all_w2cs[idx] @ self.scale_mat
161
+ P = P.cpu().numpy()[:3, :4]
162
+
163
+ # - should use load_K_Rt_from_P() to obtain c2w
164
+ c2w = load_K_Rt_from_P(None, P)[1]
165
+ w2c = np.linalg.inv(c2w)
166
+ new_w2cs.append(w2c)
167
+ new_c2ws.append(c2w)
168
+ new_intrinsics.append(intrinsics)
169
+ affine_mat = np.eye(4)
170
+ affine_mat[:3, :4] = intrinsics[:3, :3] @ w2c[:3, :4]
171
+ new_affine_mats.append(affine_mat)
172
+
173
+ camera_o = c2w[:3, 3]
174
+ dist = np.sqrt(np.sum(camera_o ** 2))
175
+ near = dist - 1
176
+ far = dist + 1
177
+
178
+ new_near_fars.append([0.95 * near, 1.05 * far])
179
+
180
+ new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars = \
181
+ np.stack(new_intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), \
182
+ np.stack(new_affine_mats), np.stack(new_near_fars)
183
+
184
+ new_intrinsics = torch.from_numpy(np.float32(new_intrinsics))
185
+ new_w2cs = torch.from_numpy(np.float32(new_w2cs))
186
+ new_c2ws = torch.from_numpy(np.float32(new_c2ws))
187
+ new_affine_mats = torch.from_numpy(np.float32(new_affine_mats))
188
+ new_near_fars = torch.from_numpy(np.float32(new_near_fars))
189
+
190
+ return new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars
191
+
192
+
193
+ def get_conditional_sample(self):
194
+ sample = {}
195
+ support_idxs = self.train_img_idx
196
+
197
+ sample['images'] = self.all_images[support_idxs] # (V, 3, H, W)
198
+ sample['w2cs'] = self.scaled_w2cs[self.train_img_idx] # (V, 4, 4)
199
+ sample['c2ws'] = self.scaled_c2ws[self.train_img_idx] # (V, 4, 4)
200
+ sample['near_fars'] = self.scaled_near_fars[self.train_img_idx] # (V, 2)
201
+ sample['intrinsics'] = self.scaled_intrinsics[self.train_img_idx][:, :3, :3] # (V, 3, 3)
202
+ sample['affine_mats'] = self.scaled_affine_mats[self.train_img_idx] # ! in world space
203
+
204
+ sample['scan'] = self.scan_id
205
+ sample['scale_factor'] = torch.tensor(self.scale_factor)
206
+ sample['scale_mat'] = torch.from_numpy(self.scale_mat)
207
+ sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c))
208
+ sample['img_wh'] = torch.from_numpy(np.array(self.img_wh))
209
+ sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32)
210
+
211
+ return sample
212
+
213
+ def __len__(self):
214
+ if self.split == 'train':
215
+ return self.n_views * 1000
216
+ else:
217
+ return len(self.test_img_idx) * 1000
218
+
219
+ def __getitem__(self, idx):
220
+ sample = {}
221
+
222
+ if self.split == 'train':
223
+ render_idx = self.train_img_idx[idx % self.n_views]
224
+ support_idxs = [idx for idx in self.train_img_idx if idx != render_idx]
225
+ else:
226
+ # render_idx = idx % self.n_test_images + self.n_train_images
227
+ render_idx = self.test_img_idx[idx % len(self.test_img_idx)]
228
+ support_idxs = [render_idx]
229
+
230
+ sample['images'] = self.all_images[support_idxs] # (V, 3, H, W)
231
+ sample['w2cs'] = self.scaled_w2cs[support_idxs] # (V, 4, 4)
232
+ sample['c2ws'] = self.scaled_c2ws[support_idxs] # (V, 4, 4)
233
+ sample['intrinsics'] = self.scaled_intrinsics[support_idxs][:, :3, :3] # (V, 3, 3)
234
+ sample['affine_mats'] = self.scaled_affine_mats[support_idxs] # ! in world space
235
+ sample['scan'] = self.scan_id
236
+ sample['scale_factor'] = torch.tensor(self.scale_factor)
237
+ sample['img_wh'] = torch.from_numpy(np.array(self.img_wh))
238
+ sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32)
239
+ sample['img_index'] = torch.tensor(render_idx)
240
+
241
+ # - query image
242
+ sample['query_image'] = self.all_images[render_idx]
243
+ sample['query_c2w'] = self.scaled_c2ws[render_idx]
244
+ sample['query_w2c'] = self.scaled_w2cs[render_idx]
245
+ sample['query_intrinsic'] = self.scaled_intrinsics[render_idx]
246
+ sample['query_near_far'] = self.scaled_near_fars[render_idx]
247
+ sample['meta'] = str(self.scan_id) + "_" + os.path.basename(self.images_list[render_idx])
248
+ sample['scale_mat'] = torch.from_numpy(self.scale_mat)
249
+ sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c))
250
+ sample['rendering_c2ws'] = self.scaled_c2ws[self.test_img_idx]
251
+ sample['rendering_imgs_idx'] = torch.Tensor(np.array(self.test_img_idx).astype(np.int32))
252
+
253
+ # - generate rays
254
+ if self.split == 'val' or self.split == 'test':
255
+ sample_rays = gen_rays_from_single_image(
256
+ self.img_wh[1], self.img_wh[0],
257
+ sample['query_image'],
258
+ sample['query_intrinsic'],
259
+ sample['query_c2w'],
260
+ depth=None,
261
+ mask=None)
262
+ else:
263
+ sample_rays = gen_random_rays_from_single_image(
264
+ self.img_wh[1], self.img_wh[0],
265
+ self.N_rays,
266
+ sample['query_image'],
267
+ sample['query_intrinsic'],
268
+ sample['query_c2w'],
269
+ depth=None,
270
+ mask=None,
271
+ dilated_mask=None,
272
+ importance_sample=False,
273
+ h_patch_size=self.h_patch_size
274
+ )
275
+
276
+ sample['rays'] = sample_rays
277
+
278
+ return sample
SparseNeuS_demo_v1/data/dtu_general.py ADDED
@@ -0,0 +1,376 @@
1
+ from torch.utils.data import Dataset
2
+ from utils.misc_utils import read_pfm
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ import torch
8
+ from torchvision import transforms as T
9
+ from data.scene import get_boundingbox
10
+
11
+ from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
12
+
13
+ from termcolor import colored
14
+ import pdb
15
+ import random
16
+
17
+
18
+ def load_K_Rt_from_P(filename, P=None):
19
+ if P is None:
20
+ lines = open(filename).read().splitlines()
21
+ if len(lines) == 4:
22
+ lines = lines[1:]
23
+ lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
24
+ P = np.asarray(lines).astype(np.float32).squeeze()
25
+
26
+ out = cv2.decomposeProjectionMatrix(P)
27
+ K = out[0]
28
+ R = out[1]
29
+ t = out[2]
30
+
31
+ K = K / K[2, 2]
32
+ intrinsics = np.eye(4)
33
+ intrinsics[:3, :3] = K
34
+
35
+ pose = np.eye(4, dtype=np.float32)
36
+ pose[:3, :3] = R.transpose() # R from decomposeProjectionMatrix is world-to-camera, so its transpose is the camera-to-world rotation
37
+ pose[:3, 3] = (t[:3] / t[3])[:, 0]
38
+
39
+ return intrinsics, pose # ! return cam2world matrix here
40
+
41
+
42
+ # ! load one ref-image with multiple src-images in camera coordinate system
43
+ class MVSDatasetDtuPerView(Dataset):
44
+ def __init__(self, root_dir, split, n_views=3, img_wh=(640, 512), downSample=1.0,
45
+ split_filepath=None, pair_filepath=None,
46
+ N_rays=512,
47
+ vol_dims=[128, 128, 128], batch_size=1,
48
+ clean_image=False, importance_sample=False, test_ref_views=[]):
49
+
50
+ self.root_dir = root_dir
51
+ self.split = split
52
+
53
+ self.img_wh = img_wh
54
+ self.downSample = downSample
55
+ self.num_all_imgs = 49 # this preprocessed DTU dataset has 49 images
56
+ self.n_views = n_views
57
+ self.N_rays = N_rays
58
+ self.batch_size = batch_size # - used for construct new metas for gru fusion training
59
+
60
+ self.clean_image = clean_image
61
+ self.importance_sample = importance_sample
62
+ self.test_ref_views = test_ref_views # used for testing
63
+ self.scale_factor = 1.0
64
+ self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
65
+
66
+ if img_wh is not None:
67
+ assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
68
+ 'img_wh must both be multiples of 32!'
69
+
70
+ self.split_filepath = f'data/dtu/lists/{self.split}.txt' if split_filepath is None else split_filepath
71
+ self.pair_filepath = f'data/dtu/dtu_pairs.txt' if pair_filepath is None else pair_filepath
72
+
73
+ print(colored("loading all scenes together", 'red'))
74
+ with open(self.split_filepath) as f:
75
+ self.scans = [line.rstrip() for line in f.readlines()]
76
+
77
+ self.all_intrinsics = [] # the cam info of the whole scene
78
+ self.all_extrinsics = []
79
+ self.all_near_fars = []
80
+
81
+ self.metas, self.ref_src_pairs = self.build_metas() # load ref-srcs view pairs info of the scene
82
+
83
+ self.allview_ids = [i for i in range(self.num_all_imgs)]
84
+
85
+ self.load_cam_info() # load camera info of DTU, and estimate scale_mat
86
+
87
+ self.build_remap()
88
+ self.define_transforms()
89
+ print(f'==> image down scale: {self.downSample}')
90
+
91
+ # * bounding box for rendering
92
+ self.bbox_min = np.array([-1.0, -1.0, -1.0])
93
+ self.bbox_max = np.array([1.0, 1.0, 1.0])
94
+
95
+ # - used for cost volume regularization
96
+ self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
97
+ self.partial_vol_origin = torch.Tensor([-1., -1., -1.])
98
+
99
+ def build_remap(self):
100
+ self.remap = np.zeros(np.max(self.allview_ids) + 1).astype('int')
101
+ for i, item in enumerate(self.allview_ids):
102
+ self.remap[item] = i
103
+
104
+ def define_transforms(self):
105
+ self.transform = T.Compose([T.ToTensor()])
106
+
107
+ def build_metas(self):
108
+ metas = []
109
+ ref_src_pairs = {}
110
+ # light conditions 0-6 for training
111
+ # light condition 3 for testing (the brightest?)
112
+ light_idxs = [3] if 'train' not in self.split else range(7)
113
+
114
+ with open(self.pair_filepath) as f:
115
+ num_viewpoint = int(f.readline())
116
+ # viewpoints (49)
117
+ for _ in range(num_viewpoint):
118
+ ref_view = int(f.readline().rstrip())
119
+ src_views = [int(x) for x in f.readline().rstrip().split()[1::2]]
120
+
121
+ ref_src_pairs[ref_view] = src_views
122
+
123
+ for light_idx in light_idxs:
124
+ for scan in self.scans:
125
+ with open(self.pair_filepath) as f:
126
+ num_viewpoint = int(f.readline())
127
+ # viewpoints (49)
128
+ for _ in range(num_viewpoint):
129
+ ref_view = int(f.readline().rstrip())
130
+ src_views = [int(x) for x in f.readline().rstrip().split()[1::2]]
131
+
132
+ # ! only for validation
133
+ if len(self.test_ref_views) > 0 and ref_view not in self.test_ref_views:
134
+ continue
135
+
136
+ metas += [(scan, light_idx, ref_view, src_views)]
137
+
138
+ return metas, ref_src_pairs
139
+
140
+ def read_cam_file(self, filename):
141
+ with open(filename) as f:
142
+ lines = [line.rstrip() for line in f.readlines()]
143
+ # extrinsics: line [1,5), 4x4 matrix
144
+ extrinsics = np.fromstring(' '.join(lines[1:5]), dtype=np.float32, sep=' ')
145
+ extrinsics = extrinsics.reshape((4, 4))
146
+ # intrinsics: line [7-10), 3x3 matrix
147
+ intrinsics = np.fromstring(' '.join(lines[7:10]), dtype=np.float32, sep=' ')
148
+ intrinsics = intrinsics.reshape((3, 3))
149
+ # depth_min & depth_interval: line 11
150
+ depth_min = float(lines[11].split()[0])
151
+ depth_max = depth_min + float(lines[11].split()[1]) * 192
152
+ self.depth_interval = float(lines[11].split()[1])
153
+ intrinsics_ = np.float32(np.diag([1, 1, 1, 1]))
154
+ intrinsics_[:3, :3] = intrinsics
155
+ return intrinsics_, extrinsics, [depth_min, depth_max]
156
+
157
+ def load_cam_info(self):
158
+ for vid in range(self.num_all_imgs):
159
+ proj_mat_filename = os.path.join(self.root_dir,
160
+ f'Cameras/train/{vid:08d}_cam.txt')
161
+ intrinsic, extrinsic, near_far = self.read_cam_file(proj_mat_filename)
162
+ intrinsic[:2] *= 4 # * the provided intrinsics are 4x downsampled; rescale them to match the full-resolution images
163
+ self.all_intrinsics.append(intrinsic)
164
+ self.all_extrinsics.append(extrinsic)
165
+ self.all_near_fars.append(near_far)
166
+
167
+ def read_depth(self, filename):
168
+ # import ipdb; ipdb.set_trace()
169
+ depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600)
170
+ depth_h = np.ones((1200, 1600))  # NOTE: the PFM depth loaded above is discarded and replaced with a dummy map here
171
+ # print(depth_h.shape)
172
+ depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5,
173
+ interpolation=cv2.INTER_NEAREST) # (600, 800)
174
+ depth_h = depth_h[44:556, 80:720] # (512, 640)
175
+ # print(depth_h.shape)
176
+ # import ipdb; ipdb.set_trace()
177
+ depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample,
178
+ interpolation=cv2.INTER_NEAREST)
179
+ depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4,
180
+ interpolation=cv2.INTER_NEAREST)
181
+
182
+ return depth, depth_h
183
+
184
+ def read_mask(self, filename):
185
+ mask_h = cv2.imread(filename, 0)
186
+ mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
187
+ interpolation=cv2.INTER_NEAREST)
188
+ mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
189
+ interpolation=cv2.INTER_NEAREST)
190
+
191
+ mask[mask > 0] = 1 # the masks stored in png are not binary
192
+ mask_h[mask_h > 0] = 1
193
+
194
+ return mask, mask_h
195
+
196
+ def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
197
+ center, radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
198
+ radius = radius * factor
199
+ scale_mat = np.diag([radius, radius, radius, 1.0])
200
+ scale_mat[:3, 3] = center.cpu().numpy()
201
+ scale_mat = scale_mat.astype(np.float32)
202
+
203
+ return scale_mat, 1. / radius.cpu().numpy()
204
+
205
+ def __len__(self):
206
+ return len(self.metas)
207
+
208
+ def __getitem__(self, idx):
209
+ sample = {}
210
+ scan, light_idx, ref_view, src_views = self.metas[idx % len(self.metas)]
211
+
212
+ # generalized, load some images at once
213
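+ # the first entry is the reference (query) view, followed by its top n_views source views from the pair file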
+ view_ids = [ref_view] + src_views[:self.n_views]
214
+ # * transform from world system to camera system
215
+ w2c_ref = self.all_extrinsics[self.remap[ref_view]]
216
+ w2c_ref_inv = np.linalg.inv(w2c_ref)
217
+
218
+ image_perm = 0 # only supervised on reference view
219
+
220
+ imgs, depths_h, masks_h = [], [], [] # full size (640, 512)
221
+ intrinsics, w2cs, near_fars = [], [], [] # record proj mats between views
222
+ mask_dilated = None
223
+ for i, vid in enumerate(view_ids):
224
+ # NOTE that the id in image file names is from 1 to 49 (not 0~48)
225
+ img_filename = os.path.join(self.root_dir,
226
+ f'Rectified/{scan}_train/rect_{vid + 1:03d}_{light_idx}_r5000.png')
227
+ depth_filename = os.path.join(self.root_dir,
228
+ f'Depths/{scan}_train/depth_map_{vid:04d}.pfm')
229
+ # print(depth_filename)
230
+ mask_filename = os.path.join(self.root_dir,
231
+ f'Masks_clean_dilated/{scan}_train/mask_{vid:04d}.png')
232
+
233
+ img = Image.open(img_filename)
234
+ img_wh = np.round(np.array(img.size) * self.downSample).astype('int')
235
+ img = img.resize(img_wh, Image.BILINEAR)
236
+
237
+ if os.path.exists(mask_filename) and self.clean_image:
238
+ mask_l, mask_h = self.read_mask(mask_filename)
239
+ else:
240
+ # print(self.split, "don't find mask file", mask_filename)
241
+ mask_h = np.ones([img_wh[1], img_wh[0]])
242
+ masks_h.append(mask_h)
243
+
244
+ if i == 0:
245
+ kernel_size = 101 # default 101
246
+ kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
247
+ mask_dilated = np.float32(cv2.dilate(np.uint8(mask_h * 255), kernel, iterations=1) > 128)
248
+
249
+ if self.clean_image:
250
+ img = np.array(img)
251
+ img[mask_h < 0.5] = 0.0
252
+
253
+ img = self.transform(img)
254
+
255
+ imgs += [img]
256
+
257
+ index_mat = self.remap[vid]
258
+ near_fars.append(self.all_near_fars[index_mat])
259
+ intrinsics.append(self.all_intrinsics[index_mat])
260
+
261
+ w2cs.append(self.all_extrinsics[index_mat] @ w2c_ref_inv)
262
+
263
+ # print(depth_filename)
264
+ if os.path.exists(depth_filename): # and i == 0
265
+ # print("file exists")
266
+ depth_l, depth_h = self.read_depth(depth_filename)
267
+ depths_h.append(depth_h)
268
+ # ! estimate scale_mat
269
+ scale_mat, scale_factor = self.cal_scale_mat(img_hw=[img_wh[1], img_wh[0]],
270
+ intrinsics=intrinsics, extrinsics=w2cs,
271
+ near_fars=near_fars, factor=1.1)
272
+
273
+ # ! calculate the new w2cs after scaling
274
+ new_near_fars = []
275
+ new_w2cs = []
276
+ new_c2ws = []
277
+ new_affine_mats = []
278
+ new_depths_h = []
279
+ for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
280
+ P = intrinsic @ extrinsic @ scale_mat
281
+ P = P[:3, :4]
282
+ # - should use load_K_Rt_from_P() to obtain c2w
283
+ c2w = load_K_Rt_from_P(None, P)[1]
284
+ w2c = np.linalg.inv(c2w)
285
+ new_w2cs.append(w2c)
286
+ new_c2ws.append(c2w)
287
+ affine_mat = np.eye(4)
288
+ affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
289
+ new_affine_mats.append(affine_mat)
290
+
291
+ camera_o = c2w[:3, 3]
292
+ dist = np.sqrt(np.sum(camera_o ** 2))
293
+ near = dist - 1
294
+ far = dist + 1
295
+
296
+ new_near_fars.append([0.95 * near, 1.05 * far])
297
+ new_depths_h.append(depth * scale_factor)
298
+
299
+ imgs = torch.stack(imgs).float()
300
+ # print(new_near_fars)
301
+ depths_h = np.stack(new_depths_h)
302
+ masks_h = np.stack(masks_h)
303
+
304
+ affine_mats = np.stack(new_affine_mats)
305
+ intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
306
+ new_near_fars)
307
+
308
+ if 'train' in self.split:
309
+ start_idx = 0
310
+ else:
311
+ start_idx = 1
312
+
313
+ sample['images'] = imgs # (V, 3, H, W)
314
+ sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
315
+ sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
316
+ sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
317
+ sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
318
+ sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
319
+ sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
320
+ sample['view_ids'] = torch.from_numpy(np.array(view_ids))
321
+ sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
322
+
323
+ sample['light_idx'] = torch.tensor(light_idx)
324
+ sample['scan'] = scan
325
+
326
+ sample['scale_factor'] = torch.tensor(scale_factor)
327
+ sample['img_wh'] = torch.from_numpy(img_wh)
328
+ sample['render_img_idx'] = torch.tensor(image_perm)
329
+ sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32)
330
+ sample['meta'] = str(scan) + "_light" + str(light_idx) + "_refview" + str(ref_view)
331
+
332
+ # - image to render
333
+ sample['query_image'] = sample['images'][0]
334
+ sample['query_c2w'] = sample['c2ws'][0]
335
+ sample['query_w2c'] = sample['w2cs'][0]
336
+ sample['query_intrinsic'] = sample['intrinsics'][0]
337
+ sample['query_depth'] = sample['depths_h'][0]
338
+ sample['query_mask'] = sample['masks_h'][0]
339
+ sample['query_near_far'] = sample['near_fars'][0]
340
+
341
+ sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
342
+ sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
343
+ sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
344
+ sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
345
+ sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
346
+ sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
347
+ sample['view_ids'] = sample['view_ids'][start_idx:]
348
+ sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
349
+
350
+ sample['scale_mat'] = torch.from_numpy(scale_mat)
351
+ sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
352
+
353
+ # - generate rays
354
+ if ('val' in self.split) or ('test' in self.split):
355
+ sample_rays = gen_rays_from_single_image(
356
+ img_wh[1], img_wh[0],
357
+ sample['query_image'],
358
+ sample['query_intrinsic'],
359
+ sample['query_c2w'],
360
+ depth=sample['query_depth'],
361
+ mask=sample['query_mask'] if self.clean_image else None)
362
+ else:
363
+ sample_rays = gen_random_rays_from_single_image(
364
+ img_wh[1], img_wh[0],
365
+ self.N_rays,
366
+ sample['query_image'],
367
+ sample['query_intrinsic'],
368
+ sample['query_c2w'],
369
+ depth=sample['query_depth'],
370
+ mask=sample['query_mask'] if self.clean_image else None,
371
+ dilated_mask=mask_dilated,
372
+ importance_sample=self.importance_sample)
373
+
374
+ sample['rays'] = sample_rays
375
+
376
+ return sample
SparseNeuS_demo_v1/data/scene.py ADDED
@@ -0,0 +1,102 @@
1
+ import numpy as np
2
+ import torch
3
+ import pdb
4
+
5
+
6
+ def rigid_transform(xyz, transform):
7
+ """Applies a rigid transform (c2w) to an (N, 3) pointcloud.
8
+ """
9
+ device = xyz.device
10
+ xyz_h = torch.cat([xyz, torch.ones((len(xyz), 1)).to(device)], dim=1) # (N, 4)
11
+ xyz_t_h = (transform @ xyz_h.T).T # * checked: the same with the below
12
+
13
+ return xyz_t_h[:, :3]
14
+
15
+
16
+ def get_view_frustum(min_depth, max_depth, size, cam_intr, c2w):
17
+ """Get corners of 3D camera view frustum of depth image
18
+ """
19
+ device = cam_intr.device
20
+ im_h, im_w = size
21
+ im_h = int(im_h)
22
+ im_w = int(im_w)
23
+ view_frust_pts = torch.stack([
24
+ (torch.tensor([0, 0, im_w, im_w, 0, 0, im_w, im_w]).to(device) - cam_intr[0, 2]) * torch.tensor(
25
+ [min_depth, min_depth, min_depth, min_depth, max_depth, max_depth, max_depth, max_depth]).to(device) /
26
+ cam_intr[0, 0],
27
+ (torch.tensor([0, im_h, 0, im_h, 0, im_h, 0, im_h]).to(device) - cam_intr[1, 2]) * torch.tensor(
28
+ [min_depth, min_depth, min_depth, min_depth, max_depth, max_depth, max_depth, max_depth]).to(device) /
29
+ cam_intr[1, 1],
30
+ torch.tensor([min_depth, min_depth, min_depth, min_depth, max_depth, max_depth, max_depth, max_depth]).to(
31
+ device)
32
+ ])
33
+ view_frust_pts = view_frust_pts.type(torch.float32)
34
+ c2w = c2w.type(torch.float32)
35
+ view_frust_pts = rigid_transform(view_frust_pts.T, c2w).T
36
+ return view_frust_pts
37
+
38
+
39
+ def set_pixel_coords(h, w):
40
+ i_range = torch.arange(0, h).view(1, h, 1).expand(1, h, w).type(torch.float32) # [1, H, W]
41
+ j_range = torch.arange(0, w).view(1, 1, w).expand(1, h, w).type(torch.float32) # [1, H, W]
42
+ ones = torch.ones(1, h, w).type(torch.float32)
43
+
44
+ pixel_coords = torch.stack((j_range, i_range, ones), dim=1) # [1, 3, H, W]
45
+
46
+ return pixel_coords
47
+
48
+
49
+ def get_boundingbox(img_hw, intrinsics, extrinsics, near_fars):
50
+ """
51
+ # get the minimum bounding box of all visual hulls
52
+ :param img_hw:
53
+ :param intrinsics:
54
+ :param extrinsics:
55
+ :param near_fars:
56
+ :return:
57
+ """
58
+
59
+ bnds = torch.zeros((3, 2))
60
+ bnds[:, 0] = np.inf
61
+ bnds[:, 1] = -np.inf
62
+
63
+ if isinstance(intrinsics, list):
64
+ num = len(intrinsics)
65
+ else:
66
+ num = intrinsics.shape[0]
67
+ # print("num: ", num)
68
+ view_frust_pts_list = []
69
+ for i in range(num):
70
+ if not isinstance(intrinsics[i], torch.Tensor):
71
+ cam_intr = torch.tensor(intrinsics[i])
72
+ w2c = torch.tensor(extrinsics[i])
73
+ c2w = torch.inverse(w2c)
74
+ else:
75
+ cam_intr = intrinsics[i]
76
+ w2c = extrinsics[i]
77
+ c2w = torch.inverse(w2c)
78
+ min_depth, max_depth = near_fars[i][0], near_fars[i][1]
79
+ # todo: check the coresponding points are matched
80
+
81
+ view_frust_pts = get_view_frustum(min_depth, max_depth, img_hw, cam_intr, c2w)
82
+ bnds[:, 0] = torch.min(bnds[:, 0], torch.min(view_frust_pts, dim=1)[0])
83
+ bnds[:, 1] = torch.max(bnds[:, 1], torch.max(view_frust_pts, dim=1)[0])
84
+ view_frust_pts_list.append(view_frust_pts)
85
+ all_view_frust_pts = torch.cat(view_frust_pts_list, dim=1)
86
+
87
+ # print("all_view_frust_pts: ", all_view_frust_pts.shape)
88
+ # distance = torch.norm(all_view_frust_pts, dim=0)
89
+ # print("distance: ", distance)
90
+
91
+ # print("all_view_frust_pts_z: ", all_view_frust_pts[2, :])
92
+
93
+ center = torch.tensor(((bnds[0, 1] + bnds[0, 0]) / 2, (bnds[1, 1] + bnds[1, 0]) / 2,
94
+ (bnds[2, 1] + bnds[2, 0]) / 2))
95
+
96
+ lengths = bnds[:, 1] - bnds[:, 0]
97
+
98
+ max_length, _ = torch.max(lengths, dim=0)
99
+ radius = max_length / 2
100
+
101
+ # print("radius: ", radius)
102
+ return center, radius, bnds
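
As a sanity check, below is a minimal, self-contained way to exercise get_boundingbox from the scene.py above with a single synthetic camera. The intrinsics, pose, and near/far values are invented for illustration only, and the snippet assumes the functions above are in scope (e.g. imported from SparseNeuS_demo_v1/data/scene.py).

    # One synthetic 640x480 camera at the origin; values are made up for the demo.
    import torch

    intrinsics = [torch.tensor([[500.0, 0.0, 320.0],
                                [0.0, 500.0, 240.0],
                                [0.0, 0.0, 1.0]])]
    extrinsics = [torch.eye(4)]          # world-to-camera (identity pose)
    near_fars = [(0.5, 2.0)]

    center, radius, bnds = get_boundingbox((480, 640), intrinsics, extrinsics, near_fars)
    print(center, radius)                # centre and half-extent of the fused view frustum
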
SparseNeuS_demo_v1/evaluation/__init__.py ADDED
File without changes
SparseNeuS_demo_v1/evaluation/clean_mesh.py ADDED
@@ -0,0 +1,283 @@
+ import numpy as np
+ import cv2 as cv
+ import os
+ from glob import glob
+ from scipy.io import loadmat
+ import trimesh
+ import open3d as o3d
+ import torch
+ from tqdm import tqdm
+
+ import sys
+
+ sys.path.append("../")
+
+
+ def gen_rays_from_single_image(H, W, image, intrinsic, c2w, depth=None, mask=None):
+     """
+     Generate rays in world space for a single image.
+     :param H:
+     :param W:
+     :param intrinsic: [3,3]
+     :param c2w: [4,4]
+     :return:
+     """
+     device = image.device
+     ys, xs = torch.meshgrid(torch.linspace(0, H - 1, H),
+                             torch.linspace(0, W - 1, W))  # pytorch's meshgrid has indexing='ij'
+     p = torch.stack([xs, ys, torch.ones_like(ys)], dim=-1)  # H, W, 3
+
+     # normalized ndc uv coordinates, (-1, 1)
+     ndc_u = 2 * xs / (W - 1) - 1
+     ndc_v = 2 * ys / (H - 1) - 1
+     rays_ndc_uv = torch.stack([ndc_u, ndc_v], dim=-1).view(-1, 2).float().to(device)
+
+     intrinsic_inv = torch.inverse(intrinsic)
+
+     p = p.view(-1, 3).float().to(device)  # N_rays, 3
+     p = torch.matmul(intrinsic_inv[None, :3, :3], p[:, :, None]).squeeze()  # N_rays, 3
+     rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True)  # N_rays, 3
+     rays_v = torch.matmul(c2w[None, :3, :3], rays_v[:, :, None]).squeeze()  # N_rays, 3
+     rays_o = c2w[None, :3, 3].expand(rays_v.shape)  # N_rays, 3
+
+     image = image.permute(1, 2, 0)
+     color = image.view(-1, 3)
+     depth = depth.view(-1, 1) if depth is not None else None
+     mask = mask.view(-1, 1) if mask is not None else torch.ones([H * W, 1]).to(device)
+     sample = {
+         'rays_o': rays_o,
+         'rays_v': rays_v,
+         'rays_ndc_uv': rays_ndc_uv,
+         'rays_color': color,
+         # 'rays_depth': depth,
+         'rays_mask': mask,
+         'rays_norm_XYZ_cam': p  # - XYZ_cam, before multiplying by depth
+     }
+     if depth is not None:
+         sample['rays_depth'] = depth
+
+     return sample
+
+
+ def load_K_Rt_from_P(filename, P=None):
+     if P is None:
+         lines = open(filename).read().splitlines()
+         if len(lines) == 4:
+             lines = lines[1:]
+         lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
+         P = np.asarray(lines).astype(np.float32).squeeze()
+
+     out = cv.decomposeProjectionMatrix(P)
+     K = out[0]
+     R = out[1]
+     t = out[2]
+
+     K = K / K[2, 2]
+     intrinsics = np.eye(4)
+     intrinsics[:3, :3] = K
+
+     pose = np.eye(4, dtype=np.float32)
+     pose[:3, :3] = R.transpose()  # ? why need transpose here
+     pose[:3, 3] = (t[:3] / t[3])[:, 0]
+
+     return intrinsics, pose  # ! return cam2world matrix here
+
+
+ def clean_points_by_mask(points, scan, imgs_idx=None, minimal_vis=0, mask_dilated_size=11):
+     """Keep points that project inside more than `minimal_vis` of the (dilated) DTU view masks."""
+     cameras = np.load('{}/scan{}/cameras.npz'.format(DTU_DIR, scan))
+     mask_lis = sorted(glob('{}/scan{}/mask/*.png'.format(DTU_DIR, scan)))
+     n_images = 49 if scan < 83 else 64
+     inside_mask = np.zeros(len(points))
+
+     if imgs_idx is None:
+         imgs_idx = [i for i in range(n_images)]
+
+     # imgs_idx = [i for i in range(n_images)]
+     for i in imgs_idx:
+         P = cameras['world_mat_{}'.format(i)]
+         pts_image = np.matmul(P[None, :3, :3], points[:, :, None]).squeeze() + P[None, :3, 3]
+         pts_image = pts_image / pts_image[:, 2:]
+         pts_image = np.round(pts_image).astype(np.int32) + 1
+
+         mask_image = cv.imread(mask_lis[i])
+         kernel_size = mask_dilated_size  # default 101
+         kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (kernel_size, kernel_size))
+         mask_image = cv.dilate(mask_image, kernel, iterations=1)
+         mask_image = (mask_image[:, :, 0] > 128)
+
+         mask_image = np.concatenate([np.ones([1, 1600]), mask_image, np.ones([1, 1600])], axis=0)
+         mask_image = np.concatenate([np.ones([1202, 1]), mask_image, np.ones([1202, 1])], axis=1)
+
+         in_mask = (pts_image[:, 0] >= 0) * (pts_image[:, 0] <= 1600) * (pts_image[:, 1] >= 0) * (
+                 pts_image[:, 1] <= 1200) > 0
+         curr_mask = mask_image[(pts_image[:, 1].clip(0, 1201), pts_image[:, 0].clip(0, 1601))]
+
+         curr_mask = curr_mask.astype(np.float32) * in_mask
+
+         inside_mask += curr_mask
+
+     return inside_mask > minimal_vis
+
+
+ def clean_mesh_faces_by_mask(mesh_file, new_mesh_file, scan, imgs_idx, minimal_vis=0, mask_dilated_size=11):
+     old_mesh = trimesh.load(mesh_file)
+     old_vertices = old_mesh.vertices[:]
+     old_faces = old_mesh.faces[:]
+     mask = clean_points_by_mask(old_vertices, scan, imgs_idx, minimal_vis, mask_dilated_size)
+     indexes = np.ones(len(old_vertices)) * -1
+     indexes = indexes.astype(np.int64)
+     indexes[np.where(mask)] = np.arange(len(np.where(mask)[0]))
+
+     faces_mask = mask[old_faces[:, 0]] & mask[old_faces[:, 1]] & mask[old_faces[:, 2]]
+     new_faces = old_faces[np.where(faces_mask)]
+     new_faces[:, 0] = indexes[new_faces[:, 0]]
+     new_faces[:, 1] = indexes[new_faces[:, 1]]
+     new_faces[:, 2] = indexes[new_faces[:, 2]]
+     new_vertices = old_vertices[np.where(mask)]
+
+     new_mesh = trimesh.Trimesh(new_vertices, new_faces)
+
+     new_mesh.export(new_mesh_file)
+
+
+ def clean_mesh_by_faces_num(mesh, faces_num=500):
+     old_vertices = mesh.vertices[:]
+     old_faces = mesh.faces[:]
+
+     cc = trimesh.graph.connected_components(mesh.face_adjacency, min_len=faces_num)
+     mask = np.zeros(len(mesh.faces), dtype=bool)
+     mask[np.concatenate(cc)] = True
+
+     indexes = np.ones(len(old_vertices)) * -1
+     indexes = indexes.astype(np.int64)
+     indexes[np.where(mask)] = np.arange(len(np.where(mask)[0]))
+
+     faces_mask = mask[old_faces[:, 0]] & mask[old_faces[:, 1]] & mask[old_faces[:, 2]]
+     new_faces = old_faces[np.where(faces_mask)]
+     new_faces[:, 0] = indexes[new_faces[:, 0]]
+     new_faces[:, 1] = indexes[new_faces[:, 1]]
+     new_faces[:, 2] = indexes[new_faces[:, 2]]
+     new_vertices = old_vertices[np.where(mask)]
+
+     new_mesh = trimesh.Trimesh(new_vertices, new_faces)
+
+     return new_mesh
+
+
+ def clean_mesh_faces_outside_frustum(old_mesh_file, new_mesh_file, imgs_idx, H=1200, W=1600, mask_dilated_size=11,
+                                      isolated_face_num=500, keep_largest=True):
+     '''Remove faces of the mesh that cannot be observed by any of the selected cameras.
+     '''
+     # if path_mask_npz:
+     #     path_save_clean = IOUtils.add_file_name_suffix(path_save_clean, '_mask')
+
+     # NOTE: relies on the module-level DTU_DIR and scan variables set in __main__.
+     cameras = np.load('{}/scan{}/cameras.npz'.format(DTU_DIR, scan))
+     mask_lis = sorted(glob('{}/scan{}/mask/*.png'.format(DTU_DIR, scan)))
+
+     mesh = trimesh.load(old_mesh_file)
+     intersector = trimesh.ray.ray_pyembree.RayMeshIntersector(mesh)
+
+     all_indices = []
+     chunk_size = 5120
+     for i in imgs_idx:
+         mask_image = cv.imread(mask_lis[i])
+         kernel_size = mask_dilated_size  # default 101
+         kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (kernel_size, kernel_size))
+         mask_image = cv.dilate(mask_image, kernel, iterations=1)
+
+         P = cameras['world_mat_{}'.format(i)]
+
+         intrinsic, pose = load_K_Rt_from_P(None, P[:3, :])
+
+         rays = gen_rays_from_single_image(H, W, torch.from_numpy(mask_image).permute(2, 0, 1).float(),
+                                           torch.from_numpy(intrinsic)[:3, :3].float(),
+                                           torch.from_numpy(pose).float())
+         rays_o = rays['rays_o']
+         rays_d = rays['rays_v']
+         rays_mask = rays['rays_color']
+
+         rays_o = rays_o.split(chunk_size)
+         rays_d = rays_d.split(chunk_size)
+         rays_mask = rays_mask.split(chunk_size)
+
+         for rays_o_batch, rays_d_batch, rays_mask_batch in tqdm(zip(rays_o, rays_d, rays_mask)):
+             rays_mask_batch = rays_mask_batch[:, 0] > 128
+             rays_o_batch = rays_o_batch[rays_mask_batch]
+             rays_d_batch = rays_d_batch[rays_mask_batch]
+
+             idx_faces_hits = intersector.intersects_first(rays_o_batch.cpu().numpy(), rays_d_batch.cpu().numpy())
+             all_indices.append(idx_faces_hits)
+
+     values = np.unique(np.concatenate(all_indices, axis=0))
+     mask_faces = np.ones(len(mesh.faces))
+     mask_faces[values[1:]] = 0
+     print(f'Surfaces/Kept: {len(mesh.faces)}/{len(values)}')
+
+     mesh_o3d = o3d.io.read_triangle_mesh(old_mesh_file)
+     print("removing triangles by mask")
+     mesh_o3d.remove_triangles_by_mask(mask_faces)
+
+     o3d.io.write_triangle_mesh(new_mesh_file, mesh_o3d)
+
+     # clean meshes
+     new_mesh = trimesh.load(new_mesh_file)
+     cc = trimesh.graph.connected_components(new_mesh.face_adjacency, min_len=500)
+     mask = np.zeros(len(new_mesh.faces), dtype=bool)
+     mask[np.concatenate(cc)] = True
+     new_mesh.update_faces(mask)
+     new_mesh.remove_unreferenced_vertices()
+     new_mesh.export(new_mesh_file)
+
+     # meshes = new_mesh.split(only_watertight=False)
+     #
+     # if not keep_largest:
+     #     meshes = [mesh for mesh in meshes if len(mesh.faces) > isolated_face_num]
+     #     # new_mesh = meshes[np.argmax([len(mesh.faces) for mesh in meshes])]
+     #     merged_mesh = trimesh.util.concatenate(meshes)
+     #     merged_mesh.export(new_mesh_file)
+     # else:
+     #     new_mesh = meshes[np.argmax([len(mesh.faces) for mesh in meshes])]
+     #     new_mesh.export(new_mesh_file)
+
+     o3d.io.write_triangle_mesh(new_mesh_file.replace(".ply", "_raw.ply"), mesh_o3d)
+     print("finished removing triangles")
+
+
+ def clean_outliers(old_mesh_file, new_mesh_file):
+     new_mesh = trimesh.load(old_mesh_file)
+
+     meshes = new_mesh.split(only_watertight=False)
+     new_mesh = meshes[np.argmax([len(mesh.faces) for mesh in meshes])]
+
+     new_mesh.export(new_mesh_file)
+
+
+ if __name__ == "__main__":
+
+     scans = [24, 37, 40, 55, 63, 65, 69, 83, 97, 105, 106, 110, 114, 118, 122]
+
+     mask_kernel_size = 11
+
+     imgs_idx = [0, 1, 2]
+     # imgs_idx = [42, 43, 44]
+     # imgs_idx = [1, 8, 9]
+
+     DTU_DIR = "/home/xiaoxiao/dataset/DTU_IDR/DTU"
+     # DTU_DIR = "/userhome/cs/xxlong/dataset/DTU_IDR/DTU"
+
+     base_path = "/home/xiaoxiao/Workplace/nerf_reconstruction/Volume_NeuS/neus_camsys/exp/dtu/evaluation_23_24_33_new/volsdf"
+
+     for scan in scans:
+         print("processing scan%d" % scan)
+         dir_path = os.path.join(base_path, "scan%d" % scan)
+
+         old_mesh_file = glob(os.path.join(dir_path, "*.ply"))[0]
+
+         clean_mesh_file = os.path.join(dir_path, "clean_%03d.ply" % scan)
+         final_mesh_file = os.path.join(dir_path, "final_%03d.ply" % scan)
+
+         clean_mesh_faces_by_mask(old_mesh_file, clean_mesh_file, scan, imgs_idx, minimal_vis=1,
+                                  mask_dilated_size=mask_kernel_size)
+         clean_mesh_faces_outside_frustum(clean_mesh_file, final_mesh_file, imgs_idx, mask_dilated_size=mask_kernel_size)
+
+         print("finish processing scan%d" % scan)
SparseNeuS_demo_v1/evaluation/eval_dtu_python.py ADDED
@@ -0,0 +1,369 @@
+ import numpy as np
+ import open3d as o3d
+ import sklearn.neighbors as skln
+ from tqdm import tqdm
+ from scipy.io import loadmat
+ import multiprocessing as mp
+ import argparse, os, sys
+ import cv2 as cv
+
+ from pathlib import Path
+
+
+ def get_path_components(path):
+     path = Path(path)
+     ppath = str(path.parent)
+     stem = str(path.stem)
+     ext = str(path.suffix)
+     return ppath, stem, ext
+
+
+ def sample_single_tri(input_):
+     n1, n2, v1, v2, tri_vert = input_
+     c = np.mgrid[:n1 + 1, :n2 + 1]
+     c += 0.5
+     c[0] /= max(n1, 1e-7)
+     c[1] /= max(n2, 1e-7)
+     c = np.transpose(c, (1, 2, 0))
+     k = c[c.sum(axis=-1) < 1]  # m2
+     q = v1 * k[:, :1] + v2 * k[:, 1:] + tri_vert
+     return q
+
+
+ def write_vis_pcd(file, points, colors):
+     pcd = o3d.geometry.PointCloud()
+     pcd.points = o3d.utility.Vector3dVector(points)
+     pcd.colors = o3d.utility.Vector3dVector(colors)
+     o3d.io.write_point_cloud(file, pcd)
+
+
+ def eval_cloud(args, num_cpu_cores=-1):
+     """Compute DTU accuracy (data->GT), completeness (GT->data) and their mean for one scan."""
+     mp.freeze_support()
+     os.makedirs(args.vis_out_dir, exist_ok=True)
+
+     thresh = args.downsample_density
+     if args.mode == 'mesh':
+         pbar = tqdm(total=9)
+         pbar.set_description('read data mesh')
+         data_mesh = o3d.io.read_triangle_mesh(args.data)
+
+         vertices = np.asarray(data_mesh.vertices)
+         triangles = np.asarray(data_mesh.triangles)
+         tri_vert = vertices[triangles]
+
+         pbar.update(1)
+         pbar.set_description('sample pcd from mesh')
+         v1 = tri_vert[:, 1] - tri_vert[:, 0]
+         v2 = tri_vert[:, 2] - tri_vert[:, 0]
+         l1 = np.linalg.norm(v1, axis=-1, keepdims=True)
+         l2 = np.linalg.norm(v2, axis=-1, keepdims=True)
+         area2 = np.linalg.norm(np.cross(v1, v2), axis=-1, keepdims=True)
+         non_zero_area = (area2 > 0)[:, 0]
+         l1, l2, area2, v1, v2, tri_vert = [
+             arr[non_zero_area] for arr in [l1, l2, area2, v1, v2, tri_vert]
+         ]
+         thr = thresh * np.sqrt(l1 * l2 / area2)
+         n1 = np.floor(l1 / thr)
+         n2 = np.floor(l2 / thr)
+
+         with mp.Pool() as mp_pool:
+             new_pts = mp_pool.map(sample_single_tri,
+                                   ((n1[i, 0], n2[i, 0], v1[i:i + 1], v2[i:i + 1], tri_vert[i:i + 1, 0]) for i in
+                                    range(len(n1))), chunksize=1024)
+
+         new_pts = np.concatenate(new_pts, axis=0)
+         data_pcd = np.concatenate([vertices, new_pts], axis=0)
+
+     elif args.mode == 'pcd':
+         pbar = tqdm(total=8)
+         pbar.set_description('read data pcd')
+         data_pcd_o3d = o3d.io.read_point_cloud(args.data)
+         data_pcd = np.asarray(data_pcd_o3d.points)
+
+     pbar.update(1)
+     pbar.set_description('random shuffle pcd index')
+     shuffle_rng = np.random.default_rng()
+     shuffle_rng.shuffle(data_pcd, axis=0)
+
+     pbar.update(1)
+     pbar.set_description('downsample pcd')
+     nn_engine = skln.NearestNeighbors(n_neighbors=1, radius=thresh, algorithm='kd_tree', n_jobs=num_cpu_cores)
+     nn_engine.fit(data_pcd)
+     rnn_idxs = nn_engine.radius_neighbors(data_pcd, radius=thresh, return_distance=False)
+     mask = np.ones(data_pcd.shape[0], dtype=np.bool_)
+     for curr, idxs in enumerate(rnn_idxs):
+         if mask[curr]:
+             mask[idxs] = 0
+             mask[curr] = 1
+     data_down = data_pcd[mask]
+
+     pbar.update(1)
+     pbar.set_description('masking data pcd')
+     obs_mask_file = loadmat(f'{args.dataset_dir}/ObsMask/ObsMask{args.scan}_10.mat')
+     ObsMask, BB, Res = [obs_mask_file[attr] for attr in ['ObsMask', 'BB', 'Res']]
+     BB = BB.astype(np.float32)
+
+     patch = args.patch_size
+     inbound = ((data_down >= BB[:1] - patch) & (data_down < BB[1:] + patch * 2)).sum(axis=-1) == 3
+     data_in = data_down[inbound]
+
+     data_grid = np.around((data_in - BB[:1]) / Res).astype(np.int32)
+     grid_inbound = ((data_grid >= 0) & (data_grid < np.expand_dims(ObsMask.shape, 0))).sum(axis=-1) == 3
+     data_grid_in = data_grid[grid_inbound]
+     in_obs = ObsMask[data_grid_in[:, 0], data_grid_in[:, 1], data_grid_in[:, 2]].astype(np.bool_)
+     data_in_obs = data_in[grid_inbound][in_obs]
+
+     pbar.update(1)
+     pbar.set_description('read STL pcd')
+     stl_pcd = o3d.io.read_point_cloud(args.gt)
+     stl = np.asarray(stl_pcd.points)
+
+     pbar.update(1)
+     pbar.set_description('compute data2stl')
+     nn_engine.fit(stl)
+     dist_d2s, idx_d2s = nn_engine.kneighbors(data_in_obs, n_neighbors=1, return_distance=True)
+     max_dist = args.max_dist
+     mean_d2s = dist_d2s[dist_d2s < max_dist].mean()
+
+     pbar.update(1)
+     pbar.set_description('compute stl2data')
+     ground_plane = loadmat(f'{args.dataset_dir}/ObsMask/Plane{args.scan}.mat')['P']
+
+     stl_hom = np.concatenate([stl, np.ones_like(stl[:, :1])], -1)
+     above = (ground_plane.reshape((1, 4)) * stl_hom).sum(-1) > 0
+     stl_above = stl[above]
+
+     nn_engine.fit(data_in)
+     dist_s2d, idx_s2d = nn_engine.kneighbors(stl_above, n_neighbors=1, return_distance=True)
+     mean_s2d = dist_s2d[dist_s2d < max_dist].mean()
+
+     pbar.update(1)
+     pbar.set_description('visualize error')
+     vis_dist = args.visualize_threshold
+     R = np.array([[1, 0, 0]], dtype=np.float64)
+     G = np.array([[0, 1, 0]], dtype=np.float64)
+     B = np.array([[0, 0, 1]], dtype=np.float64)
+     W = np.array([[1, 1, 1]], dtype=np.float64)
+     data_color = np.tile(B, (data_down.shape[0], 1))
+     data_alpha = dist_d2s.clip(max=vis_dist) / vis_dist
+     data_color[np.where(inbound)[0][grid_inbound][in_obs]] = R * data_alpha + W * (1 - data_alpha)
+     data_color[np.where(inbound)[0][grid_inbound][in_obs][dist_d2s[:, 0] >= max_dist]] = G
+     write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_d2gt.ply', data_down, data_color)
+     stl_color = np.tile(B, (stl.shape[0], 1))
+     stl_alpha = dist_s2d.clip(max=vis_dist) / vis_dist
+     stl_color[np.where(above)[0]] = R * stl_alpha + W * (1 - stl_alpha)
+     stl_color[np.where(above)[0][dist_s2d[:, 0] >= max_dist]] = G
+     write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_gt2d.ply', stl, stl_color)
+
+     pbar.update(1)
+     pbar.set_description('done')
+     pbar.close()
+     over_all = (mean_d2s + mean_s2d) / 2
+     print(f'mean_d2gt: {mean_d2s}; mean_gt2d: {mean_s2d}; over_all: {over_all}.')
+
+     pparent, stem, ext = get_path_components(args.data)
+     if args.log is None:
+         path_log = os.path.join(pparent, 'eval_result.txt')
+     else:
+         path_log = args.log
+     with open(path_log, 'a+') as fLog:
+         fLog.write(f'mean_d2gt {np.round(mean_d2s, 3)} '
+                    f'mean_gt2d {np.round(mean_s2d, 3)} '
+                    f'Over_all {np.round(over_all, 3)} '
+                    f'[{stem}] \n')
+
+     return over_all, mean_d2s, mean_s2d
+
+
+ if __name__ == '__main__':
+     from glob import glob
+
+     mp.freeze_support()
+
+     parser = argparse.ArgumentParser()
+     parser.add_argument('--data', type=str, default='data_in.ply')
+     parser.add_argument('--gt', type=str, help='ground truth')
+     parser.add_argument('--scan', type=int, default=1)
+     parser.add_argument('--mode', type=str, default='mesh', choices=['mesh', 'pcd'])
+     parser.add_argument('--dataset_dir', type=str, default='/dataset/dtu_official/SampleSet/MVS_Data')
+     parser.add_argument('--vis_out_dir', type=str, default='.')
+     parser.add_argument('--downsample_density', type=float, default=0.2)
+     parser.add_argument('--patch_size', type=float, default=60)
+     parser.add_argument('--max_dist', type=float, default=20)
+     parser.add_argument('--visualize_threshold', type=float, default=10)
+     parser.add_argument('--log', type=str, default=None)
+     args = parser.parse_args()
+
+     base_dir = "./exp"
+
+     GT_DIR = "./gt_pcd"
+
+     scans = [24, 37, 40, 55, 63, 65, 69, 83, 97, 105, 106, 110, 114, 118, 122]
+
+     for scan in scans:
+
+         print("processing scan%d" % scan)
+
+         args.data = os.path.join(base_dir, "scan{}".format(scan), "final_%03d.ply" % scan)
+
+         if not os.path.exists(args.data):
+             continue
+
+         args.gt = os.path.join(GT_DIR, "stl%03d_total.ply" % scan)
+         args.vis_out_dir = os.path.join(base_dir, "scan{}".format(scan))
+         args.scan = scan
+         os.makedirs(args.vis_out_dir, exist_ok=True)
+
+         dist_thred1 = 1
+         dist_thred2 = 2
+
+         thresh = args.downsample_density
+
+         if args.mode == 'mesh':
+             pbar = tqdm(total=9)
+             pbar.set_description('read data mesh')
+             data_mesh = o3d.io.read_triangle_mesh(args.data)
+
+             vertices = np.asarray(data_mesh.vertices)
+             triangles = np.asarray(data_mesh.triangles)
+             tri_vert = vertices[triangles]
+
+             pbar.update(1)
+             pbar.set_description('sample pcd from mesh')
+             v1 = tri_vert[:, 1] - tri_vert[:, 0]
+             v2 = tri_vert[:, 2] - tri_vert[:, 0]
+             l1 = np.linalg.norm(v1, axis=-1, keepdims=True)
+             l2 = np.linalg.norm(v2, axis=-1, keepdims=True)
+             area2 = np.linalg.norm(np.cross(v1, v2), axis=-1, keepdims=True)
+             non_zero_area = (area2 > 0)[:, 0]
+             l1, l2, area2, v1, v2, tri_vert = [
+                 arr[non_zero_area] for arr in [l1, l2, area2, v1, v2, tri_vert]
+             ]
+             thr = thresh * np.sqrt(l1 * l2 / area2)
+             n1 = np.floor(l1 / thr)
+             n2 = np.floor(l2 / thr)
+
+             with mp.Pool() as mp_pool:
+                 new_pts = mp_pool.map(sample_single_tri,
+                                       ((n1[i, 0], n2[i, 0], v1[i:i + 1], v2[i:i + 1], tri_vert[i:i + 1, 0]) for i in
+                                        range(len(n1))), chunksize=1024)
+
+             new_pts = np.concatenate(new_pts, axis=0)
+             data_pcd = np.concatenate([vertices, new_pts], axis=0)
+
+         elif args.mode == 'pcd':
+             pbar = tqdm(total=8)
+             pbar.set_description('read data pcd')
+             data_pcd_o3d = o3d.io.read_point_cloud(args.data)
+             data_pcd = np.asarray(data_pcd_o3d.points)
+
+         pbar.update(1)
+         pbar.set_description('random shuffle pcd index')
+         shuffle_rng = np.random.default_rng()
+         shuffle_rng.shuffle(data_pcd, axis=0)
+
+         pbar.update(1)
+         pbar.set_description('downsample pcd')
+         nn_engine = skln.NearestNeighbors(n_neighbors=1, radius=thresh, algorithm='kd_tree', n_jobs=-1)
+         nn_engine.fit(data_pcd)
+         rnn_idxs = nn_engine.radius_neighbors(data_pcd, radius=thresh, return_distance=False)
+         mask = np.ones(data_pcd.shape[0], dtype=np.bool_)
+         for curr, idxs in enumerate(rnn_idxs):
+             if mask[curr]:
+                 mask[idxs] = 0
+                 mask[curr] = 1
+         data_down = data_pcd[mask]
+
+         pbar.update(1)
+         pbar.set_description('masking data pcd')
+         obs_mask_file = loadmat(f'{args.dataset_dir}/ObsMask/ObsMask{args.scan}_10.mat')
+         ObsMask, BB, Res = [obs_mask_file[attr] for attr in ['ObsMask', 'BB', 'Res']]
+         BB = BB.astype(np.float32)
+
+         patch = args.patch_size
+         inbound = ((data_down >= BB[:1] - patch) & (data_down < BB[1:] + patch * 2)).sum(axis=-1) == 3
+         data_in = data_down[inbound]
+
+         data_grid = np.around((data_in - BB[:1]) / Res).astype(np.int32)
+         grid_inbound = ((data_grid >= 0) & (data_grid < np.expand_dims(ObsMask.shape, 0))).sum(axis=-1) == 3
+         data_grid_in = data_grid[grid_inbound]
+         in_obs = ObsMask[data_grid_in[:, 0], data_grid_in[:, 1], data_grid_in[:, 2]].astype(np.bool_)
+         data_in_obs = data_in[grid_inbound][in_obs]
+
+         pbar.update(1)
+         pbar.set_description('read STL pcd')
+         stl_pcd = o3d.io.read_point_cloud(args.gt)
+         stl = np.asarray(stl_pcd.points)
+
+         pbar.update(1)
+         pbar.set_description('compute data2stl')
+         nn_engine.fit(stl)
+         dist_d2s, idx_d2s = nn_engine.kneighbors(data_in_obs, n_neighbors=1, return_distance=True)
+         max_dist = args.max_dist
+         mean_d2s = dist_d2s[dist_d2s < max_dist].mean()
+
+         precision_1 = len(dist_d2s[dist_d2s < dist_thred1]) / len(dist_d2s)
+         precision_2 = len(dist_d2s[dist_d2s < dist_thred2]) / len(dist_d2s)
+
+         pbar.update(1)
+         pbar.set_description('compute stl2data')
+         ground_plane = loadmat(f'{args.dataset_dir}/ObsMask/Plane{args.scan}.mat')['P']
+
+         stl_hom = np.concatenate([stl, np.ones_like(stl[:, :1])], -1)
+         above = (ground_plane.reshape((1, 4)) * stl_hom).sum(-1) > 0
+
+         stl_above = stl[above]
+
+         nn_engine.fit(data_in)
+         dist_s2d, idx_s2d = nn_engine.kneighbors(stl_above, n_neighbors=1, return_distance=True)
+         mean_s2d = dist_s2d[dist_s2d < max_dist].mean()
+
+         recall_1 = len(dist_s2d[dist_s2d < dist_thred1]) / len(dist_s2d)
+         recall_2 = len(dist_s2d[dist_s2d < dist_thred2]) / len(dist_s2d)
+
+         pbar.update(1)
+         pbar.set_description('visualize error')
+         vis_dist = args.visualize_threshold
+         R = np.array([[1, 0, 0]], dtype=np.float64)
+         G = np.array([[0, 1, 0]], dtype=np.float64)
+         B = np.array([[0, 0, 1]], dtype=np.float64)
+         W = np.array([[1, 1, 1]], dtype=np.float64)
+         data_color = np.tile(B, (data_down.shape[0], 1))
+         data_alpha = dist_d2s.clip(max=vis_dist) / vis_dist
+         data_color[np.where(inbound)[0][grid_inbound][in_obs]] = R * data_alpha + W * (1 - data_alpha)
+         data_color[np.where(inbound)[0][grid_inbound][in_obs][dist_d2s[:, 0] >= max_dist]] = G
+         write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_d2gt.ply', data_down, data_color)
+         stl_color = np.tile(B, (stl.shape[0], 1))
+         stl_alpha = dist_s2d.clip(max=vis_dist) / vis_dist
+         stl_color[np.where(above)[0]] = R * stl_alpha + W * (1 - stl_alpha)
+         stl_color[np.where(above)[0][dist_s2d[:, 0] >= max_dist]] = G
+         write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_gt2d.ply', stl, stl_color)
+
+         pbar.update(1)
+         pbar.set_description('done')
+         pbar.close()
+         over_all = (mean_d2s + mean_s2d) / 2
+
+         fscore_1 = 2 * precision_1 * recall_1 / (precision_1 + recall_1 + 1e-6)
+         fscore_2 = 2 * precision_2 * recall_2 / (precision_2 + recall_2 + 1e-6)
+
+         print(f'over_all: {over_all}; mean_d2gt: {mean_d2s}; mean_gt2d: {mean_s2d}.')
+         print(f'precision_1mm: {precision_1}; recall_1mm: {recall_1}; fscore_1mm: {fscore_1}')
+         print(f'precision_2mm: {precision_2}; recall_2mm: {recall_2}; fscore_2mm: {fscore_2}')
+
+         pparent, stem, ext = get_path_components(args.data)
+         if args.log is None:
+             path_log = os.path.join(pparent, 'eval_result.txt')
+         else:
+             path_log = args.log
+         with open(path_log, 'w+') as fLog:
+             fLog.write(f'over_all {np.round(over_all, 3)} '
+                        f'mean_d2gt {np.round(mean_d2s, 3)} '
+                        f'mean_gt2d {np.round(mean_s2d, 3)} \n'
+                        f'precision_1mm {np.round(precision_1, 3)} '
+                        f'recall_1mm {np.round(recall_1, 3)} '
+                        f'fscore_1mm {np.round(fscore_1, 3)} \n'
+                        f'precision_2mm {np.round(precision_2, 3)} '
+                        f'recall_2mm {np.round(recall_2, 3)} '
+                        f'fscore_2mm {np.round(fscore_2, 3)} \n'
+                        f'[{stem}] \n')
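
The script above evaluates full DTU scans with observation masks and ground planes. The sketch below strips all of that away and keeps only the core accuracy/completeness arithmetic (mean data-to-GT and GT-to-data nearest-neighbor distances plus a thresholded F-score) on synthetic point sets, so the numbers are meaningless except as a logic check.

    # Tiny, self-contained illustration of the d2gt / gt2d / F-score computation.
    import numpy as np
    import sklearn.neighbors as skln

    data = np.random.rand(200, 3) * 10.0          # stand-in for the reconstructed points
    gt = data + np.random.randn(200, 3) * 0.3     # stand-in for the ground-truth points

    nn = skln.NearestNeighbors(n_neighbors=1)
    dist_d2s = nn.fit(gt).kneighbors(data)[0]     # data -> GT distances, shape (N, 1)
    dist_s2d = nn.fit(data).kneighbors(gt)[0]     # GT -> data distances

    mean_d2s, mean_s2d = dist_d2s.mean(), dist_s2d.mean()
    precision_1 = (dist_d2s < 1.0).mean()         # fraction of data points within 1 unit of GT
    recall_1 = (dist_s2d < 1.0).mean()            # fraction of GT points within 1 unit of data
    fscore_1 = 2 * precision_1 * recall_1 / (precision_1 + recall_1 + 1e-6)
    print((mean_d2s + mean_s2d) / 2, fscore_1)    # "over_all" and the 1-unit F-score
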
SparseNeuS_demo_v1/exp/lod0/checkpoint_trash/ckpt_285000.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:763c2a4934928cc089342905ba61481d6f9efc977b9729d7fc2d3eae4f0e1f9b
+ size 5310703
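
The checkpoint added above is stored as a Git LFS pointer rather than the binary weights: a pointer file is just three "key value" lines (version, oid, size). The helper below is a small illustrative parser, not part of the repository.

    # Illustrative only: read an LFS pointer file into a dict of its three fields.
    def read_lfs_pointer(path):
        fields = {}
        with open(path) as f:
            for line in f:
                key, _, value = line.strip().partition(" ")
                fields[key] = value
        return fields  # e.g. {'version': ..., 'oid': 'sha256:...', 'size': '5310703'}

    # print(read_lfs_pointer("SparseNeuS_demo_v1/exp/lod0/checkpoint_trash/ckpt_285000.pth"))
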