Spaces:
Runtime error
Runtime error
Upload imagedream/camera_utils.py with huggingface_hub
Browse files- imagedream/camera_utils.py +98 -98
imagedream/camera_utils.py
CHANGED
|
@@ -1,99 +1,99 @@
|
|
| 1 |
-
import numpy as np
|
| 2 |
-
import torch
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
def create_camera_to_world_matrix(elevation, azimuth):
    """Build a 4x4 camera-to-world pose for a camera on the unit sphere.

    The camera sits at the point given by ``elevation`` and ``azimuth``
    (both in degrees), looks at the origin, and uses +Y as the world up
    direction.

    Args:
        elevation: Angle above the XZ plane, in degrees.
        azimuth: Angle around the Y axis, in degrees; 0 places the camera
            on the +Z axis.

    Returns:
        A ``(4, 4)`` float numpy array. The rotation columns are the
        camera's right, up and backward (negated viewing direction) axes;
        the last column holds the camera position.
    """
    elev_rad = np.radians(elevation)
    azim_rad = np.radians(azimuth)

    # Spherical -> Cartesian on the unit sphere (Y is the vertical axis).
    eye = np.array(
        [
            np.cos(elev_rad) * np.sin(azim_rad),
            np.sin(elev_rad),
            np.cos(elev_rad) * np.cos(azim_rad),
        ]
    )
    origin = np.array([0, 0, 0])
    world_up = np.array([0, 1, 0])

    # Orthonormal look-at basis: viewing direction, then right, then true up.
    look_dir = origin - eye
    look_dir = look_dir / np.linalg.norm(look_dir)
    side = np.cross(look_dir, world_up)
    side = side / np.linalg.norm(side)
    cam_up = np.cross(side, look_dir)
    cam_up = cam_up / np.linalg.norm(cam_up)

    pose = np.eye(4)
    pose[:3, 0] = side
    pose[:3, 1] = cam_up
    pose[:3, 2] = -look_dir  # the camera looks along its own -Z axis
    pose[:3, 3] = eye
    return pose
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
def convert_opengl_to_blender(camera_matrix):
    """Re-express camera matrices from OpenGL axes in Blender axes.

    The matrix is left-multiplied by a fixed axis swap that maps a point
    ``(x, y, z)`` to ``(x, -z, y)``.

    Args:
        camera_matrix: A ``(4, 4)`` matrix or a batch ``(..., 4, 4)`` of
            matrices, given either as a numpy array or as a torch tensor.

    Returns:
        The converted matrix or batch, with the same container type and
        shape as the input. For tensors, the dtype and device of the input
        are preserved.
    """
    flip_yz = [[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]
    if isinstance(camera_matrix, np.ndarray):
        # matmul (unlike np.dot) broadcasts over leading batch dimensions,
        # so an (N, 4, 4) input stays (N, 4, 4) instead of becoming (4, N, 4).
        return np.matmul(np.array(flip_yz), camera_matrix)
    # `.to(camera_matrix)` matches the input's dtype and device; torch.matmul
    # broadcasts the single 4x4 swap over any leading batch dimensions.
    return torch.matmul(torch.tensor(flip_yz).to(camera_matrix), camera_matrix)
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
def normalize_camera(camera_matrix):
    """Normalize the camera location onto a unit sphere.

    The input is interpreted as a batch of 4x4 matrices; the translation
    column of each one is divided by its Euclidean norm (plus ``1e-8`` so a
    zero translation does not divide by zero). The rotation part is left
    untouched.

    The caller's array or tensor is never modified: the work is done on a
    copy, because ``reshape`` may return a view that shares memory with the
    input.

    Args:
        camera_matrix: numpy array or torch tensor whose elements can be
            reshaped to ``(-1, 4, 4)``, e.g. ``(4, 4)``, ``(N, 4, 4)`` or
            ``(N, 16)``.

    Returns:
        A new array or tensor of shape ``(N, 16)`` holding the flattened,
        normalized matrices.
    """
    if isinstance(camera_matrix, np.ndarray):
        poses = camera_matrix.reshape(-1, 4, 4).copy()
        scale = np.linalg.norm(poses[:, :3, 3], axis=1, keepdims=True) + 1e-8
    else:
        poses = camera_matrix.reshape(-1, 4, 4).clone()
        scale = torch.norm(poses[:, :3, 3], dim=1, keepdim=True) + 1e-8
    poses[:, :3, 3] = poses[:, :3, 3] / scale
    return poses.reshape(-1, 16)
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
def get_camera(
    num_frames,
    elevation=15,
    azimuth_start=0,
    azimuth_span=360,
    blender_coord=True,
    extra_view=False,
):
    """Generate evenly spaced orbit cameras at a fixed elevation.

    Args:
        num_frames: Number of cameras to place along the orbit.
        elevation: Elevation of every camera, in degrees.
        azimuth_start: Azimuth of the first camera, in degrees.
        azimuth_span: Total azimuth range covered, in degrees. The end
            point ``azimuth_start + azimuth_span`` itself is not sampled.
        blender_coord: If true, each matrix is passed through
            ``convert_opengl_to_blender``.
        extra_view: If true, one additional all-zero camera row is appended.

    Returns:
        A float32 torch tensor of shape ``(num_frames, 16)``, or
        ``(num_frames + 1, 16)`` when ``extra_view`` is set. Each row is a
        flattened 4x4 camera-to-world matrix.
    """
    angle_gap = azimuth_span / num_frames
    # Derive each azimuth from an integer frame index. Stepping
    # np.arange(start, stop, float_step) directly can emit one sample too
    # many when rounding makes (stop - start) / step land just above
    # num_frames.
    azimuths = azimuth_start + angle_gap * np.arange(num_frames)

    cameras = []
    for azimuth in azimuths:
        camera_matrix = create_camera_to_world_matrix(elevation, azimuth)
        if blender_coord:
            camera_matrix = convert_opengl_to_blender(camera_matrix)
        cameras.append(camera_matrix.flatten())

    if extra_view:
        cameras.append(np.zeros(len(cameras[0])))
    return torch.tensor(np.stack(cameras, 0)).float()
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
def get_camera_for_index(data_index):
    """Return the single camera for one of the six canonical data views.

    Following our current data format, with view 000 being the one that
    faces us:

        000: front,  elevation 0,   azimuth 0
        001: left,   elevation 0,   azimuth -90
        002: bottom, elevation -90, azimuth 0
        003: back,   elevation 0,   azimuth 180
        004: right,  elevation 0,   azimuth 90
        005: top,    elevation 90,  azimuth 0

    Args:
        data_index: Index of the view in the table above.

    Returns:
        The result of ``get_camera`` for one frame at that view's
        elevation and starting azimuth.
    """
    # (elevation, azimuth) pairs in degrees, ordered by view index.
    view_angles = [
        (0, 0),
        (0, -90),
        (-90, 0),
        (0, 180),
        (0, 90),
        (90, 0),
    ]
    selected = view_angles[data_index]
    return get_camera(1, *selected)
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def create_camera_to_world_matrix(elevation, azimuth):
    """Build a 4x4 camera-to-world pose for a camera on the unit sphere.

    The camera sits at the point given by ``elevation`` and ``azimuth``
    (both in degrees), looks at the origin, and uses +Y as the world up
    direction.

    Args:
        elevation: Angle above the XZ plane, in degrees.
        azimuth: Angle around the Y axis, in degrees; 0 places the camera
            on the +Z axis.

    Returns:
        A ``(4, 4)`` float numpy array. The rotation columns are the
        camera's right, up and backward (negated viewing direction) axes;
        the last column holds the camera position.
    """
    elev_rad = np.radians(elevation)
    azim_rad = np.radians(azimuth)

    # Spherical -> Cartesian on the unit sphere (Y is the vertical axis).
    eye = np.array(
        [
            np.cos(elev_rad) * np.sin(azim_rad),
            np.sin(elev_rad),
            np.cos(elev_rad) * np.cos(azim_rad),
        ]
    )
    origin = np.array([0, 0, 0])
    world_up = np.array([0, 1, 0])

    # Orthonormal look-at basis: viewing direction, then right, then true up.
    look_dir = origin - eye
    look_dir = look_dir / np.linalg.norm(look_dir)
    side = np.cross(look_dir, world_up)
    side = side / np.linalg.norm(side)
    cam_up = np.cross(side, look_dir)
    cam_up = cam_up / np.linalg.norm(cam_up)

    pose = np.eye(4)
    pose[:3, 0] = side
    pose[:3, 1] = cam_up
    pose[:3, 2] = -look_dir  # the camera looks along its own -Z axis
    pose[:3, 3] = eye
    return pose
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def convert_opengl_to_blender(camera_matrix):
    """Re-express camera matrices from OpenGL axes in Blender axes.

    The matrix is left-multiplied by a fixed axis swap that maps a point
    ``(x, y, z)`` to ``(x, -z, y)``.

    Args:
        camera_matrix: A ``(4, 4)`` matrix or a batch ``(..., 4, 4)`` of
            matrices, given either as a numpy array or as a torch tensor.

    Returns:
        The converted matrix or batch, with the same container type and
        shape as the input. For tensors, the dtype and device of the input
        are preserved.
    """
    flip_yz = [[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]
    if isinstance(camera_matrix, np.ndarray):
        # matmul (unlike np.dot) broadcasts over leading batch dimensions,
        # so an (N, 4, 4) input stays (N, 4, 4) instead of becoming (4, N, 4).
        return np.matmul(np.array(flip_yz), camera_matrix)
    # `.to(camera_matrix)` matches the input's dtype and device; torch.matmul
    # broadcasts the single 4x4 swap over any leading batch dimensions.
    return torch.matmul(torch.tensor(flip_yz).to(camera_matrix), camera_matrix)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def normalize_camera(camera_matrix):
    """Normalize the camera location onto a unit sphere.

    The input is interpreted as a batch of 4x4 matrices; the translation
    column of each one is divided by its Euclidean norm (plus ``1e-8`` so a
    zero translation does not divide by zero). The rotation part is left
    untouched.

    The caller's array or tensor is never modified: the work is done on a
    copy, because ``reshape`` may return a view that shares memory with the
    input.

    Args:
        camera_matrix: numpy array or torch tensor whose elements can be
            reshaped to ``(-1, 4, 4)``, e.g. ``(4, 4)``, ``(N, 4, 4)`` or
            ``(N, 16)``.

    Returns:
        A new array or tensor of shape ``(N, 16)`` holding the flattened,
        normalized matrices.
    """
    if isinstance(camera_matrix, np.ndarray):
        poses = camera_matrix.reshape(-1, 4, 4).copy()
        scale = np.linalg.norm(poses[:, :3, 3], axis=1, keepdims=True) + 1e-8
    else:
        poses = camera_matrix.reshape(-1, 4, 4).clone()
        scale = torch.norm(poses[:, :3, 3], dim=1, keepdim=True) + 1e-8
    poses[:, :3, 3] = poses[:, :3, 3] / scale
    return poses.reshape(-1, 16)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def get_camera(
    num_frames,
    elevation=15,
    azimuth_start=0,
    azimuth_span=360,
    blender_coord=True,
    extra_view=False,
):
    """Generate evenly spaced orbit cameras at a fixed elevation.

    Args:
        num_frames: Number of cameras to place along the orbit.
        elevation: Elevation of every camera, in degrees.
        azimuth_start: Azimuth of the first camera, in degrees.
        azimuth_span: Total azimuth range covered, in degrees. The end
            point ``azimuth_start + azimuth_span`` itself is not sampled.
        blender_coord: If true, each matrix is passed through
            ``convert_opengl_to_blender``.
        extra_view: If true, one additional all-zero camera row is appended.

    Returns:
        A float32 torch tensor of shape ``(num_frames, 16)``, or
        ``(num_frames + 1, 16)`` when ``extra_view`` is set. Each row is a
        flattened 4x4 camera-to-world matrix.
    """
    angle_gap = azimuth_span / num_frames
    # Derive each azimuth from an integer frame index. Stepping
    # np.arange(start, stop, float_step) directly can emit one sample too
    # many when rounding makes (stop - start) / step land just above
    # num_frames.
    azimuths = azimuth_start + angle_gap * np.arange(num_frames)

    cameras = []
    for azimuth in azimuths:
        camera_matrix = create_camera_to_world_matrix(elevation, azimuth)
        if blender_coord:
            camera_matrix = convert_opengl_to_blender(camera_matrix)
        cameras.append(camera_matrix.flatten())

    if extra_view:
        cameras.append(np.zeros(len(cameras[0])))
    return torch.tensor(np.stack(cameras, 0)).float()
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def get_camera_for_index(data_index):
    """Return the single camera for one of the six canonical data views.

    Following our current data format, with view 000 being the one that
    faces us:

        000: front,  elevation 0,   azimuth 0
        001: left,   elevation 0,   azimuth -90
        002: bottom, elevation -90, azimuth 0
        003: back,   elevation 0,   azimuth 180
        004: right,  elevation 0,   azimuth 90
        005: top,    elevation 90,  azimuth 0

    Args:
        data_index: Index of the view in the table above.

    Returns:
        The result of ``get_camera`` for one frame at that view's
        elevation and starting azimuth.
    """
    # (elevation, azimuth) pairs in degrees, ordered by view index.
    view_angles = [
        (0, 0),
        (0, -90),
        (-90, 0),
        (0, 180),
        (0, 90),
        (90, 0),
    ]
    selected = view_angles[data_index]
    return get_camera(1, *selected)
|