import math

import torch
import numpy as np
from tqdm import tqdm
import utils3d
from PIL import Image

from ..renderers import MeshRenderer
from ..representations import Octree, Gaussian, MeshExtractResult
from .random_utils import sphere_hammersley_sequence

def yaw_pitch_r_fov_to_extrinsics_intrinsics(yaws, pitchs, rs, fovs, device='cuda'):
    """Convert spherical camera parameters to extrinsics/intrinsics.

    Yaw/pitch are in radians, fov in degrees. Scalar inputs return a single
    pair of matrices; list inputs return lists.
    """
    is_list = isinstance(yaws, list)
    if not is_list:
        yaws = [yaws]
        pitchs = [pitchs]
    if not isinstance(rs, list):
        rs = [rs] * len(yaws)
    if not isinstance(fovs, list):
        fovs = [fovs] * len(yaws)
    extrinsics = []
    intrinsics = []
    for yaw, pitch, r, fov in zip(yaws, pitchs, rs, fovs):
        fov = torch.deg2rad(torch.tensor(float(fov))).to(device)
        yaw = torch.tensor(float(yaw)).to(device)
        pitch = torch.tensor(float(pitch)).to(device)
        # Camera position on a sphere of radius r around the origin (z-up world).
        orig = torch.stack([
            torch.sin(yaw) * torch.cos(pitch),
            torch.cos(yaw) * torch.cos(pitch),
            torch.sin(pitch),
        ]).to(device) * r
        extr = utils3d.torch.extrinsics_look_at(
            orig,
            torch.tensor([0, 0, 0]).float().to(device),
            torch.tensor([0, 0, 1]).float().to(device),
        )
        intr = utils3d.torch.intrinsics_from_fov_xy(fov, fov)
        extrinsics.append(extr)
        intrinsics.append(intr)
    if not is_list:
        extrinsics = extrinsics[0]
        intrinsics = intrinsics[0]
    return extrinsics, intrinsics
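
# Illustrative sketch (not part of the original module): build a single camera
# from spherical coordinates, keeping the conventions above (z-up world,
# camera looking at the origin). Assumes a CUDA device is available.
def _example_single_camera():
    # Scalar inputs return a single [4, 4] / [3, 3] pair instead of lists.
    extr, intr = yaw_pitch_r_fov_to_extrinsics_intrinsics(0.0, 0.25, 2.0, 40.0)
    assert extr.shape == (4, 4) and intr.shape == (3, 3)
    return extr, intr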

def render_frames(sample, extrinsics, intrinsics, options=None, colors_overwrite=None, verbose=True, need_depth=False, opt=False, **kwargs):
    options = options or {}  # avoid a shared mutable default argument
    # Pick a renderer based on the representation being rendered.
    if isinstance(sample, MeshExtractResult):
        renderer = MeshRenderer()
        renderer.rendering_options.resolution = options.get('resolution', 1024)
        renderer.rendering_options.near = options.get('near', 1)
        renderer.rendering_options.far = options.get('far', 100)
        renderer.rendering_options.ssaa = options.get('ssaa', 4)
    elif isinstance(sample, Gaussian):
        # from ..renderers import GSplatRenderer, GaussianRenderer
        # renderer = GSplatRenderer()
        from ..renderers import GaussianRenderer
        renderer = GaussianRenderer()
        renderer.rendering_options.resolution = options.get('resolution', 1024)
        renderer.rendering_options.near = options.get('near', 0.8)
        renderer.rendering_options.far = options.get('far', 1.6)
        renderer.rendering_options.bg_color = options.get('bg_color', (0, 0, 0))
        renderer.rendering_options.ssaa = options.get('ssaa', 1)
        renderer.pipe.kernel_size = kwargs.get('kernel_size', 0.1)
        renderer.pipe.use_mip_gaussian = True
    elif isinstance(sample, Octree):
        from ..renderers import OctreeRenderer
        renderer = OctreeRenderer()
        renderer.rendering_options.resolution = options.get('resolution', 512)
        renderer.rendering_options.near = options.get('near', 0.8)
        renderer.rendering_options.far = options.get('far', 1.6)
        renderer.rendering_options.bg_color = options.get('bg_color', (0, 0, 0))
        renderer.rendering_options.ssaa = options.get('ssaa', 4)
        renderer.pipe.primitive = sample.primitive
    else:
        raise ValueError(f'Unsupported sample type: {type(sample)}')

    rets = {}
    for extr, intr in tqdm(zip(extrinsics, intrinsics), total=len(extrinsics), desc='Rendering', disable=not verbose):
        if not isinstance(sample, MeshExtractResult):
            res = renderer.render(sample, extr, intr, colors_overwrite=colors_overwrite, need_depth=need_depth)
            # With opt=True keep tensors (differentiable); otherwise convert to uint8 HWC numpy.
            rets.setdefault('color', []).append(res['color'].clamp(0, 1) if opt else
                np.clip(res['color'].detach().cpu().numpy().transpose(1, 2, 0) * 255, 0, 255).astype(np.uint8))
            rets.setdefault('depth', [])
            if 'percent_depth' in res:
                rets['depth'].append(res['percent_depth'] if opt else res['percent_depth'].detach().cpu().numpy())
            elif 'depth' in res:
                rets['depth'].append(res['depth'] if opt else res['depth'].detach().cpu().numpy())
            else:
                rets['depth'].append(None)
        else:
            return_types = kwargs.get('return_types', ['color', 'normal', 'nocs', 'depth', 'mask'])
            res = renderer.render(sample, extr, intr, return_types=return_types)
            # Only append buffers that were actually requested; the renderer
            # omits unrequested keys from `res`.
            if 'color' in return_types:
                rets.setdefault('color', []).append(res['color'].clamp(0, 1) if opt else
                    np.clip(res['color'].detach().cpu().numpy().transpose(1, 2, 0) * 255, 0, 255).astype(np.uint8))
            if 'normal' in return_types:
                rets.setdefault('normal', []).append(res['normal'].clamp(0, 1) if opt else
                    np.clip(res['normal'].detach().cpu().numpy().transpose(1, 2, 0) * 255, 0, 255).astype(np.uint8))
            if 'nocs' in return_types:
                rets.setdefault('nocs', []).append(res['nocs'].clamp(0, 1) if opt else
                    np.clip(res['nocs'].detach().cpu().numpy().transpose(1, 2, 0) * 255, 0, 255).astype(np.uint8))
            if 'depth' in return_types:
                rets.setdefault('depth', []).append(res['depth'] if opt else res['depth'].detach().cpu().numpy())
            if 'mask' in return_types:
                rets.setdefault('mask', []).append(res['mask'].detach().cpu().numpy().astype(np.uint8))
    return rets
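
# Illustrative sketch (not part of the original module): render a 4-view
# turntable of an already-extracted mesh, requesting only the color and mask
# buffers. `mesh` is a hypothetical MeshExtractResult produced elsewhere.
def _example_render_frames(mesh: MeshExtractResult):
    yaws = [i * math.pi / 2 for i in range(4)]
    pitchs = [0.25] * 4
    extr, intr = yaw_pitch_r_fov_to_extrinsics_intrinsics(yaws, pitchs, 2.0, 40.0)
    frames = render_frames(mesh, extr, intr, {'resolution': 512}, verbose=False,
                           return_types=['color', 'mask'])
    return frames['color']  # list of [H, W, 3] uint8 arrays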

def render_orth_frames(sample, extrinsics, projections, options=None, colors_overwrite=None, verbose=True, **kwargs):
    options = options or {}  # avoid a shared mutable default argument
    # Select the renderer according to the sample type; only meshes support
    # the orthographic path.
    if isinstance(sample, MeshExtractResult):
        renderer = MeshRenderer()
        renderer.rendering_options.resolution = options.get('resolution', 1024)
        renderer.rendering_options.ssaa = options.get('ssaa', 4)
    else:
        raise ValueError(f'Unsupported sample type: {type(sample)}')

    rets = {}
    for j, extr in tqdm(enumerate(extrinsics), total=len(extrinsics), desc='Rendering Orthographic', disable=not verbose):
        res = renderer.render(sample, extr, None, perspective=projections[j], return_types=['normal', 'nocs', 'depth'])
        rets.setdefault('normal', [])
        rets.setdefault('color', [])  # kept for interface parity; never filled on this path
        rets.setdefault('nocs', [])
        rets.setdefault('depth', [])
        rets['normal'].append(np.clip(
            res['normal'].detach().cpu().numpy().transpose(1, 2, 0) * 255, 0, 255
        ).astype(np.uint8))
        rets['nocs'].append(np.clip(
            res['nocs'].detach().cpu().numpy().transpose(1, 2, 0) * 255, 0, 255
        ).astype(np.uint8))
        rets['depth'].append(res['depth'].detach().cpu().numpy())
    return rets

def get_ortho_projection_matrix(left, right, bottom, top, near, far):
    """
    Build an orthographic projection matrix with torch, following the
    standard form:
        [ 2/(r-l)   0         0          -(r+l)/(r-l) ]
        [ 0         2/(t-b)   0          -(t+b)/(t-b) ]
        [ 0         0         -2/(f-n)   -(f+n)/(f-n) ]
        [ 0         0         0           1           ]
    Note: the (2, 3) entry below is negated relative to this standard OpenGL
    form, presumably to match the depth convention of the consuming renderer.
    """
    projection_matrix = torch.zeros((4, 4), dtype=torch.float32)
    projection_matrix[0, 0] = 2.0 / (right - left)
    projection_matrix[1, 1] = 2.0 / (top - bottom)
    projection_matrix[2, 2] = -2.0 / (far - near)
    projection_matrix[3, 3] = 1.0
    projection_matrix[0, 3] = -(right + left) / (right - left)
    projection_matrix[1, 3] = -(top + bottom) / (top - bottom)
    projection_matrix[2, 3] = (far + near) / (far - near)  # sign flipped vs. standard OpenGL
    return projection_matrix
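
# Illustrative sketch (not part of the original module): the symmetric
# orthographic frustum used by render_ortho_video below. For a symmetric
# frustum the x/y translation terms vanish and the diagonal is just 1/s.
def _example_ortho_matrix(s=0.6):
    proj = get_ortho_projection_matrix(-s, s, -s, s, 1e-6, 100.0)
    assert torch.isclose(proj[0, 0], torch.tensor(1.0 / s))
    return proj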

def intrinsics_to_projection(
    intrinsics: torch.Tensor,
    near: float,
    far: float,
) -> torch.Tensor:
    """
    Convert OpenCV-style intrinsics to an OpenGL-style perspective matrix.

    Args:
        intrinsics (torch.Tensor): [3, 3] normalized OpenCV intrinsics matrix
            (focal lengths and principal point in [0, 1] image units)
        near (float): near clipping plane
        far (float): far clipping plane
    Returns:
        (torch.Tensor): [4, 4] perspective projection matrix (camera looks
        down +z here, unlike the standard OpenGL -z convention)
    """
    fx, fy = intrinsics[0, 0], intrinsics[1, 1]
    cx, cy = intrinsics[0, 2], intrinsics[1, 2]
    ret = torch.zeros((4, 4), dtype=intrinsics.dtype, device=intrinsics.device)
    ret[0, 0] = 2 * fx
    ret[1, 1] = 2 * fy
    ret[0, 2] = 2 * cx - 1
    ret[1, 2] = -2 * cy + 1
    ret[2, 2] = far / (far - near)
    ret[2, 3] = near * far / (near - far)
    ret[3, 2] = 1.
    return ret
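
# Illustrative sketch (not part of the original module): round-trip a field of
# view through normalized intrinsics into a clip-space projection matrix.
def _example_projection_from_fov(fov_deg=40.0, near=0.8, far=1.6):
    fov = torch.deg2rad(torch.tensor(fov_deg))
    intr = utils3d.torch.intrinsics_from_fov_xy(fov, fov)  # normalized [3, 3]
    return intrinsics_to_projection(intr, near, far)  # [4, 4]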

def render_ortho_video(sample, resolution=512, ssaa=4, bg_color=(0, 0, 0), num_frames=300, r=2, inverse_direction=False, pitch=-1, **kwargs):
    # Full orbit around the object, optionally reversed.
    if inverse_direction:
        yaws = torch.linspace(math.pi, -math.pi, num_frames)
    else:
        yaws = torch.linspace(0, 2 * math.pi, num_frames)
    if pitch != -1:
        pitch = pitch * torch.ones(num_frames)  # fixed pitch for all frames
    else:
        pitch = 0.25 + 0.5 * torch.sin(torch.linspace(0, 2 * math.pi, num_frames))  # gentle bobbing
    yaws = yaws.tolist()
    pitchs = pitch.tolist()
    ortho_scale = 0.6
    # Intrinsics are computed but unused on the orthographic path; only the
    # extrinsics matter here.
    extrinsics, intrinsics = yaw_pitch_r_fov_to_extrinsics_intrinsics(yaws, pitchs, r, 40)
    projection = get_ortho_projection_matrix(-ortho_scale, ortho_scale, -ortho_scale, ortho_scale, 1e-6, 100).to(extrinsics[0].device)
    projections = [projection] * num_frames
    render_results = render_orth_frames(sample, extrinsics, projections, {'resolution': resolution, 'bg_color': bg_color, 'ssaa': ssaa}, **kwargs)
    render_results.update({'extrinsics': extrinsics, 'intrinsics': None, 'projections': projections})
    return render_results

def render_multiview(sample, resolution=518, ssaa=4, bg_color=(0, 0, 0), num_frames=30, r=2, fov=40, random_offset=False, only_color=False, **kwargs):
    # Sample near-uniform viewpoints on the sphere with a Hammersley sequence,
    # optionally jittered by a random offset.
    if random_offset:
        yaws = []
        pitchs = []
        offset = (np.random.rand(), np.random.rand())
        for i in range(num_frames):
            y, p = sphere_hammersley_sequence(i, num_frames, offset)
            yaws.append(y)
            pitchs.append(p)
    else:
        cams = [sphere_hammersley_sequence(i, num_frames) for i in range(num_frames)]
        yaws = [cam[0] for cam in cams]
        pitchs = [cam[1] for cam in cams]
    extrinsics, intrinsics = yaw_pitch_r_fov_to_extrinsics_intrinsics(yaws, pitchs, r, fov)
    res = render_frames(sample, extrinsics, intrinsics, {'resolution': resolution, 'bg_color': bg_color, 'ssaa': ssaa}, **kwargs)
    return (res['color'] if only_color else res), extrinsics, intrinsics
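
# Illustrative sketch (not part of the original module): four near-uniform
# viewpoints of a hypothetical Gaussian sample, color buffers only.
def _example_multiview(gaussian: Gaussian):
    colors, extrinsics, intrinsics = render_multiview(
        gaussian, num_frames=4, only_color=True, verbose=False)
    return colors  # list of [H, W, 3] uint8 arrays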

def render_video(sample, resolution=512, ssaa=4, bg_color=(0, 0, 0), num_frames=300, r=2, fov=40,
                 inverse_direction=False, pitch=-1, **kwargs):
    # Full orbit around the object, optionally reversed.
    if inverse_direction:
        yaws = torch.linspace(math.pi, -math.pi, num_frames)
        # pitch = 0.25 + 0.5 * torch.sin(torch.linspace(2 * math.pi, 0, num_frames))
    else:
        yaws = torch.linspace(0, 2 * math.pi, num_frames)
    if pitch != -1:
        pitch = pitch * torch.ones(num_frames)  # fixed pitch for all frames
    else:
        pitch = 0.25 + 0.5 * torch.sin(torch.linspace(0, 2 * math.pi, num_frames))  # gentle bobbing
    yaws = yaws.tolist()
    pitch = pitch.tolist()
    extrinsics, intrinsics = yaw_pitch_r_fov_to_extrinsics_intrinsics(yaws, pitch, r, fov)
    res = render_frames(sample, extrinsics, intrinsics, {'resolution': resolution, 'bg_color': bg_color, 'ssaa': ssaa}, **kwargs)
    res.update({'extrinsics': extrinsics, 'intrinsics': intrinsics})
    return res
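
# Illustrative sketch (not part of the original module): render a short orbit
# of a hypothetical Gaussian sample and save the frames with PIL. The output
# directory is an arbitrary example path.
def _example_render_video(gaussian: Gaussian, out_dir='/tmp/frames'):
    import os
    os.makedirs(out_dir, exist_ok=True)
    res = render_video(gaussian, resolution=512, num_frames=8, verbose=False)
    for i, frame in enumerate(res['color']):  # [H, W, 3] uint8 arrays
        Image.fromarray(frame).save(os.path.join(out_dir, f'{i:03d}.png'))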

def render_condition_images(sample, resolution=512, ssaa=4, bg_color=(0, 0, 0), num_frames=300, r=2, fov=40, **kwargs):
    # Near-uniform viewpoints with a random Hammersley offset.
    yaws = []
    pitchs = []
    offset = (np.random.rand(), np.random.rand())
    for i in range(num_frames):
        y, p = sphere_hammersley_sequence(i, num_frames, offset)
        yaws.append(y)
        pitchs.append(p)
    # Sample radius/fov pairs so that a sphere of radius sqrt(3)/2 (the
    # circumsphere of a unit cube) always fills the field of view; sampling
    # k = 1/radius^2 uniformly biases toward closer cameras.
    fov_min, fov_max = 10, 70
    radius_min = np.sqrt(3) / 2 / np.sin(fov_max / 360 * np.pi)
    radius_max = np.sqrt(3) / 2 / np.sin(fov_min / 360 * np.pi)
    k_min = 1 / radius_max**2
    k_max = 1 / radius_min**2
    ks = np.random.uniform(k_min, k_max, (num_frames,))  # one sample per frame
    radius = [1 / np.sqrt(k) for k in ks]
    fov = [np.rad2deg(2 * np.arcsin(np.sqrt(3) / 2 / rad)) for rad in radius]
    extrinsics, intrinsics = yaw_pitch_r_fov_to_extrinsics_intrinsics(yaws, pitchs, radius, fov)
    return render_frames(sample, extrinsics, intrinsics, {'resolution': resolution, 'bg_color': bg_color, 'ssaa': ssaa}, **kwargs), extrinsics, intrinsics
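
# Illustrative check (not part of the original module) of the sampling rule in
# render_condition_images: radius = (sqrt(3)/2) / sin(fov/2) makes a sphere of
# radius sqrt(3)/2 exactly fill the frustum, so recovering fov from radius is
# the inverse map used above.
def _example_fov_radius_relation(fov_deg=40.0):
    radius = np.sqrt(3) / 2 / np.sin(np.deg2rad(fov_deg) / 2)
    recovered = np.rad2deg(2 * np.arcsin(np.sqrt(3) / 2 / radius))
    assert abs(recovered - fov_deg) < 1e-9
    return radius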