|
from spaces import GPU |
|
import torch |
|
|
|
def dummy_warmup():
    """Move a one-element tensor to the GPU if CUDA is available.

    Prints an info line and performs a single ``.cuda()`` transfer when
    ``torch.cuda.is_available()`` is true; otherwise prints a warning and
    returns without touching the GPU.
    """
    if not torch.cuda.is_available():
        print("[WARNING] CUDA not available. Skipping warmup.")
        return

    print("[INFO] CUDA is available. Running warmup.")
    # The tensor is not used afterwards; only the transfer itself matters here.
    _warmup_tensor = torch.tensor([1.0]).cuda()
|
|
|
import os, shutil |
|
import numpy as np |
|
import gradio as gr |
|
import rembg |
|
import trimesh |
|
from moge.model.v1 import MoGeModel |
|
from utils.geometry import compute_pointmap |
|
import cv2 |
|
from huggingface_hub import hf_hub_download |
|
from PIL import Image |
|
import matplotlib.pyplot as plt |
|
from eval_wrapper.eval import EvalWrapper, eval_scene |
|
|
|
|
|
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Working directory that receives the exported .glb files and the "input"
# folder written by prep_for_rayst3r.
outdir = "/tmp/rayst3r"

# Both models are loaded once at import time and shared by every request.
print("Loading MoGe model")
moge_model = MoGeModel.from_pretrained("Ruicheng/moge-vitl")
moge_model = moge_model.to(device)

dino_model = torch.hub.load('facebookresearch/dinov2', "dinov2_vitl14_reg")
dino_model.eval()
dino_model.to(device)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def depth2uint16(depth): |
|
return depth * torch.iinfo(torch.uint16).max / 10.0 |
|
|
|
def save_tensor_as_png(tensor: torch.Tensor, path: str, dtype: torch.dtype | None = None):
    """Save a tensor as an image file through PIL.

    Args:
        tensor: Tensor holding the pixel data.
        path: Destination path passed to ``Image.save``.
        dtype: Dtype the tensor is cast to before saving. ``None`` keeps the
            tensor's current dtype.
    """
    target_dtype = tensor.dtype if dtype is None else dtype
    pixels = tensor.to(target_dtype).cpu().numpy()
    Image.fromarray(pixels).save(path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def prep_for_rayst3r(img,depth_dict,mask):
    """Write the RaySt3R input files into ``<outdir>/input``.

    The directory is recreated from scratch on every call and receives four
    files: ``intrinsics.pt``, ``depth.png`` (uint16), ``mask.png`` (bool) and
    ``rgb.png``.

    Args:
        img: Image as a numpy array; its first two dimensions are read as
            (H, W).
        depth_dict: Mapping with tensor entries ``"intrinsics"`` and
            ``"depth"``. It is not modified.
        mask: Numpy array, saved as a boolean image.
    """
    H, W = img.shape[:2]

    # ``.detach().cpu()`` shares storage with the source tensor when that
    # tensor already lives on the CPU. Clone first so the in-place scaling
    # below cannot rescale ``depth_dict["intrinsics"]`` for other readers.
    intrinsics = depth_dict["intrinsics"].detach().cpu().clone()
    # Presumably converts normalised intrinsics to pixel units -- TODO confirm.
    intrinsics[0] *= W
    intrinsics[1] *= H

    # Start from an empty input directory so stale files never leak through.
    input_dir = os.path.join(outdir, "input")
    if os.path.exists(input_dir):
        shutil.rmtree(input_dir)
    os.makedirs(input_dir, exist_ok=True)

    torch.save(intrinsics, os.path.join(input_dir, "intrinsics.pt"))

    # Depth is rescaled into the uint16 range and stored as a 16-bit PNG.
    depth = depth_dict["depth"].cpu()
    depth = depth2uint16(depth)
    save_tensor_as_png(depth, os.path.join(input_dir, "depth.png"),dtype=torch.uint16)

    save_tensor_as_png(torch.from_numpy(mask).bool(), os.path.join(input_dir, "mask.png"),dtype=torch.bool)

    save_tensor_as_png(torch.from_numpy(img), os.path.join(input_dir, "rgb.png"))
|
|
|
@GPU(duration = 180)
def rayst3r_to_glb(img,depth_dict,mask,max_total_points=10e6,rotated=False):
    """Run RaySt3R on the prepared inputs and export the result as a GLB file.

    Writes the input files with ``prep_for_rayst3r``, loads the RaySt3R
    checkpoint, runs ``eval_scene``, randomly subsamples the predicted points,
    negates their Y and Z coordinates, and exports a coloured point cloud.

    Args:
        img: Image array forwarded to ``prep_for_rayst3r``.
        depth_dict: Depth/intrinsics mapping forwarded to ``prep_for_rayst3r``.
        mask: Mask array forwarded to ``prep_for_rayst3r``.
        max_total_points: Upper bound on the number of exported points. Floats
            such as the default ``10e6`` are truncated to an integer.
        rotated: Accepted for interface compatibility; not used here.

    Returns:
        Path of the exported ``rayst3r.glb`` inside ``outdir``.
    """
    prep_for_rayst3r(img,depth_dict,mask)
    print('Doneneee')

    print("Loading RaySt3R model")
    rayst3r_checkpoint = hf_hub_download("bartduis/rayst3r", "rayst3r.pth")
    # The wrapper is built on the CPU first and then moved to the target device.
    rayst3r_model = EvalWrapper(rayst3r_checkpoint,device='cpu')
    rayst3r_model = rayst3r_model.to(device)
    print("Loaded rayst3r_model")
    print(rayst3r_model)

    rayst3r_points = eval_scene(rayst3r_model,os.path.join(outdir, "input"),do_filter_all_masks=True,dino_model=dino_model, device = device).cpu()

    # The default limit is the float 10e6. Slicing needs an integer, so coerce
    # before taking the minimum; otherwise a float limit raises TypeError
    # whenever it is smaller than the point count.
    total_points = rayst3r_points.shape[0]
    n_points = min(int(max_total_points), total_points)
    keep = torch.randperm(total_points)[:n_points]
    rayst3r_points = rayst3r_points[keep].numpy()

    # Negate Y and Z (same convention as input_to_glb).
    rayst3r_points[:,1] = -rayst3r_points[:,1]
    rayst3r_points[:,2] = -rayst3r_points[:,2]

    # NOTE(review): colorize_points_with_turbo_all_dims is not defined in the
    # visible part of this file -- confirm it is defined or imported elsewhere.
    colors = colorize_points_with_turbo_all_dims(rayst3r_points)

    scene = trimesh.Scene()
    pct = trimesh.PointCloud(rayst3r_points, colors=colors, radius=0.01)
    scene.add_geometry(pct)

    outfile = os.path.join(outdir, "rayst3r.glb")
    scene.export(outfile)
    return outfile
|
|
|
|
|
def input_to_glb(outdir,img,depth_dict,mask,rotated=False):
    """Export the masked input depth as a coloured point cloud (``input.glb``).

    Args:
        outdir: Directory that receives ``input.glb``.
        img: Image array; its first two dimensions are read as (H, W) and its
            pixels colour the points.
        depth_dict: Mapping with tensor entries ``"intrinsics"`` and
            ``"depth"``. It is not modified.
        mask: Boolean array selecting the pixels to export, iterated row by
            row alongside the point map and the image.
        rotated: Accepted for interface compatibility; not used here.

    Returns:
        Path of the exported ``input.glb``.
    """
    H, W = img.shape[:2]

    # ``.cpu()`` returns the same tensor when it is already on the CPU and
    # ``.numpy()`` shares its memory. Copy first so the in-place scaling below
    # does not modify ``depth_dict["intrinsics"]``, which later callers read.
    intrinsics = depth_dict["intrinsics"].detach().cpu().numpy().copy()
    # Presumably converts normalised intrinsics to pixel units -- TODO confirm.
    intrinsics[0] *= W
    intrinsics[1] *= H

    depth = depth_dict["depth"].cpu().numpy()
    # An identity pose is passed as cam2world.
    cam2world = np.eye(4)
    points_world = compute_pointmap(depth, cam2world, intrinsics)

    scene = trimesh.Scene()
    # Keep only the masked pixels, one row at a time.
    pts = np.concatenate([p[m] for p,m in zip(points_world,mask)])
    col = np.concatenate([c[m] for c,m in zip(img,mask)])

    pts = pts.reshape(-1,3)
    # Negate Y and Z (same convention as rayst3r_to_glb).
    pts[:,1] = -pts[:,1]
    pts[:,2] = -pts[:,2]

    pct = trimesh.PointCloud(pts, colors=col.reshape(-1,3))
    scene.add_geometry(pct)

    outfile = os.path.join(outdir, "input.glb")
    scene.export(outfile)
    return outfile
|
|
|
@GPU(duration=180)
def depth_moge(input_img):
    """Run the MoGe model on one image and return its raw inference output.

    Args:
        input_img: Channel-last image array; it is divided by 255 and permuted
            to channel-first before inference (presumably 8-bit RGB -- TODO
            confirm).

    Returns:
        Whatever ``moge_model.infer`` returns. Elsewhere in this file the
        result is read through its ``"depth"`` and ``"intrinsics"`` entries.
    """
    scaled = input_img / 255
    channels_last = torch.tensor(scaled, dtype=torch.float32, device=device)
    channels_first = channels_last.permute(2, 0, 1)
    return moge_model.infer(channels_first)
|
|
|
@GPU(duration=180)
def mask_rembg(input_img):
    """Segment the foreground with rembg and return a boolean mask plus RGB.

    The alpha channel produced by rembg is eroded once with a 3x3 kernel, and
    the mask is then taken at a threshold of 128.

    Args:
        input_img: Image passed straight to ``rembg.remove``.

    Returns:
        Tuple ``(mask, rgb)``: ``mask`` is True where the eroded alpha is at
        least 128, and ``rgb`` holds the first three channels of the rembg
        output.
    """
    cutout = rembg.remove(input_img, alpha_matting=False, post_process_mask=True)
    rgba = np.array(cutout)

    # Erode the alpha channel once and write it back in place.
    erosion_kernel = np.ones((3, 3), np.uint8)
    rgba[..., 3] = cv2.erode(rgba[..., 3], erosion_kernel, iterations=1)

    foreground = rgba[:, :, -1] >= 128
    colour = rgba[:, :, :3]
    return foreground, colour
|
|
|
@GPU(duration = 180)
def process_image(input_img):
    """Gradio handler: turn one image into two GLB point clouds.

    The image is resized to 640x480, segmented with ``mask_rembg``, and given
    a depth estimate by ``depth_moge``. ``outdir`` is then wiped and recreated
    before both exports are written into it.

    Args:
        input_img: Image array supplied by the ``gr.Image`` input, or ``None``
            when the form was submitted without an image.

    Returns:
        Tuple ``(input_glb, inference_glb)`` with the paths of the input point
        cloud and the RaySt3R prediction.

    Raises:
        gr.Error: If no image was provided.
    """
    if input_img is None:
        # Without this guard the failure surfaces inside cv2.resize with an
        # error message that means nothing to the user.
        raise gr.Error("Please provide an input image.")

    rotated = False

    # cv2.resize takes the target size as (width, height).
    input_img = cv2.resize(input_img, (640, 480))
    # Only the mask is needed; the RGB returned by mask_rembg is unused.
    mask, _rgb = mask_rembg(input_img)
    depth_dict = depth_moge(input_img)

    # Start every request from an empty output directory.
    if os.path.exists(outdir):
        shutil.rmtree(outdir)
    os.makedirs(outdir)

    input_glb = input_to_glb(outdir,input_img,depth_dict,mask,rotated=rotated)
    print('Input done')
    print('calling Ray')
    inference_glb = rayst3r_to_glb(input_img,depth_dict,mask,rotated=rotated)

    return input_glb, inference_glb
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Gradio UI: one image in, two 3D viewers out. The outputs match the two
# paths returned by process_image (input point cloud, RaySt3R prediction).
demo = gr.Interface(
    fn=process_image,
    inputs=gr.Image(),
    outputs=[
        gr.Model3D(label="Input"),
        gr.Model3D(label="RaySt3R"),
    ],
)


if __name__ == "__main__":
    # Run the GPU warmup once before the app starts serving.
    dummy_warmup()
    demo.launch()
|
|
|
|