liguang0115 committed on
Commit
288376d
·
1 Parent(s): 7b64bf4

Update project configuration and dependencies; modify .gitignore, adjust README title, and refine inference settings

Browse files
.gitignore CHANGED
@@ -2,3 +2,9 @@ assets/*
2
  pycache/*
3
  __pycache__/*
4
  .DS_Store
 
 
 
 
 
 
 
2
  pycache/*
3
  __pycache__/*
4
  .DS_Store
5
+ *.pyc
6
+ *.vscode
7
+ visualization/*
8
+ *.pth
9
+ *.o
10
+ *.gradio
README.md CHANGED
@@ -1,10 +1,11 @@
1
  ---
2
- title: Stable Virtual Camera
3
  emoji: ⚡
4
  colorFrom: yellow
5
  colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 5.33.0
 
8
  app_file: app.py
9
  pinned: false
10
  ---
 
1
  ---
2
+ title: V-MEM
3
  emoji: ⚡
4
  colorFrom: yellow
5
  colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 5.33.0
8
+ python_version: 3.10.13
9
  app_file: app.py
10
  pinned: false
11
  ---
configs/inference/inference.yaml CHANGED
@@ -24,7 +24,7 @@ model:
24
  camera_scale: 2.0
25
  inference_num_steps: 50
26
  cfg_min: 1.2
27
- cfg: 3.0
28
  guider_types: 1
29
 
30
  samples_dir: "./visualization"
@@ -46,7 +46,7 @@ surfel:
46
  merge_normal_threshold: 0.6
47
  lr: 0.01
48
  niter: 1000
49
- model_path: "./extern/CUT3R/src/cut3r_512_dpt_4_64.pth"
50
  width: 512
51
  height: 288
52
 
 
24
  camera_scale: 2.0
25
  inference_num_steps: 50
26
  cfg_min: 1.2
27
+ cfg: 2.0
28
  guider_types: 1
29
 
30
  samples_dir: "./visualization"
 
46
  merge_normal_threshold: 0.6
47
  lr: 0.01
48
  niter: 1000
49
+ model_path: "liguang0115/cut3r"
50
  width: 512
51
  height: 288
52
 
extern/CUT3R/.gitignore CHANGED
@@ -52,4 +52,6 @@ docs/_build/
52
  # Ignore data and ckpts
53
  *.pth
54
  data
55
- src/checkpoints
 
 
 
52
  # Ignore data and ckpts
53
  *.pth
54
  data
55
+ src/checkpoints
56
+
57
+ *pyc
modeling/__pycache__/__init__.cpython-310.pyc DELETED
Binary file (277 Bytes)
 
modeling/__pycache__/__init__.cpython-39.pyc DELETED
Binary file (275 Bytes)
 
modeling/__pycache__/metrics.cpython-310.pyc DELETED
Binary file (3.68 kB)
 
modeling/__pycache__/metrics.cpython-39.pyc DELETED
Binary file (3.68 kB)
 
modeling/__pycache__/network.cpython-310.pyc DELETED
Binary file (5.28 kB)
 
modeling/__pycache__/network.cpython-39.pyc DELETED
Binary file (5.26 kB)
 
modeling/__pycache__/pipeline.cpython-310.pyc DELETED
Binary file (35 kB)
 
modeling/__pycache__/pipeline.cpython-39.pyc DELETED
Binary file (33.1 kB)
 
modeling/__pycache__/sampling.cpython-310.pyc DELETED
Binary file (15 kB)
 
modeling/__pycache__/sampling.cpython-39.pyc DELETED
Binary file (14.7 kB)
 
modeling/modules/__pycache__/__init__.cpython-310.pyc DELETED
Binary file (151 Bytes)
 
modeling/modules/__pycache__/__init__.cpython-39.pyc DELETED
Binary file (151 Bytes)
 
modeling/modules/__pycache__/autoencoder.cpython-310.pyc DELETED
Binary file (2.2 kB)
 
modeling/modules/__pycache__/autoencoder.cpython-39.pyc DELETED
Binary file (2.3 kB)
 
modeling/modules/__pycache__/conditioner.cpython-310.pyc DELETED
Binary file (1.53 kB)
 
modeling/modules/__pycache__/conditioner.cpython-39.pyc DELETED
Binary file (1.53 kB)
 
modeling/modules/__pycache__/layers.cpython-310.pyc DELETED
Binary file (4.7 kB)
 
modeling/modules/__pycache__/layers.cpython-39.pyc DELETED
Binary file (4.66 kB)
 
modeling/modules/__pycache__/transformer.cpython-310.pyc DELETED
Binary file (7.69 kB)
 
modeling/modules/__pycache__/transformer.cpython-39.pyc DELETED
Binary file (7.48 kB)
 
modeling/modules/preprocessor.py CHANGED
@@ -1,116 +1,116 @@
1
- import contextlib
2
- import os
3
- import os.path as osp
4
- import sys
5
- from typing import cast
6
 
7
- import imageio.v3 as iio
8
- import numpy as np
9
- import torch
10
 
11
 
12
- class Dust3rPipeline(object):
13
- def __init__(self, device: str | torch.device = "cuda"):
14
- submodule_path = osp.realpath(
15
- osp.join(osp.dirname(__file__), "../../third_party/dust3r/")
16
- )
17
- if submodule_path not in sys.path:
18
- sys.path.insert(0, submodule_path)
19
- try:
20
- with open(os.devnull, "w") as f, contextlib.redirect_stdout(f):
21
- from dust3r.cloud_opt import ( # type: ignore[import]
22
- GlobalAlignerMode,
23
- global_aligner,
24
- )
25
- from dust3r.image_pairs import make_pairs # type: ignore[import]
26
- from dust3r.inference import inference # type: ignore[import]
27
- from dust3r.model import AsymmetricCroCo3DStereo # type: ignore[import]
28
- from dust3r.utils.image import load_images # type: ignore[import]
29
- except ImportError:
30
- raise ImportError(
31
- "Missing required submodule: 'dust3r'. Please ensure that all submodules are properly set up.\n\n"
32
- "To initialize them, run the following command in the project root:\n"
33
- " git submodule update --init --recursive"
34
- )
35
 
36
- self.device = torch.device(device)
37
- self.model = AsymmetricCroCo3DStereo.from_pretrained(
38
- "naver/DUSt3R_ViTLarge_BaseDecoder_512_dpt"
39
- ).to(self.device)
40
 
41
- self._GlobalAlignerMode = GlobalAlignerMode
42
- self._global_aligner = global_aligner
43
- self._make_pairs = make_pairs
44
- self._inference = inference
45
- self._load_images = load_images
46
 
47
- def infer_cameras_and_points(
48
- self,
49
- img_paths: list[str],
50
- Ks: list[list] = None,
51
- c2ws: list[list] = None,
52
- batch_size: int = 16,
53
- schedule: str = "cosine",
54
- lr: float = 0.01,
55
- niter: int = 500,
56
- min_conf_thr: int = 3,
57
- ) -> tuple[
58
- list[np.ndarray], np.ndarray, np.ndarray, list[np.ndarray], list[np.ndarray]
59
- ]:
60
- num_img = len(img_paths)
61
- if num_img == 1:
62
- print("Only one image found, duplicating it to create a stereo pair.")
63
- img_paths = img_paths * 2
64
 
65
- images = self._load_images(img_paths, size=512)
66
- pairs = self._make_pairs(
67
- images,
68
- scene_graph="complete",
69
- prefilter=None,
70
- symmetrize=True,
71
- )
72
- output = self._inference(pairs, self.model, self.device, batch_size=batch_size)
73
 
74
- ori_imgs = [iio.imread(p) for p in img_paths]
75
- ori_img_whs = np.array([img.shape[1::-1] for img in ori_imgs])
76
- img_whs = np.concatenate([image["true_shape"][:, ::-1] for image in images], 0)
77
 
78
- scene = self._global_aligner(
79
- output,
80
- device=self.device,
81
- mode=self._GlobalAlignerMode.PointCloudOptimizer,
82
- same_focals=True,
83
- optimize_pp=False, # True,
84
- min_conf_thr=min_conf_thr,
85
- )
86
 
87
- # if Ks is not None:
88
- # scene.preset_focal(
89
- # torch.tensor([[K[0, 0], K[1, 1]] for K in Ks])
90
- # )
91
 
92
- if c2ws is not None:
93
- scene.preset_pose(c2ws)
94
 
95
- _ = scene.compute_global_alignment(
96
- init="msp", niter=niter, schedule=schedule, lr=lr
97
- )
98
 
99
- imgs = cast(list, scene.imgs)
100
- Ks = scene.get_intrinsics().detach().cpu().numpy().copy()
101
- c2ws = scene.get_im_poses().detach().cpu().numpy() # type: ignore
102
- pts3d = [x.detach().cpu().numpy() for x in scene.get_pts3d()] # type: ignore
103
- if num_img > 1:
104
- masks = [x.detach().cpu().numpy() for x in scene.get_masks()]
105
- points = [p[m] for p, m in zip(pts3d, masks)]
106
- point_colors = [img[m] for img, m in zip(imgs, masks)]
107
- else:
108
- points = [p.reshape(-1, 3) for p in pts3d]
109
- point_colors = [img.reshape(-1, 3) for img in imgs]
110
 
111
- # Convert back to the original image size.
112
- imgs = ori_imgs
113
- Ks[:, :2, -1] *= ori_img_whs / img_whs
114
- Ks[:, :2, :2] *= (ori_img_whs / img_whs).mean(axis=1, keepdims=True)[..., None]
115
 
116
- return imgs, Ks, c2ws, points, point_colors
 
1
+ # import contextlib
2
+ # import os
3
+ # import os.path as osp
4
+ # import sys
5
+ # from typing import cast
6
 
7
+ # import imageio.v3 as iio
8
+ # import numpy as np
9
+ # import torch
10
 
11
 
12
+ # class Dust3rPipeline(object):
13
+ # def __init__(self, device: str | torch.device = "cuda"):
14
+ # submodule_path = osp.realpath(
15
+ # osp.join(osp.dirname(__file__), "../../third_party/dust3r/")
16
+ # )
17
+ # if submodule_path not in sys.path:
18
+ # sys.path.insert(0, submodule_path)
19
+ # try:
20
+ # with open(os.devnull, "w") as f, contextlib.redirect_stdout(f):
21
+ # from dust3r.cloud_opt import ( # type: ignore[import]
22
+ # GlobalAlignerMode,
23
+ # global_aligner,
24
+ # )
25
+ # from dust3r.image_pairs import make_pairs # type: ignore[import]
26
+ # from dust3r.inference import inference # type: ignore[import]
27
+ # from dust3r.model import AsymmetricCroCo3DStereo # type: ignore[import]
28
+ # from dust3r.utils.image import load_images # type: ignore[import]
29
+ # except ImportError:
30
+ # raise ImportError(
31
+ # "Missing required submodule: 'dust3r'. Please ensure that all submodules are properly set up.\n\n"
32
+ # "To initialize them, run the following command in the project root:\n"
33
+ # " git submodule update --init --recursive"
34
+ # )
35
 
36
+ # self.device = torch.device(device)
37
+ # self.model = AsymmetricCroCo3DStereo.from_pretrained(
38
+ # "naver/DUSt3R_ViTLarge_BaseDecoder_512_dpt"
39
+ # ).to(self.device)
40
 
41
+ # self._GlobalAlignerMode = GlobalAlignerMode
42
+ # self._global_aligner = global_aligner
43
+ # self._make_pairs = make_pairs
44
+ # self._inference = inference
45
+ # self._load_images = load_images
46
 
47
+ # def infer_cameras_and_points(
48
+ # self,
49
+ # img_paths: list[str],
50
+ # Ks: list[list] = None,
51
+ # c2ws: list[list] = None,
52
+ # batch_size: int = 16,
53
+ # schedule: str = "cosine",
54
+ # lr: float = 0.01,
55
+ # niter: int = 500,
56
+ # min_conf_thr: int = 3,
57
+ # ) -> tuple[
58
+ # list[np.ndarray], np.ndarray, np.ndarray, list[np.ndarray], list[np.ndarray]
59
+ # ]:
60
+ # num_img = len(img_paths)
61
+ # if num_img == 1:
62
+ # print("Only one image found, duplicating it to create a stereo pair.")
63
+ # img_paths = img_paths * 2
64
 
65
+ # images = self._load_images(img_paths, size=512)
66
+ # pairs = self._make_pairs(
67
+ # images,
68
+ # scene_graph="complete",
69
+ # prefilter=None,
70
+ # symmetrize=True,
71
+ # )
72
+ # output = self._inference(pairs, self.model, self.device, batch_size=batch_size)
73
 
74
+ # ori_imgs = [iio.imread(p) for p in img_paths]
75
+ # ori_img_whs = np.array([img.shape[1::-1] for img in ori_imgs])
76
+ # img_whs = np.concatenate([image["true_shape"][:, ::-1] for image in images], 0)
77
 
78
+ # scene = self._global_aligner(
79
+ # output,
80
+ # device=self.device,
81
+ # mode=self._GlobalAlignerMode.PointCloudOptimizer,
82
+ # same_focals=True,
83
+ # optimize_pp=False, # True,
84
+ # min_conf_thr=min_conf_thr,
85
+ # )
86
 
87
+ # # if Ks is not None:
88
+ # # scene.preset_focal(
89
+ # # torch.tensor([[K[0, 0], K[1, 1]] for K in Ks])
90
+ # # )
91
 
92
+ # if c2ws is not None:
93
+ # scene.preset_pose(c2ws)
94
 
95
+ # _ = scene.compute_global_alignment(
96
+ # init="msp", niter=niter, schedule=schedule, lr=lr
97
+ # )
98
 
99
+ # imgs = cast(list, scene.imgs)
100
+ # Ks = scene.get_intrinsics().detach().cpu().numpy().copy()
101
+ # c2ws = scene.get_im_poses().detach().cpu().numpy() # type: ignore
102
+ # pts3d = [x.detach().cpu().numpy() for x in scene.get_pts3d()] # type: ignore
103
+ # if num_img > 1:
104
+ # masks = [x.detach().cpu().numpy() for x in scene.get_masks()]
105
+ # points = [p[m] for p, m in zip(pts3d, masks)]
106
+ # point_colors = [img[m] for img, m in zip(imgs, masks)]
107
+ # else:
108
+ # points = [p.reshape(-1, 3) for p in pts3d]
109
+ # point_colors = [img.reshape(-1, 3) for img in imgs]
110
 
111
+ # # Convert back to the original image size.
112
+ # imgs = ori_imgs
113
+ # Ks[:, :2, -1] *= ori_img_whs / img_whs
114
+ # Ks[:, :2, :2] *= (ori_img_whs / img_whs).mean(axis=1, keepdims=True)[..., None]
115
 
116
+ # return imgs, Ks, c2ws, points, point_colors
modeling/pipeline.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from typing import List, Union
2
  from copy import deepcopy
3
 
@@ -43,10 +44,6 @@ from utils import (encode_vae_image,
43
  get_plucker_coordinates,
44
  do_sample,
45
  average_camera_pose)
46
- from utils.training_utils import load_pretrained_model
47
-
48
-
49
-
50
 
51
 
52
 
@@ -59,17 +56,15 @@ class VMemPipeline:
59
  self.config = config
60
 
61
  model_path = self.config.model.get("model_path", None)
62
- if model_path is None:
63
- self.model = load_pretrained_model(cache_dir=self.config.model.cache_dir, device=device)
64
- else:
65
- self.model = VMemModel(VMemModelParams()).to(device, dtype)
66
- # load from huggingface
67
- from huggingface_hub import hf_hub_download
68
- state_dict = torch.load(hf_hub_download(repo_id=model_path, filename="vmem_weights.pth"), map_location='cpu')
69
- state_dict = {k.replace("module.", "") if "module." in k else k: v for k, v in state_dict.items()}
70
-
71
 
72
- self.model.load_state_dict(state_dict, strict=True)
73
 
74
 
75
  self.model_wrapper = VMemWrapper(self.model)
@@ -100,12 +95,15 @@ class VMemPipeline:
100
  if self.use_surfel:
101
  # Initialize CUT3R-based reconstructor
102
  # Load and prepare the model
103
- surfel_model_path = self.config.surfel.model_path
 
 
104
  print(f"Loading model from {surfel_model_path}...")
105
  add_path_to_dust3r(surfel_model_path)
106
  self.surfel_model = ARCroco3DStereo.from_pretrained(surfel_model_path).to(device)
107
  self.surfel_model.eval()
108
 
 
109
  # Import CUT3R scene alignment module
110
  from extern.CUT3R.cloud_opt.dust3r_opt import global_aligner, GlobalAlignerMode
111
  self.GlobalAlignerMode = GlobalAlignerMode
@@ -145,6 +143,8 @@ class VMemPipeline:
145
  self.surfel_to_timestep = {}
146
  self.pil_frames = []
147
  self.visualize_dir = self.config.model.samples_dir
 
 
148
 
149
  self.global_step = 0
150
 
 
1
+ import os
2
  from typing import List, Union
3
  from copy import deepcopy
4
 
 
44
  get_plucker_coordinates,
45
  do_sample,
46
  average_camera_pose)
 
 
 
 
47
 
48
 
49
 
 
56
  self.config = config
57
 
58
  model_path = self.config.model.get("model_path", None)
59
+
60
+ self.model = VMemModel(VMemModelParams()).to(device, dtype)
61
+ # load from huggingface
62
+ from huggingface_hub import hf_hub_download
63
+ state_dict = torch.load(hf_hub_download(repo_id=model_path, filename="vmem_weights.pth"), map_location='cpu')
64
+ state_dict = {k.replace("module.", "") if "module." in k else k: v for k, v in state_dict.items()}
65
+
 
 
66
 
67
+ self.model.load_state_dict(state_dict, strict=True)
68
 
69
 
70
  self.model_wrapper = VMemWrapper(self.model)
 
95
  if self.use_surfel:
96
  # Initialize CUT3R-based reconstructor
97
  # Load and prepare the model
98
+ # download the model from huggingface
99
+
100
+ surfel_model_path = hf_hub_download(repo_id=self.config.surfel.model_path, filename="cut3r_512_dpt_4_64.pth")
101
  print(f"Loading model from {surfel_model_path}...")
102
  add_path_to_dust3r(surfel_model_path)
103
  self.surfel_model = ARCroco3DStereo.from_pretrained(surfel_model_path).to(device)
104
  self.surfel_model.eval()
105
 
106
+
107
  # Import CUT3R scene alignment module
108
  from extern.CUT3R.cloud_opt.dust3r_opt import global_aligner, GlobalAlignerMode
109
  self.GlobalAlignerMode = GlobalAlignerMode
 
143
  self.surfel_to_timestep = {}
144
  self.pil_frames = []
145
  self.visualize_dir = self.config.model.samples_dir
146
+ if not os.path.exists(self.visualize_dir):
147
+ os.makedirs(self.visualize_dir)
148
 
149
  self.global_step = 0
150
 
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
  --extra-index-url https://download.pytorch.org/whl/nightly/cu124
2
- torch==2.7.0
3
- torchvision==0.22.0
4
 
5
  pydantic
6
  gradio
 
1
  --extra-index-url https://download.pytorch.org/whl/nightly/cu124
2
+ torch==2.5.1
3
+ torchvision==0.20.1
4
 
5
  pydantic
6
  gradio
utils/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/utils/__pycache__/__init__.cpython-310.pyc and b/utils/__pycache__/__init__.cpython-310.pyc differ
 
utils/__pycache__/util.cpython-310.pyc CHANGED
Binary files a/utils/__pycache__/util.cpython-310.pyc and b/utils/__pycache__/util.cpython-310.pyc differ