Spaces:

liguang0115
/

vmem

Running on L4

App Files Community

liguang0115 committed 14 days ago

Commit

288376d

1 Parent(s): 7b64bf4

Update project configuration and dependencies; modify .gitignore, adjust README title, and refine inference settings

Browse files

Files changed (29) hide show

.gitignore +6 -0
README.md +2 -1
configs/inference/inference.yaml +2 -2
extern/CUT3R/.gitignore +3 -1
modeling/__pycache__/__init__.cpython-310.pyc +0 -0
modeling/__pycache__/__init__.cpython-39.pyc +0 -0
modeling/__pycache__/metrics.cpython-310.pyc +0 -0
modeling/__pycache__/metrics.cpython-39.pyc +0 -0
modeling/__pycache__/network.cpython-310.pyc +0 -0
modeling/__pycache__/network.cpython-39.pyc +0 -0
modeling/__pycache__/pipeline.cpython-310.pyc +0 -0
modeling/__pycache__/pipeline.cpython-39.pyc +0 -0
modeling/__pycache__/sampling.cpython-310.pyc +0 -0
modeling/__pycache__/sampling.cpython-39.pyc +0 -0
modeling/modules/__pycache__/__init__.cpython-310.pyc +0 -0
modeling/modules/__pycache__/__init__.cpython-39.pyc +0 -0
modeling/modules/__pycache__/autoencoder.cpython-310.pyc +0 -0
modeling/modules/__pycache__/autoencoder.cpython-39.pyc +0 -0
modeling/modules/__pycache__/conditioner.cpython-310.pyc +0 -0
modeling/modules/__pycache__/conditioner.cpython-39.pyc +0 -0
modeling/modules/__pycache__/layers.cpython-310.pyc +0 -0
modeling/modules/__pycache__/layers.cpython-39.pyc +0 -0
modeling/modules/__pycache__/transformer.cpython-310.pyc +0 -0
modeling/modules/__pycache__/transformer.cpython-39.pyc +0 -0
modeling/modules/preprocessor.py +101 -101
modeling/pipeline.py +15 -15
requirements.txt +2 -2
utils/__pycache__/__init__.cpython-310.pyc +0 -0
utils/__pycache__/util.cpython-310.pyc +0 -0

.gitignore CHANGED Viewed

@@ -2,3 +2,9 @@ assets/*
 pycache/*
 __pycache__/*
 .DS_Store

 pycache/*
 __pycache__/*
 .DS_Store
+*.pyc
+*.vscode
+visualization/*
+*.pth
+*.o
+*.gradio

README.md CHANGED Viewed

@@ -1,10 +1,11 @@
 ---
-title: Stable Virtual Camera
 emoji: ⚡
 colorFrom: yellow
 colorTo: yellow
 sdk: gradio
 sdk_version: 5.33.0
 app_file: app.py
 pinned: false
 ---

 ---
+title: V-MEM
 emoji: ⚡
 colorFrom: yellow
 colorTo: yellow
 sdk: gradio
 sdk_version: 5.33.0
+python_version: 3.10.13
 app_file: app.py
 pinned: false
 ---

configs/inference/inference.yaml CHANGED Viewed

@@ -24,7 +24,7 @@ model:
     camera_scale: 2.0
     inference_num_steps: 50
     cfg_min: 1.2
-    cfg: 3.0
     guider_types: 1
     samples_dir: "./visualization"
@@ -46,7 +46,7 @@ surfel:
     merge_normal_threshold: 0.6
     lr: 0.01
     niter: 1000
-    model_path: "./extern/CUT3R/src/cut3r_512_dpt_4_64.pth"
     width: 512
     height: 288

     camera_scale: 2.0
     inference_num_steps: 50
     cfg_min: 1.2
+    cfg: 2.0
     guider_types: 1
     samples_dir: "./visualization"
     merge_normal_threshold: 0.6
     lr: 0.01
     niter: 1000
+    model_path: "liguang0115/cut3r"
     width: 512
     height: 288

extern/CUT3R/.gitignore CHANGED Viewed

@@ -52,4 +52,6 @@ docs/_build/
 # Ignore data and ckpts
 *.pth
 data
-src/checkpoints

 # Ignore data and ckpts
 *.pth
 data
+src/checkpoints
+*pyc

modeling/__pycache__/__init__.cpython-310.pyc DELETED Viewed

Binary file (277 Bytes)

modeling/__pycache__/__init__.cpython-39.pyc DELETED Viewed

Binary file (275 Bytes)

modeling/__pycache__/metrics.cpython-310.pyc DELETED Viewed

Binary file (3.68 kB)

modeling/__pycache__/metrics.cpython-39.pyc DELETED Viewed

Binary file (3.68 kB)

modeling/__pycache__/network.cpython-310.pyc DELETED Viewed

Binary file (5.28 kB)

modeling/__pycache__/network.cpython-39.pyc DELETED Viewed

Binary file (5.26 kB)

modeling/__pycache__/pipeline.cpython-310.pyc DELETED Viewed

Binary file (35 kB)

modeling/__pycache__/pipeline.cpython-39.pyc DELETED Viewed

Binary file (33.1 kB)

modeling/__pycache__/sampling.cpython-310.pyc DELETED Viewed

Binary file (15 kB)

modeling/__pycache__/sampling.cpython-39.pyc DELETED Viewed

Binary file (14.7 kB)

modeling/modules/__pycache__/__init__.cpython-310.pyc DELETED Viewed

Binary file (151 Bytes)

modeling/modules/__pycache__/__init__.cpython-39.pyc DELETED Viewed

Binary file (151 Bytes)

modeling/modules/__pycache__/autoencoder.cpython-310.pyc DELETED Viewed

Binary file (2.2 kB)

modeling/modules/__pycache__/autoencoder.cpython-39.pyc DELETED Viewed

Binary file (2.3 kB)

modeling/modules/__pycache__/conditioner.cpython-310.pyc DELETED Viewed

Binary file (1.53 kB)

modeling/modules/__pycache__/conditioner.cpython-39.pyc DELETED Viewed

Binary file (1.53 kB)

modeling/modules/__pycache__/layers.cpython-310.pyc DELETED Viewed

Binary file (4.7 kB)

modeling/modules/__pycache__/layers.cpython-39.pyc DELETED Viewed

Binary file (4.66 kB)

modeling/modules/__pycache__/transformer.cpython-310.pyc DELETED Viewed

Binary file (7.69 kB)

modeling/modules/__pycache__/transformer.cpython-39.pyc DELETED Viewed

Binary file (7.48 kB)

modeling/modules/preprocessor.py CHANGED Viewed

@@ -1,116 +1,116 @@
-import contextlib
-import os
-import os.path as osp
-import sys
-from typing import cast
-import imageio.v3 as iio
-import numpy as np
-import torch
-class Dust3rPipeline(object):
-    def __init__(self, device: str | torch.device = "cuda"):
-        submodule_path = osp.realpath(
-            osp.join(osp.dirname(__file__), "../../third_party/dust3r/")
-        )
-        if submodule_path not in sys.path:
-            sys.path.insert(0, submodule_path)
-        try:
-            with open(os.devnull, "w") as f, contextlib.redirect_stdout(f):
-                from dust3r.cloud_opt import (  # type: ignore[import]
-                    GlobalAlignerMode,
-                    global_aligner,
-                )
-                from dust3r.image_pairs import make_pairs  # type: ignore[import]
-                from dust3r.inference import inference  # type: ignore[import]
-                from dust3r.model import AsymmetricCroCo3DStereo  # type: ignore[import]
-                from dust3r.utils.image import load_images  # type: ignore[import]
-        except ImportError:
-            raise ImportError(
-                "Missing required submodule: 'dust3r'. Please ensure that all submodules are properly set up.\n\n"
-                "To initialize them, run the following command in the project root:\n"
-                "  git submodule update --init --recursive"
-            )
-        self.device = torch.device(device)
-        self.model = AsymmetricCroCo3DStereo.from_pretrained(
-            "naver/DUSt3R_ViTLarge_BaseDecoder_512_dpt"
-        ).to(self.device)
-        self._GlobalAlignerMode = GlobalAlignerMode
-        self._global_aligner = global_aligner
-        self._make_pairs = make_pairs
-        self._inference = inference
-        self._load_images = load_images
-    def infer_cameras_and_points(
-        self,
-        img_paths: list[str],
-        Ks: list[list] = None,
-        c2ws: list[list] = None,
-        batch_size: int = 16,
-        schedule: str = "cosine",
-        lr: float = 0.01,
-        niter: int = 500,
-        min_conf_thr: int = 3,
-    ) -> tuple[
-        list[np.ndarray], np.ndarray, np.ndarray, list[np.ndarray], list[np.ndarray]
-    ]:
-        num_img = len(img_paths)
-        if num_img == 1:
-            print("Only one image found, duplicating it to create a stereo pair.")
-            img_paths = img_paths * 2
-        images = self._load_images(img_paths, size=512)
-        pairs = self._make_pairs(
-            images,
-            scene_graph="complete",
-            prefilter=None,
-            symmetrize=True,
-        )
-        output = self._inference(pairs, self.model, self.device, batch_size=batch_size)
-        ori_imgs = [iio.imread(p) for p in img_paths]
-        ori_img_whs = np.array([img.shape[1::-1] for img in ori_imgs])
-        img_whs = np.concatenate([image["true_shape"][:, ::-1] for image in images], 0)
-        scene = self._global_aligner(
-            output,
-            device=self.device,
-            mode=self._GlobalAlignerMode.PointCloudOptimizer,
-            same_focals=True,
-            optimize_pp=False,  # True,
-            min_conf_thr=min_conf_thr,
-        )
-        # if Ks is not None:
-        #     scene.preset_focal(
-        #         torch.tensor([[K[0, 0], K[1, 1]] for K in Ks])
-        #     )
-        if c2ws is not None:
-            scene.preset_pose(c2ws)
-        _ = scene.compute_global_alignment(
-            init="msp", niter=niter, schedule=schedule, lr=lr
-        )
-        imgs = cast(list, scene.imgs)
-        Ks = scene.get_intrinsics().detach().cpu().numpy().copy()
-        c2ws = scene.get_im_poses().detach().cpu().numpy()  # type: ignore
-        pts3d = [x.detach().cpu().numpy() for x in scene.get_pts3d()]  # type: ignore
-        if num_img > 1:
-            masks = [x.detach().cpu().numpy() for x in scene.get_masks()]
-            points = [p[m] for p, m in zip(pts3d, masks)]
-            point_colors = [img[m] for img, m in zip(imgs, masks)]
-        else:
-            points = [p.reshape(-1, 3) for p in pts3d]
-            point_colors = [img.reshape(-1, 3) for img in imgs]
-        # Convert back to the original image size.
-        imgs = ori_imgs
-        Ks[:, :2, -1] *= ori_img_whs / img_whs
-        Ks[:, :2, :2] *= (ori_img_whs / img_whs).mean(axis=1, keepdims=True)[..., None]
-        return imgs, Ks, c2ws, points, point_colors

+# import contextlib
+# import os
+# import os.path as osp
+# import sys
+# from typing import cast
+# import imageio.v3 as iio
+# import numpy as np
+# import torch
+# class Dust3rPipeline(object):
+#     def __init__(self, device: str | torch.device = "cuda"):
+#         submodule_path = osp.realpath(
+#             osp.join(osp.dirname(__file__), "../../third_party/dust3r/")
+#         )
+#         if submodule_path not in sys.path:
+#             sys.path.insert(0, submodule_path)
+#         try:
+#             with open(os.devnull, "w") as f, contextlib.redirect_stdout(f):
+#                 from dust3r.cloud_opt import (  # type: ignore[import]
+#                     GlobalAlignerMode,
+#                     global_aligner,
+#                 )
+#                 from dust3r.image_pairs import make_pairs  # type: ignore[import]
+#                 from dust3r.inference import inference  # type: ignore[import]
+#                 from dust3r.model import AsymmetricCroCo3DStereo  # type: ignore[import]
+#                 from dust3r.utils.image import load_images  # type: ignore[import]
+#         except ImportError:
+#             raise ImportError(
+#                 "Missing required submodule: 'dust3r'. Please ensure that all submodules are properly set up.\n\n"
+#                 "To initialize them, run the following command in the project root:\n"
+#                 "  git submodule update --init --recursive"
+#             )
+#         self.device = torch.device(device)
+#         self.model = AsymmetricCroCo3DStereo.from_pretrained(
+#             "naver/DUSt3R_ViTLarge_BaseDecoder_512_dpt"
+#         ).to(self.device)
+#         self._GlobalAlignerMode = GlobalAlignerMode
+#         self._global_aligner = global_aligner
+#         self._make_pairs = make_pairs
+#         self._inference = inference
+#         self._load_images = load_images
+#     def infer_cameras_and_points(
+#         self,
+#         img_paths: list[str],
+#         Ks: list[list] = None,
+#         c2ws: list[list] = None,
+#         batch_size: int = 16,
+#         schedule: str = "cosine",
+#         lr: float = 0.01,
+#         niter: int = 500,
+#         min_conf_thr: int = 3,
+#     ) -> tuple[
+#         list[np.ndarray], np.ndarray, np.ndarray, list[np.ndarray], list[np.ndarray]
+#     ]:
+#         num_img = len(img_paths)
+#         if num_img == 1:
+#             print("Only one image found, duplicating it to create a stereo pair.")
+#             img_paths = img_paths * 2
+#         images = self._load_images(img_paths, size=512)
+#         pairs = self._make_pairs(
+#             images,
+#             scene_graph="complete",
+#             prefilter=None,
+#             symmetrize=True,
+#         )
+#         output = self._inference(pairs, self.model, self.device, batch_size=batch_size)
+#         ori_imgs = [iio.imread(p) for p in img_paths]
+#         ori_img_whs = np.array([img.shape[1::-1] for img in ori_imgs])
+#         img_whs = np.concatenate([image["true_shape"][:, ::-1] for image in images], 0)
+#         scene = self._global_aligner(
+#             output,
+#             device=self.device,
+#             mode=self._GlobalAlignerMode.PointCloudOptimizer,
+#             same_focals=True,
+#             optimize_pp=False,  # True,
+#             min_conf_thr=min_conf_thr,
+#         )
+#         # if Ks is not None:
+#         #     scene.preset_focal(
+#         #         torch.tensor([[K[0, 0], K[1, 1]] for K in Ks])
+#         #     )
+#         if c2ws is not None:
+#             scene.preset_pose(c2ws)
+#         _ = scene.compute_global_alignment(
+#             init="msp", niter=niter, schedule=schedule, lr=lr
+#         )
+#         imgs = cast(list, scene.imgs)
+#         Ks = scene.get_intrinsics().detach().cpu().numpy().copy()
+#         c2ws = scene.get_im_poses().detach().cpu().numpy()  # type: ignore
+#         pts3d = [x.detach().cpu().numpy() for x in scene.get_pts3d()]  # type: ignore
+#         if num_img > 1:
+#             masks = [x.detach().cpu().numpy() for x in scene.get_masks()]
+#             points = [p[m] for p, m in zip(pts3d, masks)]
+#             point_colors = [img[m] for img, m in zip(imgs, masks)]
+#         else:
+#             points = [p.reshape(-1, 3) for p in pts3d]
+#             point_colors = [img.reshape(-1, 3) for img in imgs]
+#         # Convert back to the original image size.
+#         imgs = ori_imgs
+#         Ks[:, :2, -1] *= ori_img_whs / img_whs
+#         Ks[:, :2, :2] *= (ori_img_whs / img_whs).mean(axis=1, keepdims=True)[..., None]
+#         return imgs, Ks, c2ws, points, point_colors

modeling/pipeline.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from typing import List, Union
 from copy import deepcopy
@@ -43,10 +44,6 @@ from utils import (encode_vae_image,
                    get_plucker_coordinates,
                    do_sample,
                    average_camera_pose)
-from utils.training_utils import load_pretrained_model
@@ -59,17 +56,15 @@ class VMemPipeline:
         self.config = config
         model_path = self.config.model.get("model_path", None)
-        if model_path is None:
-            self.model = load_pretrained_model(cache_dir=self.config.model.cache_dir, device=device)
-        else:
-            self.model = VMemModel(VMemModelParams()).to(device, dtype)
-            # load from huggingface
-            from huggingface_hub import hf_hub_download
-            state_dict = torch.load(hf_hub_download(repo_id=model_path, filename="vmem_weights.pth"), map_location='cpu')
-            state_dict = {k.replace("module.", "") if "module." in k else k: v for k, v in state_dict.items()}
-            self.model.load_state_dict(state_dict, strict=True)
         self.model_wrapper = VMemWrapper(self.model)
@@ -100,12 +95,15 @@ class VMemPipeline:
         if self.use_surfel:
             # Initialize CUT3R-based reconstructor
             # Load and prepare the model
-            surfel_model_path = self.config.surfel.model_path
             print(f"Loading model from {surfel_model_path}...")
             add_path_to_dust3r(surfel_model_path)
             self.surfel_model = ARCroco3DStereo.from_pretrained(surfel_model_path).to(device)
             self.surfel_model.eval()
             # Import CUT3R scene alignment module
             from extern.CUT3R.cloud_opt.dust3r_opt import global_aligner, GlobalAlignerMode
             self.GlobalAlignerMode = GlobalAlignerMode
@@ -145,6 +143,8 @@ class VMemPipeline:
         self.surfel_to_timestep = {}
         self.pil_frames = []
         self.visualize_dir = self.config.model.samples_dir
         self.global_step = 0

+import os
 from typing import List, Union
 from copy import deepcopy
                    get_plucker_coordinates,
                    do_sample,
                    average_camera_pose)
         self.config = config
         model_path = self.config.model.get("model_path", None)
+        self.model = VMemModel(VMemModelParams()).to(device, dtype)
+        # load from huggingface
+        from huggingface_hub import hf_hub_download
+        state_dict = torch.load(hf_hub_download(repo_id=model_path, filename="vmem_weights.pth"), map_location='cpu')
+        state_dict = {k.replace("module.", "") if "module." in k else k: v for k, v in state_dict.items()}
+        self.model.load_state_dict(state_dict, strict=True)
         self.model_wrapper = VMemWrapper(self.model)
         if self.use_surfel:
             # Initialize CUT3R-based reconstructor
             # Load and prepare the model
+            # download the model from huggingface
+            surfel_model_path = hf_hub_download(repo_id=self.config.surfel.model_path, filename="cut3r_512_dpt_4_64.pth")
             print(f"Loading model from {surfel_model_path}...")
             add_path_to_dust3r(surfel_model_path)
             self.surfel_model = ARCroco3DStereo.from_pretrained(surfel_model_path).to(device)
             self.surfel_model.eval()
             # Import CUT3R scene alignment module
             from extern.CUT3R.cloud_opt.dust3r_opt import global_aligner, GlobalAlignerMode
             self.GlobalAlignerMode = GlobalAlignerMode
         self.surfel_to_timestep = {}
         self.pil_frames = []
         self.visualize_dir = self.config.model.samples_dir
+        if not os.path.exists(self.visualize_dir):
+            os.makedirs(self.visualize_dir)
         self.global_step = 0

requirements.txt CHANGED Viewed

@@ -1,6 +1,6 @@
 --extra-index-url https://download.pytorch.org/whl/nightly/cu124
-torch==2.7.0
-torchvision==0.22.0
 pydantic
 gradio

 --extra-index-url https://download.pytorch.org/whl/nightly/cu124
+torch==2.5.1
+torchvision==0.20.1
 pydantic
 gradio

utils/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/utils/__pycache__/__init__.cpython-310.pyc and b/utils/__pycache__/__init__.cpython-310.pyc differ

utils/__pycache__/util.cpython-310.pyc CHANGED Viewed

Binary files a/utils/__pycache__/util.cpython-310.pyc and b/utils/__pycache__/util.cpython-310.pyc differ