Junyi42 committed
Commit a9472e6 · 1 Parent(s): 0f252ee
Files changed (6)
  1. Dockerfile +22 -0
  2. README.md +30 -8
  3. app.py +105 -0
  4. requirements.txt +15 -0
  5. vis_st4rtrack.py +781 -0
  6. viser_proxy_manager.py +223 -0
Dockerfile ADDED
@@ -0,0 +1,22 @@
+ FROM python:3.12-slim
+
+ WORKDIR /app
+
+ # Install system dependencies for OpenCV and build tools
+ RUN apt-get update && apt-get install -y \
+     libgl1-mesa-glx \
+     libglib2.0-0 \
+     build-essential \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY . .
+
+ # Make port 7860 available to the world outside the container
+ EXPOSE 7860
+
+ # Command to run when the container starts
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,12 +1,34 @@
  ---
- title: Viser Bonn
- emoji: 🏆
- colorFrom: indigo
- colorTo: gray
- sdk: gradio
- sdk_version: 5.25.2
- app_file: app.py
+ title: Viser Gradio Embed
+ emoji: 🚀
+ colorFrom: blue
+ colorTo: pink
+ sdk: docker
+ app_port: 7860
  pinned: false
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Viser + Gradio
+
+ Demo for integrating [viser](https://github.com/nerfstudio-project/viser) 3D
+ visualizations into a [Gradio](https://www.gradio.app/) application.
+
+ - Uses Gradio's session management to create isolated 3D visualization contexts.
+ - Exposes both Gradio and Viser over the same port.
+
+ ## Deploying on HuggingFace Spaces
+
+ **[ [Live example](https://huggingface.co/spaces/brentyi/viser-gradio-embed) ]**
+
+ This repository should work out-of-the-box with HF Spaces via Docker.
+
+ - Unlike a vanilla Gradio Space, this is unfortunately not supported by [ZeroGPU](https://huggingface.co/docs/hub/en/spaces-zerogpu).
+
+ ## Local Demo
+
+ ```bash
+ pip install -r requirements.txt
+ python app.py
+ ```
+
+ https://github.com/user-attachments/assets/b94a117a-b9e5-4854-805a-8666941c7816
app.py ADDED
@@ -0,0 +1,105 @@
+ import random
+ import threading
+ import psutil
+ import fastapi
+ import gradio as gr
+ import uvicorn
+
+ from viser_proxy_manager import ViserProxyManager
+ from vis_st4rtrack import visualize_st4rtrack, load_trajectory_data, log_memory_usage
+
+ # Global cache for loaded data
+ global_data_cache = None
+
+ def check_ram_usage(threshold_percent=90):
+     """Check if RAM usage is above the threshold.
+
+     Args:
+         threshold_percent: Maximum RAM usage percentage allowed
+
+     Returns:
+         bool: True if RAM usage is below threshold, False otherwise
+     """
+     ram_percent = psutil.virtual_memory().percent
+     print(f"Current RAM usage: {ram_percent}%")
+     return ram_percent < threshold_percent
+
+
+ def main() -> None:
+     # Load data once at startup using the function from vis_st4rtrack.py
+     global global_data_cache
+     global_data_cache = load_trajectory_data(use_float16=True, max_frames=120, traj_path="bonn_results", mask_folder="./train")
+
+     app = fastapi.FastAPI()
+     viser_manager = ViserProxyManager(app)
+
+     # Create a Gradio interface with title, iframe, and buttons
+     with gr.Blocks(title="Viser Viewer") as demo:
+         # Add the iframe with a border
+         iframe_html = gr.HTML("")
+         status_text = gr.Markdown("")  # Add status text component
+
+         @demo.load(outputs=[iframe_html, status_text])
+         def start_server(request: gr.Request):
+             assert request.session_hash is not None
+
+             # Check RAM usage before starting visualization
+             if not check_ram_usage(threshold_percent=100):
+                 return """
+                 <div style="text-align: center; padding: 20px; background-color: #ffeeee; border-radius: 5px;">
+                     <h2>⚠️ Server is currently under high load</h2>
+                     <p>Please try again later when resources are available.</p>
+                 </div>
+                 """, "**System Status:** High memory usage detected. Visualization not loaded to prevent server overload."
+
+             viser_manager.start_server(request.session_hash)
+
+             # Use the request's base URL if available
+             host = request.headers["host"]
+
+             # Determine protocol (use HTTPS for HuggingFace Spaces or other secure environments)
+             protocol = (
+                 "https"
+                 if request.headers.get("x-forwarded-proto") == "https"
+                 else "http"
+             )
+
+             # Add visualization in a separate thread
+             server = viser_manager.get_server(request.session_hash)
+             threading.Thread(
+                 target=visualize_st4rtrack,
+                 kwargs={
+                     "server": server,
+                     "use_float16": True,
+                     "preloaded_data": global_data_cache,  # Pass the preloaded data
+                     "color_code": "jet",
+                     "blue_rgb": (0.0, 0.149, 0.463),  # #002676
+                     "red_rgb": (0.769, 0.510, 0.055),  # #FDB515
+                     "blend_ratio": 0.7
+                 },
+                 daemon=True
+             ).start()
+
+             return f"""
+             <iframe
+                 src="{protocol}://{host}/viser/{request.session_hash}/"
+                 width="100%"
+                 height="500px"
+                 frameborder="0"
+                 style="display: block;"
+                 allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
+                 loading="lazy"
+             ></iframe>
+             """, "**System Status:** Visualization loaded successfully."
+
+         @demo.unload
+         def stop(request: gr.Request):
+             assert request.session_hash is not None
+             viser_manager.stop_server(request.session_hash)
+
+     gr.mount_gradio_app(app, demo, "/")
+     uvicorn.run(app, host="0.0.0.0", port=7860)
+
+
+ if __name__ == "__main__":
+     main()
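
For orientation, the per-session URL structure produced by `start_server` above is what the proxy routes in `viser_proxy_manager.py` (further down in this commit) resolve; roughly:

```
iframe src:              {protocol}://{host}/viser/{session_hash}/
HTTP proxy route:        GET /viser/{server_id}/{proxy_path:path}  ->  http://127.0.0.1:<local viser port>/<proxy_path>
WebSocket proxy route:   /viser/{server_id}                        ->  ws://127.0.0.1:<local viser port>
```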
requirements.txt ADDED
@@ -0,0 +1,15 @@
+ # git+https://github.com/nerfstudio-project/viser.git
+ viser>=0.2.23
+ gradio==5.23.1
+ fastapi==0.115.11
+ uvicorn==0.34.0
+ httpx==0.27.2
+ websockets==15.0.1
+ tyro==0.4.1
+ numpy>=1.20.0
+ tqdm>=4.62.0
+ opencv-python>=4.5.0
+ imageio>=2.25.0
+ matplotlib>=3.5.0
+ pyliblzfse>=0.1.0
+ psutil>=5.9.0
vis_st4rtrack.py ADDED
@@ -0,0 +1,781 @@
+ """Record3D visualizer
+
+ Parse and stream record3d captures. To get the demo data, see `./assets/download_record3d_dance.sh`.
+ """
+
+ import time
+ from pathlib import Path
+
+ import numpy as onp
+ import tyro
+ import cv2
+ from tqdm.auto import tqdm
+
+ import viser
+ import viser.extras
+ import viser.transforms as tf
+
+ from glob import glob
+ import numpy as np
+ import imageio.v3 as iio
+ import matplotlib.pyplot as plt
+ import psutil
+
+ def log_memory_usage(message=""):
+     """Log current memory usage with an optional message."""
+     process = psutil.Process()
+     memory_info = process.memory_info()
+     memory_mb = memory_info.rss / (1024 * 1024)  # Convert to MB
+     print(f"Memory usage {message}: {memory_mb:.2f} MB")
+
+ def load_trajectory_data(traj_path="results", use_float16=True, max_frames=None, mask_folder='./train'):
+     """Load trajectory data from files.
+
+     Args:
+         traj_path: Path to the directory containing trajectory data
+         use_float16: Whether to convert data to float16 to save memory
+         max_frames: Maximum number of frames to load (None for all)
+         mask_folder: Path to the directory containing mask images
+
+     Returns:
+         A dictionary containing loaded data
+     """
+     log_memory_usage("before loading data")
+
+     data_cache = {
+         'traj_3d_head1': None,
+         'traj_3d_head2': None,
+         'conf_mask_head1': None,
+         'conf_mask_head2': None,
+         'masks': None,
+         'raw_video': None,
+         'loaded': False
+     }
+
+     # Load masks
+     masks_paths = sorted(glob(mask_folder + '/*.jpg'))
+     masks = None
+
+     if masks_paths:
+         masks = [iio.imread(p) for p in masks_paths]
+         masks = np.stack(masks, axis=0)
+         # Convert masks to binary (0 or 1)
+         masks = (masks < 1).astype(np.float32)
+         masks = masks.sum(axis=-1) > 2  # Combine all channels, True where any channel was 1
+         print(f"Original masks shape: {masks.shape}")
+     else:
+         print("No masks found. Will create default masks when needed.")
+
+     data_cache['masks'] = masks
+
+     if Path(traj_path).is_dir():
+         # Find all trajectory files
+         traj_3d_paths_head1 = sorted(glob(traj_path + '/pts3d1_p*.npy'),
+                                      key=lambda x: int(x.split('_p')[-1].split('.')[0]))
+         conf_paths_head1 = sorted(glob(traj_path + '/conf1_p*.npy'),
+                                   key=lambda x: int(x.split('_p')[-1].split('.')[0]))
+
+         traj_3d_paths_head2 = sorted(glob(traj_path + '/pts3d2_p*.npy'),
+                                      key=lambda x: int(x.split('_p')[-1].split('.')[0]))
+         conf_paths_head2 = sorted(glob(traj_path + '/conf2_p*.npy'),
+                                   key=lambda x: int(x.split('_p')[-1].split('.')[0]))
+
+         # Limit number of frames if specified
+         if max_frames is not None:
+             traj_3d_paths_head1 = traj_3d_paths_head1[:max_frames]
+             conf_paths_head1 = conf_paths_head1[:max_frames] if conf_paths_head1 else []
+             traj_3d_paths_head2 = traj_3d_paths_head2[:max_frames]
+             conf_paths_head2 = conf_paths_head2[:max_frames] if conf_paths_head2 else []
+
+         # Process head1
+         if traj_3d_paths_head1:
+             if use_float16:
+                 traj_3d_head1 = onp.stack([onp.load(p).astype(onp.float16) for p in traj_3d_paths_head1], axis=0)
+             else:
+                 traj_3d_head1 = onp.stack([onp.load(p) for p in traj_3d_paths_head1], axis=0)
+
+             log_memory_usage("after loading head1 data")
+
+             h, w, _ = traj_3d_head1.shape[1:]
+             num_frames = traj_3d_head1.shape[0]
+
+             # If masks is None, create default masks (all ones)
+             if masks is None:
+                 masks = np.ones((num_frames, h, w), dtype=bool)
+                 print(f"Created default masks with shape: {masks.shape}")
+                 data_cache['masks'] = masks
+             else:
+                 # Resize masks to match trajectory dimensions using nearest neighbor interpolation
+                 masks_resized = np.zeros((masks.shape[0], h, w), dtype=bool)
+                 for i in range(masks.shape[0]):
+                     masks_resized[i] = cv2.resize(
+                         masks[i].astype(np.uint8),
+                         (w, h),
+                         interpolation=cv2.INTER_NEAREST
+                     ).astype(bool)
+
+                 print(f"Resized masks shape: {masks_resized.shape}")
+                 data_cache['masks'] = masks_resized
+
+             # Reshape trajectory data
+             traj_3d_head1 = traj_3d_head1.reshape(traj_3d_head1.shape[0], -1, 6)
+             data_cache['traj_3d_head1'] = traj_3d_head1
+
+             if conf_paths_head1:
+                 conf_head1 = onp.stack([onp.load(p).astype(onp.float16) for p in conf_paths_head1], axis=0)
+                 conf_head1 = conf_head1.reshape(conf_head1.shape[0], -1)
+                 conf_head1 = conf_head1.mean(axis=0)
+                 # repeat the conf_head1 to match the number of frames in the dimension 0
+                 conf_head1 = np.tile(conf_head1, (num_frames, 1))
+                 # Convert to float32 before calculating percentile to avoid overflow
+                 conf_thre = np.percentile(conf_head1.astype(np.float32), 1)  # Default percentile
+                 conf_mask_head1 = conf_head1 > conf_thre
+                 data_cache['conf_mask_head1'] = conf_mask_head1
+
+         # Process head2
+         if traj_3d_paths_head2:
+             if use_float16:
+                 traj_3d_head2 = onp.stack([onp.load(p).astype(onp.float16) for p in traj_3d_paths_head2], axis=0)
+             else:
+                 traj_3d_head2 = onp.stack([onp.load(p) for p in traj_3d_paths_head2], axis=0)
+
+             log_memory_usage("after loading head2 data")
+
+             # Store raw video data
+             raw_video = traj_3d_head2[:, :, :, 3:6]  # [num_frames, h, w, 3]
+             data_cache['raw_video'] = raw_video
+
+             traj_3d_head2 = traj_3d_head2.reshape(traj_3d_head2.shape[0], -1, 6)
+             data_cache['traj_3d_head2'] = traj_3d_head2
+
+             if conf_paths_head2:
+                 conf_head2 = onp.stack([onp.load(p).astype(onp.float16) for p in conf_paths_head2], axis=0)
+                 conf_head2 = conf_head2.reshape(conf_head2.shape[0], -1)
+                 # set conf thre to be 1 percentile of the conf_head2, for each frame
+                 conf_thre = np.percentile(conf_head2.astype(np.float32), 1, axis=1)
+                 conf_mask_head2 = conf_head2 > conf_thre[:, None]
+                 data_cache['conf_mask_head2'] = conf_mask_head2
+
+     data_cache['loaded'] = True
+     log_memory_usage("after loading all data")
+     return data_cache
+
+ def visualize_st4rtrack(
+     traj_path: str = "results",
+     up_dir: str = "-z",  # should be +z or -z
+     max_frames: int = 100,
+     share: bool = False,
+     point_size: float = 0.005,
+     downsample_factor: int = 3,
+     num_traj_points: int = 100,
+     conf_thre_percentile: float = 1,
+     traj_end_frame: int = 100,
+     traj_start_frame: int = 0,
+     traj_line_width: float = 3.,
+     fixed_length_traj: int = 20,
+     server: viser.ViserServer = None,
+     use_float16: bool = True,
+     preloaded_data: dict = None,  # Add this parameter to accept preloaded data
+     color_code: str = "jet",
+     # Updated hex colors: #002676 for blue and #FDB515 for red/gold
+     blue_rgb: tuple[float, float, float] = (0.0, 0.149, 0.463),  # #002676
+     red_rgb: tuple[float, float, float] = (0.769, 0.510, 0.055),  # #FDB515
+     blend_ratio: float = 0.7,
+     mask_folder: str = None,
+     mid_anchor: bool = False,
+     video_width: int = 320,  # Video display width
+     video_height: int = 180,  # Video display height
+     camera_position: tuple[float, float, float] = (1e-3, 1.5, -0.2),
+ ) -> None:
+     log_memory_usage("at start of visualization")
+
+     if server is None:
+         server = viser.ViserServer()
+     if share:
+         server.request_share_url()
+
+     @server.on_client_connect
+     def _(client: viser.ClientHandle) -> None:
+         client.camera.position = camera_position
+         client.camera.look_at = (0, 0, 0)
+
+     # Configure the GUI panel size and layout
+     server.gui.configure_theme(
+         control_layout="collapsible",
+         control_width="small",
+         dark_mode=False,
+         show_logo=False,
+         show_share_button=True
+     )
+
+     # Add video preview to the GUI panel - placed at the top
+     video_preview = server.gui.add_image(
+         np.zeros((video_height, video_width, 3), dtype=np.uint8),  # Initial blank image
+         format="jpeg"
+     )
+
+     # Use preloaded data if available
+     if preloaded_data and preloaded_data.get('loaded', False):
+         traj_3d_head1 = preloaded_data.get('traj_3d_head1')
+         traj_3d_head2 = preloaded_data.get('traj_3d_head2')
+         conf_mask_head1 = preloaded_data.get('conf_mask_head1')
+         conf_mask_head2 = preloaded_data.get('conf_mask_head2')
+         masks = preloaded_data.get('masks')
+         raw_video = preloaded_data.get('raw_video')
+         print("Using preloaded data!")
+     else:
+         # Load data using the shared function
+         print("No preloaded data available, loading from files...")
+         data = load_trajectory_data(traj_path, use_float16, max_frames, mask_folder)
+         traj_3d_head1 = data.get('traj_3d_head1')
+         traj_3d_head2 = data.get('traj_3d_head2')
+         conf_mask_head1 = data.get('conf_mask_head1')
+         conf_mask_head2 = data.get('conf_mask_head2')
+         masks = data.get('masks')
+         raw_video = data.get('raw_video')
+
+     def process_video_frame(frame_idx):
+         if raw_video is None:
+             return np.zeros((video_height, video_width, 3), dtype=np.uint8)
+
+         # Get the original frame
+         raw_frame = raw_video[frame_idx]
+
+         # Adjust value range to 0-255
+         if raw_frame.max() <= 1.0:
+             frame = (raw_frame * 255).astype(np.uint8)
+         else:
+             frame = raw_frame.astype(np.uint8)
+
+         # Resize to fit the preview window
+         h, w = frame.shape[:2]
+         # Calculate size while maintaining aspect ratio
+         if h/w > video_height/video_width:  # Height limited
+             new_h = video_height
+             new_w = int(w * (new_h / h))
+         else:  # Width limited
+             new_w = video_width
+             new_h = int(h * (new_w / w))
+
+         # Resize
+         resized_frame = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA)
+
+         # Create a black background
+         display_frame = np.zeros((video_height, video_width, 3), dtype=np.uint8)
+
+         # Place the resized frame in the center
+         y_offset = (video_height - new_h) // 2
+         x_offset = (video_width - new_w) // 2
+         display_frame[y_offset:y_offset+new_h, x_offset:x_offset+new_w] = resized_frame
+
+         return display_frame
+
+     server.scene.set_up_direction(up_dir)
+     print("Setting up visualization!")
+
+     # Add visualization controls
+     with server.gui.add_folder("Visualization"):
+         gui_show_head1 = server.gui.add_checkbox("Tracking Points", True)
+         gui_show_head2 = server.gui.add_checkbox("Recon Points", True)
+         gui_show_trajectories = server.gui.add_checkbox("Trajectories", True)
+         gui_use_color_tint = server.gui.add_checkbox("Use Color Tint", True)
+
+     # Process and center point clouds
+     center_point = None
+     if traj_3d_head1 is not None:
+         xyz_head1 = traj_3d_head1[:, :, :3]
+         rgb_head1 = traj_3d_head1[:, :, 3:6]
+         if center_point is None:
+             center_point = onp.mean(xyz_head1, axis=(0, 1), keepdims=True)
+         xyz_head1 -= center_point
+         if rgb_head1.sum(axis=(-1)).max() > 125:
+             rgb_head1 /= 255.0
+
+     if traj_3d_head2 is not None:
+         xyz_head2 = traj_3d_head2[:, :, :3]
+         rgb_head2 = traj_3d_head2[:, :, 3:6]
+         if center_point is None:
+             center_point = onp.mean(xyz_head2, axis=(0, 1), keepdims=True)
+         xyz_head2 -= center_point
+         if rgb_head2.sum(axis=(-1)).max() > 125:
+             rgb_head2 /= 255.0
+
+     # Determine number of frames
+     F = max(
+         traj_3d_head1.shape[0] if traj_3d_head1 is not None else 0,
+         traj_3d_head2.shape[0] if traj_3d_head2 is not None else 0
+     )
+     num_frames = min(max_frames, F)
+     traj_end_frame = min(traj_end_frame, num_frames)
+     print(f"Number of frames: {num_frames}")
+     xyz_head1 = xyz_head1[:num_frames]
+     xyz_head2 = xyz_head2[:num_frames]
+     rgb_head1 = rgb_head1[:num_frames]
+     rgb_head2 = rgb_head2[:num_frames]
+
+     # Add playback UI.
+     with server.gui.add_folder("Playback"):
+         gui_timestep = server.gui.add_slider(
+             "Timestep",
+             min=0,
+             max=num_frames - 1,
+             step=1,
+             initial_value=0,
+             disabled=True,
+         )
+         gui_next_frame = server.gui.add_button("Next Frame", disabled=True)
+         gui_prev_frame = server.gui.add_button("Prev Frame", disabled=True)
+         gui_playing = server.gui.add_checkbox("Playing", True)
+         gui_framerate = server.gui.add_slider(
+             "FPS", min=1, max=60, step=0.1, initial_value=20
+         )
+         gui_framerate_options = server.gui.add_button_group(
+             "FPS options", ("10", "20", "30")
+         )
+         gui_show_all_frames = server.gui.add_checkbox("Show all frames", False)
+         gui_stride = server.gui.add_slider(
+             "Stride",
+             min=1,
+             max=num_frames,
+             step=1,
+             initial_value=5,
+             disabled=True,  # Initially disabled
+         )
+
+     # Frame step buttons.
+     @gui_next_frame.on_click
+     def _(_) -> None:
+         gui_timestep.value = (gui_timestep.value + 1) % num_frames
+
+     @gui_prev_frame.on_click
+     def _(_) -> None:
+         gui_timestep.value = (gui_timestep.value - 1) % num_frames
+
+     # Disable frame controls when we're playing.
+     @gui_playing.on_update
+     def _(_) -> None:
+         gui_timestep.disabled = gui_playing.value or gui_show_all_frames.value
+         gui_next_frame.disabled = gui_playing.value or gui_show_all_frames.value
+         gui_prev_frame.disabled = gui_playing.value or gui_show_all_frames.value
+
+     # Set the framerate when we click one of the options.
+     @gui_framerate_options.on_click
+     def _(_) -> None:
+         gui_framerate.value = int(gui_framerate_options.value)
+
+     prev_timestep = gui_timestep.value
+
+     # Toggle frame visibility when the timestep slider changes.
+     @gui_timestep.on_update
+     def _(_) -> None:
+         nonlocal prev_timestep
+         current_timestep = gui_timestep.value
+         if not gui_show_all_frames.value:
+             with server.atomic():
+                 if gui_show_head1.value:
+                     frame_nodes_head1[current_timestep].visible = True
+                     frame_nodes_head1[prev_timestep].visible = False
+                 if gui_show_head2.value:
+                     frame_nodes_head2[current_timestep].visible = True
+                     frame_nodes_head2[prev_timestep].visible = False
+         prev_timestep = current_timestep
+         server.flush()  # Optional!
+
+     # Show or hide all frames based on the checkbox.
+     @gui_show_all_frames.on_update
+     def _(_) -> None:
+         gui_stride.disabled = not gui_show_all_frames.value  # Enable/disable stride slider
+         if gui_show_all_frames.value:
+             # Show frames with stride
+             stride = gui_stride.value
+             with server.atomic():
+                 for i, (node1, node2) in enumerate(zip(frame_nodes_head1, frame_nodes_head2)):
+                     node1.visible = gui_show_head1.value and (i % stride == 0)
+                     node2.visible = gui_show_head2.value and (i % stride == 0)
+             # Disable playback controls
+             gui_playing.disabled = True
+             gui_timestep.disabled = True
+             gui_next_frame.disabled = True
+             gui_prev_frame.disabled = True
+         else:
+             # Show only the current frame
+             current_timestep = gui_timestep.value
+             with server.atomic():
+                 for i, (node1, node2) in enumerate(zip(frame_nodes_head1, frame_nodes_head2)):
+                     node1.visible = gui_show_head1.value and (i == current_timestep)
+                     node2.visible = gui_show_head2.value and (i == current_timestep)
+             # Re-enable playback controls
+             gui_playing.disabled = False
+             gui_timestep.disabled = gui_playing.value
+             gui_next_frame.disabled = gui_playing.value
+             gui_prev_frame.disabled = gui_playing.value
+
+     # Update frame visibility when the stride changes.
+     @gui_stride.on_update
+     def _(_) -> None:
+         if gui_show_all_frames.value:
+             # Update frame visibility based on new stride
+             stride = gui_stride.value
+             with server.atomic():
+                 for i, (node1, node2) in enumerate(zip(frame_nodes_head1, frame_nodes_head2)):
+                     node1.visible = gui_show_head1.value and (i % stride == 0)
+                     node2.visible = gui_show_head2.value and (i % stride == 0)
+
+     # Load in frames.
+     server.scene.add_frame(
+         "/frames",
+         wxyz=tf.SO3.exp(onp.array([onp.pi / 2.0, 0.0, 0.0])).wxyz,
+         position=(0, 0, 0),
+         show_axes=False,
+     )
+     frame_nodes_head1: list[viser.FrameHandle] = []
+     frame_nodes_head2: list[viser.FrameHandle] = []
+
+     # Extract RGB components for tinting
+     blue_r, blue_g, blue_b = blue_rgb
+     red_r, red_g, red_b = red_rgb
+
+     # Create frames for each timestep
+     frame_nodes_head1 = []
+     frame_nodes_head2 = []
+     for i in tqdm(range(num_frames)):
+         # Process head1
+         if traj_3d_head1 is not None:
+             frame_nodes_head1.append(server.scene.add_frame(f"/frames/t{i}/head1", show_axes=False))
+             position = xyz_head1[i]
+             color = rgb_head1[i]
+             if conf_mask_head1 is not None:
+                 position = position[conf_mask_head1[i]]
+                 color = color[conf_mask_head1[i]]
+
+             # Add point cloud for head1 with optional blue tint
+             color_head1 = color.copy()
+             if gui_use_color_tint.value:
+                 color_head1 *= blend_ratio
+                 color_head1[:, 0] = onp.clip(color_head1[:, 0] + blue_r * (1 - blend_ratio), 0, 1)  # R
+                 color_head1[:, 1] = onp.clip(color_head1[:, 1] + blue_g * (1 - blend_ratio), 0, 1)  # G
+                 color_head1[:, 2] = onp.clip(color_head1[:, 2] + blue_b * (1 - blend_ratio), 0, 1)  # B
+
+             server.scene.add_point_cloud(
+                 name=f"/frames/t{i}/head1/point_cloud",
+                 points=position[::downsample_factor],
+                 colors=color_head1[::downsample_factor],
+                 point_size=point_size,
+                 point_shape="rounded",
+             )
+
+         # Process head2
+         if traj_3d_head2 is not None:
+             frame_nodes_head2.append(server.scene.add_frame(f"/frames/t{i}/head2", show_axes=False))
+             position = xyz_head2[i]
+             color = rgb_head2[i]
+             if conf_mask_head2 is not None:
+                 position = position[conf_mask_head2[i]]
+                 color = color[conf_mask_head2[i]]
+
+             # Add point cloud for head2 with optional red tint
+             color_head2 = color.copy()
+             if gui_use_color_tint.value:
+                 color_head2 *= blend_ratio
+                 color_head2[:, 0] = onp.clip(color_head2[:, 0] + red_r * (1 - blend_ratio), 0, 1)  # R
+                 color_head2[:, 1] = onp.clip(color_head2[:, 1] + red_g * (1 - blend_ratio), 0, 1)  # G
+                 color_head2[:, 2] = onp.clip(color_head2[:, 2] + red_b * (1 - blend_ratio), 0, 1)  # B
+
+             server.scene.add_point_cloud(
+                 name=f"/frames/t{i}/head2/point_cloud",
+                 points=position[::downsample_factor],
+                 colors=color_head2[::downsample_factor],
+                 point_size=point_size,
+                 point_shape="rounded",
+             )
+
+     # Update visibility based on checkboxes
+     @gui_show_head1.on_update
+     def _(_) -> None:
+         with server.atomic():
+             for frame_node in frame_nodes_head1:
+                 frame_node.visible = gui_show_head1.value and (
+                     gui_show_all_frames.value
+                     or (not gui_show_all_frames.value)
+                 )
+
+     @gui_show_head2.on_update
+     def _(_) -> None:
+         with server.atomic():
+             for frame_node in frame_nodes_head2:
+                 frame_node.visible = gui_show_head2.value and (
+                     gui_show_all_frames.value
+                     or (not gui_show_all_frames.value)
+                 )
+
+     # Initial visibility
+     for i, (node1, node2) in enumerate(zip(frame_nodes_head1, frame_nodes_head2)):
+         if gui_show_all_frames.value:
+             node1.visible = gui_show_head1.value and (i % gui_stride.value == 0)
+             node2.visible = gui_show_head2.value and (i % gui_stride.value == 0)
+         else:
+             node1.visible = gui_show_head1.value and (i == gui_timestep.value)
+             node2.visible = gui_show_head2.value and (i == gui_timestep.value)
+
+     # Process and visualize trajectories for head1
+     if traj_3d_head1 is not None:
+         # Get points over time
+         xyz_head1_centered = xyz_head1.copy()
+
+         # Select points to visualize
+         num_points = xyz_head1.shape[1]
+         points_to_visualize = min(num_points, num_traj_points)
+
+         # Get the mask for the first frame and reshape it to match point cloud dimensions
+         if mid_anchor:
+             first_frame_mask = masks[num_frames//2].reshape(-1)
+         else:
+             first_frame_mask = masks[0].reshape(-1)  # [#points, h]
+
+         # Calculate trajectory lengths for each point
+         trajectories = xyz_head1_centered[traj_start_frame:traj_end_frame]  # Shape: (num_frames, num_points, 3)
+         traj_diffs = np.diff(trajectories, axis=0)  # Differences between consecutive frames
+         traj_lengths = np.sum(np.sqrt(np.sum(traj_diffs**2, axis=-1)), axis=0)  # Sum of distances for each point
+
+         # Get points that are within the mask
+         valid_indices = np.where(first_frame_mask)[0]
+
+         if len(valid_indices) > 0:
+             # Calculate average trajectory length for masked points
+             masked_traj_lengths = traj_lengths[valid_indices]
+             avg_traj_length = np.mean(masked_traj_lengths)
+
+             if mask_folder is not None:
+                 # do not filter points by trajectory length
+                 long_traj_indices = valid_indices
+             else:
+                 # Filter points by trajectory length
+                 long_traj_indices = valid_indices[masked_traj_lengths >= avg_traj_length]
+
+             # Randomly sample from the filtered points
+             if len(long_traj_indices) > 0:
+                 # Random sampling without replacement
+                 selected_indices = np.random.choice(
+                     len(long_traj_indices),
+                     min(points_to_visualize, len(long_traj_indices)),
+                     replace=False
+                 )
+                 # Get the actual indices in their original order
+                 valid_point_indices = long_traj_indices[np.sort(selected_indices)]
+             else:
+                 valid_point_indices = np.array([])
+         else:
+             valid_point_indices = np.array([])
+
+         if len(valid_point_indices) > 0:
+             # Get trajectories for all valid points
+             trajectories = xyz_head1_centered[traj_start_frame:traj_end_frame, valid_point_indices]
+             N_point = trajectories.shape[1]
+             if color_code == "rainbow":
+                 point_colors = plt.cm.rainbow(np.linspace(0, 1, N_point))[:, :3]
+             elif color_code == "jet":
+                 point_colors = plt.cm.jet(np.linspace(0, 1, N_point))[:, :3]
+             # Modify the loop to handle frames less than fixed_length_traj
+             for i in range(traj_end_frame - traj_start_frame):
+                 # Calculate the actual trajectory length for this frame
+                 actual_length = min(fixed_length_traj, i + 1)
+
+                 if actual_length > 1:  # Need at least 2 points to form a line
+                     # Get the appropriate slice of trajectory data
+                     start_idx = max(0, i - actual_length + 1)
+                     end_idx = i + 1
+
+                     # Create line segments between consecutive frames
+                     traj_slice = trajectories[start_idx:end_idx]
+                     line_points = np.stack([traj_slice[:-1], traj_slice[1:]], axis=2)
+                     line_points = line_points.reshape(-1, 2, 3)
+
+                     # Create corresponding colors
+                     line_colors = np.tile(point_colors, (actual_length-1, 1))
+                     line_colors = np.stack([line_colors, line_colors], axis=1)
+
+                     # Add line segments
+                     server.scene.add_line_segments(
+                         name=f"/frames/t{i+traj_start_frame}/head1/trajectory",
+                         points=line_points,
+                         colors=line_colors,
+                         line_width=traj_line_width,
+                         visible=gui_show_trajectories.value
+                     )
+
+     # Add trajectory controls functionality
+     @gui_show_trajectories.on_update
+     def _(_) -> None:
+         with server.atomic():
+             # Remove all existing trajectories
+             for i in range(num_frames):
+                 try:
+                     server.scene.remove_by_name(f"/frames/t{i}/head1/trajectory")
+                 except KeyError:
+                     pass
+
+             # Create new trajectories if enabled
+             if gui_show_trajectories.value and traj_3d_head1 is not None:
+                 # Get the mask for the last frame and reshape it
+                 last_frame_mask = masks[traj_end_frame-1].reshape(-1)
+
+                 # Calculate trajectory lengths
+                 trajectories = xyz_head1_centered[traj_start_frame:traj_end_frame]
+                 traj_diffs = np.diff(trajectories, axis=0)
+                 traj_lengths = np.sum(np.sqrt(np.sum(traj_diffs**2, axis=-1)), axis=0)
+
+                 # Get points that are within the mask
+                 valid_indices = np.where(last_frame_mask)[0]
+
+                 if len(valid_indices) > 0:
+                     # Filter by trajectory length
+                     masked_traj_lengths = traj_lengths[valid_indices]
+                     avg_traj_length = np.mean(masked_traj_lengths)
+                     long_traj_indices = valid_indices[masked_traj_lengths >= avg_traj_length]
+
+                     # Randomly sample from the filtered points
+                     if len(long_traj_indices) > 0:
+                         # Random sampling without replacement
+                         selected_indices = np.random.choice(
+                             len(long_traj_indices),
+                             min(points_to_visualize, len(long_traj_indices)),
+                             replace=False
+                         )
+                         # Get the actual indices in their original order
+                         valid_point_indices = long_traj_indices[np.sort(selected_indices)]
+                     else:
+                         valid_point_indices = np.array([])
+                 else:
+                     valid_point_indices = np.array([])
+
+                 if len(valid_point_indices) > 0:
+                     # Get trajectories for all valid points
+                     trajectories = xyz_head1_centered[traj_start_frame:traj_end_frame, valid_point_indices]
+                     N_point = trajectories.shape[1]
+
+                     if color_code == "rainbow":
+                         point_colors = plt.cm.rainbow(np.linspace(0, 1, N_point))[:, :3]
+                     elif color_code == "jet":
+                         point_colors = plt.cm.jet(np.linspace(0, 1, N_point))[:, :3]
+
+                     # Modify the loop to handle frames less than fixed_length_traj
+                     for i in range(traj_end_frame - traj_start_frame):
+                         # Calculate the actual trajectory length for this frame
+                         actual_length = min(fixed_length_traj, i + 1)
+
+                         if actual_length > 1:  # Need at least 2 points to form a line
+                             # Get the appropriate slice of trajectory data
+                             start_idx = max(0, i - actual_length + 1)
+                             end_idx = i + 1
+
+                             # Create line segments between consecutive frames
+                             traj_slice = trajectories[start_idx:end_idx]
+                             line_points = np.stack([traj_slice[:-1], traj_slice[1:]], axis=2)
+                             line_points = line_points.reshape(-1, 2, 3)
+
+                             # Create corresponding colors
+                             line_colors = np.tile(point_colors, (actual_length-1, 1))
+                             line_colors = np.stack([line_colors, line_colors], axis=1)
+
+                             # Add line segments
+                             server.scene.add_line_segments(
+                                 name=f"/frames/t{i+traj_start_frame}/head1/trajectory",
+                                 points=line_points,
+                                 colors=line_colors,
+                                 line_width=traj_line_width,
+                                 visible=True
+                             )
+
+     # Update color tinting when the checkbox changes
+     @gui_use_color_tint.on_update
+     def _(_) -> None:
+         with server.atomic():
+             for i in range(num_frames):
+                 # Update head1 point cloud
+                 if traj_3d_head1 is not None:
+                     position = xyz_head1[i]
+                     color = rgb_head1[i]
+                     if conf_mask_head1 is not None:
+                         position = position[conf_mask_head1[i]]
+                         color = color[conf_mask_head1[i]]
+
+                     color_head1 = color.copy()
+                     if gui_use_color_tint.value:
+                         color_head1 *= blend_ratio
+                         color_head1[:, 0] = onp.clip(color_head1[:, 0] + blue_r * (1 - blend_ratio), 0, 1)  # R
+                         color_head1[:, 1] = onp.clip(color_head1[:, 1] + blue_g * (1 - blend_ratio), 0, 1)  # G
+                         color_head1[:, 2] = onp.clip(color_head1[:, 2] + blue_b * (1 - blend_ratio), 0, 1)  # B
+
+                     server.scene.remove_by_name(f"/frames/t{i}/head1/point_cloud")
+                     server.scene.add_point_cloud(
+                         name=f"/frames/t{i}/head1/point_cloud",
+                         points=position[::downsample_factor],
+                         colors=color_head1[::downsample_factor],
+                         point_size=point_size,
+                         point_shape="rounded",
+                     )
+
+                 # Update head2 point cloud
+                 if traj_3d_head2 is not None:
+                     position = xyz_head2[i]
+                     color = rgb_head2[i]
+                     if conf_mask_head2 is not None:
+                         position = position[conf_mask_head2[i]]
+                         color = color[conf_mask_head2[i]]
+
+                     color_head2 = color.copy()
+                     if gui_use_color_tint.value:
+                         color_head2 *= blend_ratio
+                         color_head2[:, 0] = onp.clip(color_head2[:, 0] + red_r * (1 - blend_ratio), 0, 1)  # R
+                         color_head2[:, 1] = onp.clip(color_head2[:, 1] + red_g * (1 - blend_ratio), 0, 1)  # G
+                         color_head2[:, 2] = onp.clip(color_head2[:, 2] + red_b * (1 - blend_ratio), 0, 1)  # B
+
+                     server.scene.remove_by_name(f"/frames/t{i}/head2/point_cloud")
+                     server.scene.add_point_cloud(
+                         name=f"/frames/t{i}/head2/point_cloud",
+                         points=position[::downsample_factor],
+                         colors=color_head2[::downsample_factor],
+                         point_size=point_size,
+                         point_shape="rounded",
+                     )
+
+     # Initialize video preview
+     if raw_video is not None:
+         video_preview.image = process_video_frame(0)
+
+     # Update video preview when timestep changes
+     @gui_timestep.on_update
+     def _(_) -> None:
+         current_timestep = gui_timestep.value
+         if raw_video is not None:
+             video_preview.image = process_video_frame(current_timestep)
+
+     # Playback update loop.
+     log_memory_usage("before starting playback loop")
+
+     prev_timestep = gui_timestep.value
+     while True:
+         current_timestep = gui_timestep.value
+
+         # If timestep changes, update frame visibility
+         if current_timestep != prev_timestep:
+             with server.atomic():
+                 # ... existing code ...
+
+                 # Update video preview
+                 if raw_video is not None:
+                     video_preview.image = process_video_frame(current_timestep)
+
+         # Update in playback mode
+         if gui_playing.value and not gui_show_all_frames.value:
+             gui_timestep.value = (gui_timestep.value + 1) % num_frames
+
+             # Update video preview in playback mode
+             if raw_video is not None:
+                 video_preview.image = process_video_frame(gui_timestep.value)
+
+         time.sleep(1.0 / gui_framerate.value)
+
+
+ if __name__ == "__main__":
+     tyro.cli(visualize_st4rtrack)
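
Since the entry point wraps `visualize_st4rtrack` in `tyro.cli`, the script can also be launched standalone; tyro exposes each keyword argument as a command-line flag. An invocation might look like the following (the `results` directory is a placeholder for wherever the `pts3d*_p*.npy` / `conf*_p*.npy` dumps live):

```bash
# Standalone visualization; tyro maps the function's keyword arguments to flags.
python vis_st4rtrack.py --traj-path results --max-frames 100 --point-size 0.005
```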
viser_proxy_manager.py ADDED
@@ -0,0 +1,223 @@
+ import asyncio
+
+ import httpx
+ import viser
+ import websockets
+ from fastapi import FastAPI, Request, WebSocket, WebSocketDisconnect
+ from fastapi.responses import Response
+
+
+ class ViserProxyManager:
+     """Manages Viser server instances for Gradio applications.
+
+     This class handles the creation, retrieval, and cleanup of Viser server instances,
+     as well as proxying HTTP and WebSocket requests to the appropriate Viser server.
+
+     Args:
+         app: The FastAPI application to which the proxy routes will be added.
+         min_local_port: Minimum local port number to use for Viser servers. Defaults to 8000.
+             These ports are used only for internal communication and don't need to be publicly exposed.
+         max_local_port: Maximum local port number to use for Viser servers. Defaults to 9000.
+             These ports are used only for internal communication and don't need to be publicly exposed.
+         max_message_size: Maximum WebSocket message size in bytes. Defaults to 100MB.
+     """
+
+     def __init__(
+         self,
+         app: FastAPI,
+         min_local_port: int = 8000,
+         max_local_port: int = 9000,
+         max_message_size: int = 300 * 1024 * 1024,  # 300MB default
+     ) -> None:
+         self._min_port = min_local_port
+         self._max_port = max_local_port
+         self._max_message_size = max_message_size
+         self._server_from_session_hash: dict[str, viser.ViserServer] = {}
+         self._last_port = self._min_port - 1  # Track last port tried
+
+         @app.get("/viser/{server_id}/{proxy_path:path}")
+         async def proxy(request: Request, server_id: str, proxy_path: str):
+             """Proxy HTTP requests to the appropriate Viser server."""
+             # Get the local port for this server ID
+             server = self._server_from_session_hash.get(server_id)
+             if server is None:
+                 return Response(content="Server not found", status_code=404)
+
+             # Build target URL
+             if proxy_path:
+                 path_suffix = f"/{proxy_path}"
+             else:
+                 path_suffix = "/"
+
+             target_url = f"http://127.0.0.1:{server.get_port()}{path_suffix}"
+             if request.url.query:
+                 target_url += f"?{request.url.query}"
+
+             # Forward request
+             async with httpx.AsyncClient() as client:
+                 # Forward the original headers, but remove any problematic ones
+                 headers = dict(request.headers)
+                 headers.pop("host", None)  # Remove host header to avoid conflicts
+                 headers["accept-encoding"] = "identity"  # Disable compression
+
+                 proxied_req = client.build_request(
+                     method=request.method,
+                     url=target_url,
+                     headers=headers,
+                     content=await request.body(),
+                 )
+                 proxied_resp = await client.send(proxied_req, stream=True)
+
+                 # Get response headers
+                 response_headers = dict(proxied_resp.headers)
+
+                 # Check if this is an HTML response
+                 content = await proxied_resp.aread()
+                 return Response(
+                     content=content,
+                     status_code=proxied_resp.status_code,
+                     headers=response_headers,
+                 )
+
+         # WebSocket Proxy
+         @app.websocket("/viser/{server_id}")
+         async def websocket_proxy(websocket: WebSocket, server_id: str):
+             """Proxy WebSocket connections to the appropriate Viser server."""
+             try:
+                 await websocket.accept()
+
+                 server = self._server_from_session_hash.get(server_id)
+                 if server is None:
+                     await websocket.close(code=1008, reason="Not Found")
+                     return
+
+                 # Determine target WebSocket URL
+                 target_ws_url = f"ws://127.0.0.1:{server.get_port()}"
+
+                 if not target_ws_url:
+                     await websocket.close(code=1008, reason="Not Found")
+                     return
+
+                 try:
+                     # Connect to the target WebSocket with increased message size and timeout
+                     async with websockets.connect(
+                         target_ws_url,
+                         max_size=self._max_message_size,
+                         ping_interval=30,  # Send ping every 30 seconds
+                         ping_timeout=10,  # Wait 10 seconds for pong response
+                         close_timeout=5,  # Wait 5 seconds for close handshake
+                     ) as ws_target:
+                         # Create tasks for bidirectional communication
+                         async def forward_to_target():
+                             """Forward messages from the client to the target WebSocket."""
+                             try:
+                                 while True:
+                                     data = await websocket.receive_bytes()
+                                     await ws_target.send(data, text=False)
+                             except WebSocketDisconnect:
+                                 try:
+                                     await ws_target.close()
+                                 except RuntimeError:
+                                     pass
+
+                         async def forward_from_target():
+                             """Forward messages from the target WebSocket to the client."""
+                             try:
+                                 while True:
+                                     data = await ws_target.recv(decode=False)
+                                     await websocket.send_bytes(data)
+                             except websockets.exceptions.ConnectionClosed:
+                                 try:
+                                     await websocket.close()
+                                 except RuntimeError:
+                                     pass
+
+                         # Run both forwarding tasks concurrently
+                         forward_task = asyncio.create_task(forward_to_target())
+                         backward_task = asyncio.create_task(forward_from_target())
+
+                         # Wait for either task to complete (which means a connection was closed)
+                         done, pending = await asyncio.wait(
+                             [forward_task, backward_task],
+                             return_when=asyncio.FIRST_COMPLETED,
+                         )
+
+                         # Cancel the remaining task
+                         for task in pending:
+                             task.cancel()
+
+                 except websockets.exceptions.ConnectionClosedError as e:
+                     print(f"WebSocket connection closed with error: {e}")
+                     await websocket.close(code=1011, reason="Connection to target closed")
+
+             except Exception as e:
+                 print(f"WebSocket proxy error: {e}")
+                 try:
+                     await websocket.close(code=1011, reason=str(e)[:120])  # Limit reason length
+                 except:
+                     pass  # Already closed
+
+     def start_server(self, server_id: str) -> viser.ViserServer:
+         """Start a new Viser server and associate it with the given server ID.
+
+         Finds an available port within the configured min_local_port and max_local_port range.
+         These ports are used only for internal communication and don't need to be publicly exposed.
+
+         Args:
+             server_id: The unique identifier to associate with the new server.
+
+         Returns:
+             The newly created Viser server instance.
+
+         Raises:
+             RuntimeError: If no free ports are available in the configured range.
+         """
+         import socket
+
+         # Start searching from the last port + 1 (with wraparound)
+         port_range_size = self._max_port - self._min_port + 1
+         start_port = (
+             (self._last_port + 1 - self._min_port) % port_range_size
+         ) + self._min_port
+
+         # Try each port once
+         for offset in range(port_range_size):
+             port = (
+                 (start_port - self._min_port + offset) % port_range_size
+             ) + self._min_port
+             try:
+                 # Check if port is available by attempting to bind to it
+                 with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+                     s.bind(("127.0.0.1", port))
+                 # Port is available, create server with this port
+                 server = viser.ViserServer(port=port)
+                 self._server_from_session_hash[server_id] = server
+                 self._last_port = port
+                 return server
+             except OSError:
+                 # Port is in use, try the next one
+                 continue
+
+         # If we get here, no ports were available
+         raise RuntimeError(
+             f"No available local ports in range {self._min_port}-{self._max_port}"
+         )
+
+     def get_server(self, server_id: str) -> viser.ViserServer:
+         """Retrieve a Viser server instance by its ID.
+
+         Args:
+             server_id: The unique identifier of the server to retrieve.
+
+         Returns:
+             The Viser server instance associated with the given ID.
+         """
+         return self._server_from_session_hash[server_id]
+
+     def stop_server(self, server_id: str) -> None:
+         """Stop a Viser server and remove it from the manager.
+
+         Args:
+             server_id: The unique identifier of the server to stop.
+         """
+         self._server_from_session_hash.pop(server_id).stop()
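
For reference, a minimal sketch of how this manager plugs into a FastAPI + Gradio host; app.py in this same commit is the full version, and only names defined in this commit are used below:

```python
# Minimal sketch: one Viser server per Gradio session, proxied under /viser/<session_hash>/.
import fastapi
import gradio as gr
import uvicorn

from viser_proxy_manager import ViserProxyManager

app = fastapi.FastAPI()
manager = ViserProxyManager(app)  # registers the /viser/{server_id} HTTP and WebSocket routes

with gr.Blocks() as demo:
    iframe_html = gr.HTML("")

    @demo.load(outputs=[iframe_html])
    def start(request: gr.Request):
        assert request.session_hash is not None
        manager.start_server(request.session_hash)  # picks a free local port for this session
        return f'<iframe src="/viser/{request.session_hash}/" width="100%" height="500px"></iframe>'

    @demo.unload
    def stop(request: gr.Request):
        assert request.session_hash is not None
        manager.stop_server(request.session_hash)

gr.mount_gradio_app(app, demo, "/")
uvicorn.run(app, host="0.0.0.0", port=7860)
```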