Spaces:
Running
on
Zero
Running
on
Zero
tokenid
committed on
Commit
·
b5dfbe4
1
Parent(s):
15de7a2
lazy cache
Browse files
- app.py +66 -36
- src/pose_funcs.py +3 -3
app.py
CHANGED
|
@@ -18,6 +18,7 @@ from src.pose_estimation import load_model_from_config, estimate_poses, estimate
|
|
| 18 |
from src.pose_funcs import find_optimal_poses
|
| 19 |
from src.utils import spherical_to_cartesian, elu_to_c2w
|
| 20 |
|
|
|
|
| 21 |
if torch.cuda.is_available():
|
| 22 |
_device_ = 'cuda:0'
|
| 23 |
else:
|
|
@@ -139,12 +140,10 @@ def image_to_tensor(img, width=256, height=256):
|
|
| 139 |
|
| 140 |
|
| 141 |
@spaces.GPU(duration=110)
|
| 142 |
-
def run_pose_exploration(
|
| 143 |
|
| 144 |
seed_everything(seed_value)
|
| 145 |
|
| 146 |
-
cam_vis.set_images([np.asarray(image1, dtype=np.uint8), np.asarray(image2, dtype=np.uint8)])
|
| 147 |
-
|
| 148 |
image1 = image_to_tensor(image1).to(_device_)
|
| 149 |
image2 = image_to_tensor(image2).to(_device_)
|
| 150 |
|
|
@@ -186,31 +185,20 @@ def run_pose_exploration(cam_vis, image1, image2, probe_bsz, adj_bsz, adj_iters,
|
|
| 186 |
if anchor_polar is None:
|
| 187 |
anchor_polar = np.pi/2
|
| 188 |
|
| 189 |
-
xyz0 = spherical_to_cartesian((anchor_polar, 0., 4.))
|
| 190 |
-
c2w0 = elu_to_c2w(xyz0, np.zeros(3), np.array([0., 0., 1.]))
|
| 191 |
-
|
| 192 |
-
xyz1 = spherical_to_cartesian((theta + anchor_polar, 0. + azimuth, 4. + radius))
|
| 193 |
-
c2w1 = elu_to_c2w(xyz1, np.zeros(3), np.array([0., 0., 1.]))
|
| 194 |
-
|
| 195 |
-
cam_vis._poses = [c2w0, c2w1]
|
| 196 |
-
fig = cam_vis.update_figure(5, base_radius=-1.2, font_size=16, show_background=True, show_grid=True, show_ticklabels=True)
|
| 197 |
-
|
| 198 |
explored_sph = (theta, azimuth, radius)
|
| 199 |
|
| 200 |
-
return anchor_polar, explored_sph
|
| 201 |
|
| 202 |
|
| 203 |
@spaces.GPU(duration=110)
|
| 204 |
-
def run_pose_refinement(
|
| 205 |
|
| 206 |
seed_everything(seed_value)
|
| 207 |
|
| 208 |
-
|
|
|
|
| 209 |
|
| 210 |
-
|
| 211 |
-
image2 = image_to_tensor(image2).to(_device_)
|
| 212 |
-
|
| 213 |
-
images = [image1, image2]
|
| 214 |
images = [ img.permute(0, 2, 3, 1) for img in images ]
|
| 215 |
|
| 216 |
out_poses, _, loss = find_optimal_poses(
|
|
@@ -234,10 +222,39 @@ def run_pose_refinement(cam_vis, image1, image2, anchor_polar, explored_sph, ref
|
|
| 234 |
xyz1 = spherical_to_cartesian((theta + anchor_polar, 0. + azimuth, 4. + radius))
|
| 235 |
c2w1 = elu_to_c2w(xyz1, np.zeros(3), np.array([0., 0., 1.]))
|
| 236 |
|
| 237 |
-
cam_vis
|
| 238 |
fig = cam_vis.update_figure(5, base_radius=-1.2, font_size=16, show_background=True, show_grid=True, show_ticklabels=True)
|
| 239 |
|
| 240 |
-
return final_sph, fig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
|
| 242 |
|
| 243 |
_HEADER_ = '''
|
|
@@ -267,6 +284,9 @@ def run_demo():
|
|
| 267 |
demo = gr.Blocks(title='ID-Pose: Sparse-view Camera Pose Estimation By Inverting Diffusion Models')
|
| 268 |
|
| 269 |
with demo:
|
|
|
|
|
|
|
|
|
|
| 270 |
gr.Markdown(_HEADER_)
|
| 271 |
|
| 272 |
with gr.Row(variant='panel'):
|
|
@@ -327,8 +347,10 @@ def run_demo():
|
|
| 327 |
['data/gradio_demo/circo_0.png', 'data/gradio_demo/circo_1.png'],
|
| 328 |
],
|
| 329 |
inputs=[input_image1, input_image2],
|
|
|
|
|
|
|
| 330 |
label='Examples (Captured)',
|
| 331 |
-
cache_examples=
|
| 332 |
examples_per_page=5
|
| 333 |
)
|
| 334 |
|
|
@@ -342,8 +364,10 @@ def run_demo():
|
|
| 342 |
['data/gradio_demo/christ_0.png', 'data/gradio_demo/christ_1.png'],
|
| 343 |
],
|
| 344 |
inputs=[input_image1, input_image2],
|
|
|
|
|
|
|
| 345 |
label='Examples (Internet)',
|
| 346 |
-
cache_examples=
|
| 347 |
examples_per_page=5
|
| 348 |
)
|
| 349 |
|
|
@@ -357,31 +381,37 @@ def run_demo():
|
|
| 357 |
['data/gradio_demo/ride_horse_0.png', 'data/gradio_demo/ride_horse_1.png'],
|
| 358 |
],
|
| 359 |
inputs=[input_image1, input_image2],
|
|
|
|
|
|
|
| 360 |
label='Examples (Generated)',
|
| 361 |
-
cache_examples=
|
| 362 |
examples_per_page=5
|
| 363 |
)
|
| 364 |
|
| 365 |
-
cam_vis = CameraVisualizer([np.eye(4), np.eye(4)], ['Image 1', 'Image 2'], ['red', 'blue'])
|
| 366 |
-
|
| 367 |
-
explored_sph = gr.State()
|
| 368 |
-
anchor_polar = gr.State()
|
| 369 |
-
refined_sph = gr.State()
|
| 370 |
-
|
| 371 |
run_btn.click(
|
| 372 |
fn=run_preprocess,
|
| 373 |
inputs=[input_image1, input_image2, preprocess_chk, seed_value],
|
| 374 |
outputs=[processed_image1, processed_image2],
|
| 375 |
).success(
|
| 376 |
-
fn=
|
| 377 |
-
inputs=[processed_image1, processed_image2, probe_bsz, adj_bsz, adj_iters, seed_value],
|
| 378 |
-
outputs=[
|
| 379 |
)
|
| 380 |
|
| 381 |
refine_btn.click(
|
| 382 |
-
fn=
|
| 383 |
-
inputs=[processed_image1, processed_image2,
|
| 384 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
)
|
| 386 |
|
| 387 |
demo.launch()
|
|
|
|
| 18 |
from src.pose_funcs import find_optimal_poses
|
| 19 |
from src.utils import spherical_to_cartesian, elu_to_c2w
|
| 20 |
|
| 21 |
+
|
| 22 |
if torch.cuda.is_available():
|
| 23 |
_device_ = 'cuda:0'
|
| 24 |
else:
|
|
|
|
| 140 |
|
| 141 |
|
| 142 |
@spaces.GPU(duration=110)
|
| 143 |
+
def run_pose_exploration(image1, image2, probe_bsz, adj_bsz, adj_iters, seed_value):
|
| 144 |
|
| 145 |
seed_everything(seed_value)
|
| 146 |
|
|
|
|
|
|
|
| 147 |
image1 = image_to_tensor(image1).to(_device_)
|
| 148 |
image2 = image_to_tensor(image2).to(_device_)
|
| 149 |
|
|
|
|
| 185 |
if anchor_polar is None:
|
| 186 |
anchor_polar = np.pi/2
|
| 187 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
explored_sph = (theta, azimuth, radius)
|
| 189 |
|
| 190 |
+
return anchor_polar, explored_sph
|
| 191 |
|
| 192 |
|
| 193 |
@spaces.GPU(duration=110)
|
| 194 |
+
def run_pose_refinement(image1, image2, est_result, refine_iters, seed_value):
|
| 195 |
|
| 196 |
seed_everything(seed_value)
|
| 197 |
|
| 198 |
+
anchor_polar = est_result[0]
|
| 199 |
+
explored_sph = est_result[1]
|
| 200 |
|
| 201 |
+
images = [image_to_tensor(image1).to(_device_), image_to_tensor(image2).to(_device_)]
|
|
|
|
|
|
|
|
|
|
| 202 |
images = [ img.permute(0, 2, 3, 1) for img in images ]
|
| 203 |
|
| 204 |
out_poses, _, loss = find_optimal_poses(
|
|
|
|
| 222 |
xyz1 = spherical_to_cartesian((theta + anchor_polar, 0. + azimuth, 4. + radius))
|
| 223 |
c2w1 = elu_to_c2w(xyz1, np.zeros(3), np.array([0., 0., 1.]))
|
| 224 |
|
| 225 |
+
cam_vis = CameraVisualizer([c2w0, c2w1], ['Image 1', 'Image 2'], ['red', 'blue'], images=[np.asarray(image1, dtype=np.uint8), np.asarray(image2, dtype=np.uint8)])
|
| 226 |
fig = cam_vis.update_figure(5, base_radius=-1.2, font_size=16, show_background=True, show_grid=True, show_ticklabels=True)
|
| 227 |
|
| 228 |
+
return (anchor_polar, final_sph), fig
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
def run_example(image1, image2):
|
| 232 |
+
|
| 233 |
+
image1, image2 = run_preprocess(image1, image2, True, 0)
|
| 234 |
+
anchor_polar, explored_sph = run_pose_exploration(image1, image2, 16, 4, 10, 0)
|
| 235 |
+
|
| 236 |
+
return (anchor_polar, explored_sph), image1, image2
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
def run_or_visualize(image1, image2, probe_bsz, adj_bsz, adj_iters, seed_value, est_result):
|
| 240 |
+
|
| 241 |
+
if est_result is None:
|
| 242 |
+
anchor_polar, explored_sph = run_pose_exploration(image1, image2, probe_bsz, adj_bsz, adj_iters, seed_value)
|
| 243 |
+
else:
|
| 244 |
+
anchor_polar = est_result[0]
|
| 245 |
+
explored_sph = est_result[1]
|
| 246 |
+
print('Using cache result.')
|
| 247 |
+
|
| 248 |
+
xyz0 = spherical_to_cartesian((anchor_polar, 0., 4.))
|
| 249 |
+
c2w0 = elu_to_c2w(xyz0, np.zeros(3), np.array([0., 0., 1.]))
|
| 250 |
+
|
| 251 |
+
xyz1 = spherical_to_cartesian((explored_sph[0] + anchor_polar, 0. + explored_sph[1], 4. + explored_sph[2]))
|
| 252 |
+
c2w1 = elu_to_c2w(xyz1, np.zeros(3), np.array([0., 0., 1.]))
|
| 253 |
+
|
| 254 |
+
cam_vis = CameraVisualizer([c2w0, c2w1], ['Image 1', 'Image 2'], ['red', 'blue'], images=[np.asarray(image1, dtype=np.uint8), np.asarray(image2, dtype=np.uint8)])
|
| 255 |
+
fig = cam_vis.update_figure(5, base_radius=-1.2, font_size=16, show_background=True, show_grid=True, show_ticklabels=True)
|
| 256 |
+
|
| 257 |
+
return (anchor_polar, explored_sph), fig, gr.update(interactive=True)
|
| 258 |
|
| 259 |
|
| 260 |
_HEADER_ = '''
|
|
|
|
| 284 |
demo = gr.Blocks(title='ID-Pose: Sparse-view Camera Pose Estimation By Inverting Diffusion Models')
|
| 285 |
|
| 286 |
with demo:
|
| 287 |
+
|
| 288 |
+
est_result = gr.JSON(visible=False)
|
| 289 |
+
|
| 290 |
gr.Markdown(_HEADER_)
|
| 291 |
|
| 292 |
with gr.Row(variant='panel'):
|
|
|
|
| 347 |
['data/gradio_demo/circo_0.png', 'data/gradio_demo/circo_1.png'],
|
| 348 |
],
|
| 349 |
inputs=[input_image1, input_image2],
|
| 350 |
+
fn=run_example,
|
| 351 |
+
outputs=[est_result, processed_image1, processed_image2],
|
| 352 |
label='Examples (Captured)',
|
| 353 |
+
cache_examples='lazy',
|
| 354 |
examples_per_page=5
|
| 355 |
)
|
| 356 |
|
|
|
|
| 364 |
['data/gradio_demo/christ_0.png', 'data/gradio_demo/christ_1.png'],
|
| 365 |
],
|
| 366 |
inputs=[input_image1, input_image2],
|
| 367 |
+
fn=run_example,
|
| 368 |
+
outputs=[est_result, processed_image1, processed_image2],
|
| 369 |
label='Examples (Internet)',
|
| 370 |
+
cache_examples='lazy',
|
| 371 |
examples_per_page=5
|
| 372 |
)
|
| 373 |
|
|
|
|
| 381 |
['data/gradio_demo/ride_horse_0.png', 'data/gradio_demo/ride_horse_1.png'],
|
| 382 |
],
|
| 383 |
inputs=[input_image1, input_image2],
|
| 384 |
+
fn=run_example,
|
| 385 |
+
outputs=[est_result, processed_image1, processed_image2],
|
| 386 |
label='Examples (Generated)',
|
| 387 |
+
cache_examples='lazy',
|
| 388 |
examples_per_page=5
|
| 389 |
)
|
| 390 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
run_btn.click(
|
| 392 |
fn=run_preprocess,
|
| 393 |
inputs=[input_image1, input_image2, preprocess_chk, seed_value],
|
| 394 |
outputs=[processed_image1, processed_image2],
|
| 395 |
).success(
|
| 396 |
+
fn=run_or_visualize,
|
| 397 |
+
inputs=[processed_image1, processed_image2, probe_bsz, adj_bsz, adj_iters, seed_value, est_result],
|
| 398 |
+
outputs=[est_result, vis_output, refine_btn]
|
| 399 |
)
|
| 400 |
|
| 401 |
refine_btn.click(
|
| 402 |
+
fn=run_pose_refinement,
|
| 403 |
+
inputs=[processed_image1, processed_image2, est_result, refine_iters, seed_value],
|
| 404 |
+
outputs=[est_result, vis_output]
|
| 405 |
+
)
|
| 406 |
+
|
| 407 |
+
input_image1.clear(
|
| 408 |
+
fn=lambda: None,
|
| 409 |
+
outputs=[est_result]
|
| 410 |
+
)
|
| 411 |
+
|
| 412 |
+
input_image2.clear(
|
| 413 |
+
fn=lambda: None,
|
| 414 |
+
outputs=[est_result]
|
| 415 |
)
|
| 416 |
|
| 417 |
demo.launch()
|
src/pose_funcs.py
CHANGED
|
@@ -101,9 +101,9 @@ def add_pose(pose1, pose2):
|
|
| 101 |
|
| 102 |
def create_pose_params(pose, device):
|
| 103 |
|
| 104 |
-
theta = torch.tensor([pose[0]], requires_grad=True, device=device)
|
| 105 |
-
azimuth = torch.tensor([pose[1]], requires_grad=True, device=device)
|
| 106 |
-
radius = torch.tensor([pose[2]], requires_grad=True, device=device)
|
| 107 |
|
| 108 |
return [theta, azimuth, radius]
|
| 109 |
|
|
|
|
| 101 |
|
| 102 |
def create_pose_params(pose, device):
|
| 103 |
|
| 104 |
+
theta = torch.tensor([float(pose[0])], requires_grad=True, device=device)
|
| 105 |
+
azimuth = torch.tensor([float(pose[1])], requires_grad=True, device=device)
|
| 106 |
+
radius = torch.tensor([float(pose[2])], requires_grad=True, device=device)
|
| 107 |
|
| 108 |
return [theta, azimuth, radius]
|
| 109 |
|