Spaces:
Running
on
Zero
Running
on
Zero
tokenid
committed on
Commit
·
b5dfbe4
1
Parent(s):
15de7a2
lazy cache
Browse files
- app.py +66 -36
- src/pose_funcs.py +3 -3
app.py
CHANGED
|
@@ -18,6 +18,7 @@ from src.pose_estimation import load_model_from_config, estimate_poses, estimate
|
|
| 18 |
from src.pose_funcs import find_optimal_poses
|
| 19 |
from src.utils import spherical_to_cartesian, elu_to_c2w
|
| 20 |
|
|
|
|
| 21 |
if torch.cuda.is_available():
|
| 22 |
_device_ = 'cuda:0'
|
| 23 |
else:
|
|
@@ -139,12 +140,10 @@ def image_to_tensor(img, width=256, height=256):
|
|
| 139 |
|
| 140 |
|
| 141 |
@spaces.GPU(duration=110)
|
| 142 |
-
def run_pose_exploration(
|
| 143 |
|
| 144 |
seed_everything(seed_value)
|
| 145 |
|
| 146 |
-
cam_vis.set_images([np.asarray(image1, dtype=np.uint8), np.asarray(image2, dtype=np.uint8)])
|
| 147 |
-
|
| 148 |
image1 = image_to_tensor(image1).to(_device_)
|
| 149 |
image2 = image_to_tensor(image2).to(_device_)
|
| 150 |
|
|
@@ -186,31 +185,20 @@ def run_pose_exploration(cam_vis, image1, image2, probe_bsz, adj_bsz, adj_iters,
|
|
| 186 |
if anchor_polar is None:
|
| 187 |
anchor_polar = np.pi/2
|
| 188 |
|
| 189 |
-
xyz0 = spherical_to_cartesian((anchor_polar, 0., 4.))
|
| 190 |
-
c2w0 = elu_to_c2w(xyz0, np.zeros(3), np.array([0., 0., 1.]))
|
| 191 |
-
|
| 192 |
-
xyz1 = spherical_to_cartesian((theta + anchor_polar, 0. + azimuth, 4. + radius))
|
| 193 |
-
c2w1 = elu_to_c2w(xyz1, np.zeros(3), np.array([0., 0., 1.]))
|
| 194 |
-
|
| 195 |
-
cam_vis._poses = [c2w0, c2w1]
|
| 196 |
-
fig = cam_vis.update_figure(5, base_radius=-1.2, font_size=16, show_background=True, show_grid=True, show_ticklabels=True)
|
| 197 |
-
|
| 198 |
explored_sph = (theta, azimuth, radius)
|
| 199 |
|
| 200 |
-
return anchor_polar, explored_sph
|
| 201 |
|
| 202 |
|
| 203 |
@spaces.GPU(duration=110)
|
| 204 |
-
def run_pose_refinement(
|
| 205 |
|
| 206 |
seed_everything(seed_value)
|
| 207 |
|
| 208 |
-
|
|
|
|
| 209 |
|
| 210 |
-
|
| 211 |
-
image2 = image_to_tensor(image2).to(_device_)
|
| 212 |
-
|
| 213 |
-
images = [image1, image2]
|
| 214 |
images = [ img.permute(0, 2, 3, 1) for img in images ]
|
| 215 |
|
| 216 |
out_poses, _, loss = find_optimal_poses(
|
|
@@ -234,10 +222,39 @@ def run_pose_refinement(cam_vis, image1, image2, anchor_polar, explored_sph, ref
|
|
| 234 |
xyz1 = spherical_to_cartesian((theta + anchor_polar, 0. + azimuth, 4. + radius))
|
| 235 |
c2w1 = elu_to_c2w(xyz1, np.zeros(3), np.array([0., 0., 1.]))
|
| 236 |
|
| 237 |
-
cam_vis
|
| 238 |
fig = cam_vis.update_figure(5, base_radius=-1.2, font_size=16, show_background=True, show_grid=True, show_ticklabels=True)
|
| 239 |
|
| 240 |
-
return final_sph, fig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
|
| 242 |
|
| 243 |
_HEADER_ = '''
|
|
@@ -267,6 +284,9 @@ def run_demo():
|
|
| 267 |
demo = gr.Blocks(title='ID-Pose: Sparse-view Camera Pose Estimation By Inverting Diffusion Models')
|
| 268 |
|
| 269 |
with demo:
|
|
|
|
|
|
|
|
|
|
| 270 |
gr.Markdown(_HEADER_)
|
| 271 |
|
| 272 |
with gr.Row(variant='panel'):
|
|
@@ -327,8 +347,10 @@ def run_demo():
|
|
| 327 |
['data/gradio_demo/circo_0.png', 'data/gradio_demo/circo_1.png'],
|
| 328 |
],
|
| 329 |
inputs=[input_image1, input_image2],
|
|
|
|
|
|
|
| 330 |
label='Examples (Captured)',
|
| 331 |
-
cache_examples=
|
| 332 |
examples_per_page=5
|
| 333 |
)
|
| 334 |
|
|
@@ -342,8 +364,10 @@ def run_demo():
|
|
| 342 |
['data/gradio_demo/christ_0.png', 'data/gradio_demo/christ_1.png'],
|
| 343 |
],
|
| 344 |
inputs=[input_image1, input_image2],
|
|
|
|
|
|
|
| 345 |
label='Examples (Internet)',
|
| 346 |
-
cache_examples=
|
| 347 |
examples_per_page=5
|
| 348 |
)
|
| 349 |
|
|
@@ -357,31 +381,37 @@ def run_demo():
|
|
| 357 |
['data/gradio_demo/ride_horse_0.png', 'data/gradio_demo/ride_horse_1.png'],
|
| 358 |
],
|
| 359 |
inputs=[input_image1, input_image2],
|
|
|
|
|
|
|
| 360 |
label='Examples (Generated)',
|
| 361 |
-
cache_examples=
|
| 362 |
examples_per_page=5
|
| 363 |
)
|
| 364 |
|
| 365 |
-
cam_vis = CameraVisualizer([np.eye(4), np.eye(4)], ['Image 1', 'Image 2'], ['red', 'blue'])
|
| 366 |
-
|
| 367 |
-
explored_sph = gr.State()
|
| 368 |
-
anchor_polar = gr.State()
|
| 369 |
-
refined_sph = gr.State()
|
| 370 |
-
|
| 371 |
run_btn.click(
|
| 372 |
fn=run_preprocess,
|
| 373 |
inputs=[input_image1, input_image2, preprocess_chk, seed_value],
|
| 374 |
outputs=[processed_image1, processed_image2],
|
| 375 |
).success(
|
| 376 |
-
fn=
|
| 377 |
-
inputs=[processed_image1, processed_image2, probe_bsz, adj_bsz, adj_iters, seed_value],
|
| 378 |
-
outputs=[
|
| 379 |
)
|
| 380 |
|
| 381 |
refine_btn.click(
|
| 382 |
-
fn=
|
| 383 |
-
inputs=[processed_image1, processed_image2,
|
| 384 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
)
|
| 386 |
|
| 387 |
demo.launch()
|
|
|
|
| 18 |
from src.pose_funcs import find_optimal_poses
|
| 19 |
from src.utils import spherical_to_cartesian, elu_to_c2w
|
| 20 |
|
| 21 |
+
|
| 22 |
if torch.cuda.is_available():
|
| 23 |
_device_ = 'cuda:0'
|
| 24 |
else:
|
|
|
|
| 140 |
|
| 141 |
|
| 142 |
@spaces.GPU(duration=110)
|
| 143 |
+
def run_pose_exploration(image1, image2, probe_bsz, adj_bsz, adj_iters, seed_value):
|
| 144 |
|
| 145 |
seed_everything(seed_value)
|
| 146 |
|
|
|
|
|
|
|
| 147 |
image1 = image_to_tensor(image1).to(_device_)
|
| 148 |
image2 = image_to_tensor(image2).to(_device_)
|
| 149 |
|
|
|
|
| 185 |
if anchor_polar is None:
|
| 186 |
anchor_polar = np.pi/2
|
| 187 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
explored_sph = (theta, azimuth, radius)
|
| 189 |
|
| 190 |
+
return anchor_polar, explored_sph
|
| 191 |
|
| 192 |
|
| 193 |
@spaces.GPU(duration=110)
|
| 194 |
+
def run_pose_refinement(image1, image2, est_result, refine_iters, seed_value):
|
| 195 |
|
| 196 |
seed_everything(seed_value)
|
| 197 |
|
| 198 |
+
anchor_polar = est_result[0]
|
| 199 |
+
explored_sph = est_result[1]
|
| 200 |
|
| 201 |
+
images = [image_to_tensor(image1).to(_device_), image_to_tensor(image2).to(_device_)]
|
|
|
|
|
|
|
|
|
|
| 202 |
images = [ img.permute(0, 2, 3, 1) for img in images ]
|
| 203 |
|
| 204 |
out_poses, _, loss = find_optimal_poses(
|
|
|
|
| 222 |
xyz1 = spherical_to_cartesian((theta + anchor_polar, 0. + azimuth, 4. + radius))
|
| 223 |
c2w1 = elu_to_c2w(xyz1, np.zeros(3), np.array([0., 0., 1.]))
|
| 224 |
|
| 225 |
+
cam_vis = CameraVisualizer([c2w0, c2w1], ['Image 1', 'Image 2'], ['red', 'blue'], images=[np.asarray(image1, dtype=np.uint8), np.asarray(image2, dtype=np.uint8)])
|
| 226 |
fig = cam_vis.update_figure(5, base_radius=-1.2, font_size=16, show_background=True, show_grid=True, show_ticklabels=True)
|
| 227 |
|
| 228 |
+
return (anchor_polar, final_sph), fig
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
def run_example(image1, image2):
|
| 232 |
+
|
| 233 |
+
image1, image2 = run_preprocess(image1, image2, True, 0)
|
| 234 |
+
anchor_polar, explored_sph = run_pose_exploration(image1, image2, 16, 4, 10, 0)
|
| 235 |
+
|
| 236 |
+
return (anchor_polar, explored_sph), image1, image2
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
def run_or_visualize(image1, image2, probe_bsz, adj_bsz, adj_iters, seed_value, est_result):
|
| 240 |
+
|
| 241 |
+
if est_result is None:
|
| 242 |
+
anchor_polar, explored_sph = run_pose_exploration(image1, image2, probe_bsz, adj_bsz, adj_iters, seed_value)
|
| 243 |
+
else:
|
| 244 |
+
anchor_polar = est_result[0]
|
| 245 |
+
explored_sph = est_result[1]
|
| 246 |
+
print('Using cache result.')
|
| 247 |
+
|
| 248 |
+
xyz0 = spherical_to_cartesian((anchor_polar, 0., 4.))
|
| 249 |
+
c2w0 = elu_to_c2w(xyz0, np.zeros(3), np.array([0., 0., 1.]))
|
| 250 |
+
|
| 251 |
+
xyz1 = spherical_to_cartesian((explored_sph[0] + anchor_polar, 0. + explored_sph[1], 4. + explored_sph[2]))
|
| 252 |
+
c2w1 = elu_to_c2w(xyz1, np.zeros(3), np.array([0., 0., 1.]))
|
| 253 |
+
|
| 254 |
+
cam_vis = CameraVisualizer([c2w0, c2w1], ['Image 1', 'Image 2'], ['red', 'blue'], images=[np.asarray(image1, dtype=np.uint8), np.asarray(image2, dtype=np.uint8)])
|
| 255 |
+
fig = cam_vis.update_figure(5, base_radius=-1.2, font_size=16, show_background=True, show_grid=True, show_ticklabels=True)
|
| 256 |
+
|
| 257 |
+
return (anchor_polar, explored_sph), fig, gr.update(interactive=True)
|
| 258 |
|
| 259 |
|
| 260 |
_HEADER_ = '''
|
|
|
|
| 284 |
demo = gr.Blocks(title='ID-Pose: Sparse-view Camera Pose Estimation By Inverting Diffusion Models')
|
| 285 |
|
| 286 |
with demo:
|
| 287 |
+
|
| 288 |
+
est_result = gr.JSON(visible=False)
|
| 289 |
+
|
| 290 |
gr.Markdown(_HEADER_)
|
| 291 |
|
| 292 |
with gr.Row(variant='panel'):
|
|
|
|
| 347 |
['data/gradio_demo/circo_0.png', 'data/gradio_demo/circo_1.png'],
|
| 348 |
],
|
| 349 |
inputs=[input_image1, input_image2],
|
| 350 |
+
fn=run_example,
|
| 351 |
+
outputs=[est_result, processed_image1, processed_image2],
|
| 352 |
label='Examples (Captured)',
|
| 353 |
+
cache_examples='lazy',
|
| 354 |
examples_per_page=5
|
| 355 |
)
|
| 356 |
|
|
|
|
| 364 |
['data/gradio_demo/christ_0.png', 'data/gradio_demo/christ_1.png'],
|
| 365 |
],
|
| 366 |
inputs=[input_image1, input_image2],
|
| 367 |
+
fn=run_example,
|
| 368 |
+
outputs=[est_result, processed_image1, processed_image2],
|
| 369 |
label='Examples (Internet)',
|
| 370 |
+
cache_examples='lazy',
|
| 371 |
examples_per_page=5
|
| 372 |
)
|
| 373 |
|
|
|
|
| 381 |
['data/gradio_demo/ride_horse_0.png', 'data/gradio_demo/ride_horse_1.png'],
|
| 382 |
],
|
| 383 |
inputs=[input_image1, input_image2],
|
| 384 |
+
fn=run_example,
|
| 385 |
+
outputs=[est_result, processed_image1, processed_image2],
|
| 386 |
label='Examples (Generated)',
|
| 387 |
+
cache_examples='lazy',
|
| 388 |
examples_per_page=5
|
| 389 |
)
|
| 390 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
run_btn.click(
|
| 392 |
fn=run_preprocess,
|
| 393 |
inputs=[input_image1, input_image2, preprocess_chk, seed_value],
|
| 394 |
outputs=[processed_image1, processed_image2],
|
| 395 |
).success(
|
| 396 |
+
fn=run_or_visualize,
|
| 397 |
+
inputs=[processed_image1, processed_image2, probe_bsz, adj_bsz, adj_iters, seed_value, est_result],
|
| 398 |
+
outputs=[est_result, vis_output, refine_btn]
|
| 399 |
)
|
| 400 |
|
| 401 |
refine_btn.click(
|
| 402 |
+
fn=run_pose_refinement,
|
| 403 |
+
inputs=[processed_image1, processed_image2, est_result, refine_iters, seed_value],
|
| 404 |
+
outputs=[est_result, vis_output]
|
| 405 |
+
)
|
| 406 |
+
|
| 407 |
+
input_image1.clear(
|
| 408 |
+
fn=lambda: None,
|
| 409 |
+
outputs=[est_result]
|
| 410 |
+
)
|
| 411 |
+
|
| 412 |
+
input_image2.clear(
|
| 413 |
+
fn=lambda: None,
|
| 414 |
+
outputs=[est_result]
|
| 415 |
)
|
| 416 |
|
| 417 |
demo.launch()
|
src/pose_funcs.py
CHANGED
|
@@ -101,9 +101,9 @@ def add_pose(pose1, pose2):
|
|
| 101 |
|
| 102 |
def create_pose_params(pose, device):
|
| 103 |
|
| 104 |
-
theta = torch.tensor([pose[0]], requires_grad=True, device=device)
|
| 105 |
-
azimuth = torch.tensor([pose[1]], requires_grad=True, device=device)
|
| 106 |
-
radius = torch.tensor([pose[2]], requires_grad=True, device=device)
|
| 107 |
|
| 108 |
return [theta, azimuth, radius]
|
| 109 |
|
|
|
|
| 101 |
|
| 102 |
def create_pose_params(pose, device):
|
| 103 |
|
| 104 |
+
theta = torch.tensor([float(pose[0])], requires_grad=True, device=device)
|
| 105 |
+
azimuth = torch.tensor([float(pose[1])], requires_grad=True, device=device)
|
| 106 |
+
radius = torch.tensor([float(pose[2])], requires_grad=True, device=device)
|
| 107 |
|
| 108 |
return [theta, azimuth, radius]
|
| 109 |
|