Spaces:
Runtime error
Runtime error
added rate slider
Browse files
app.py
CHANGED
|
@@ -103,8 +103,8 @@ def paint_point_track_gpu_scatter(
|
|
| 103 |
point_tracks: np.ndarray,
|
| 104 |
visibles: np.ndarray,
|
| 105 |
colormap: Optional[List[Tuple[int, int, int]]] = None,
|
| 106 |
-
|
| 107 |
-
sharpness: float = 0.
|
| 108 |
) -> np.ndarray:
|
| 109 |
print('starting vis')
|
| 110 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
@@ -117,6 +117,20 @@ def paint_point_track_gpu_scatter(
|
|
| 117 |
if colormap is None:
|
| 118 |
colormap = get_colors(P)
|
| 119 |
colors = torch.tensor(colormap, dtype=torch.float32, device=device) # [P,3]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
D = radius * 2 + 1
|
| 121 |
y = torch.arange(D, device=device).float()[:, None] - radius
|
| 122 |
x = torch.arange(D, device=device).float()[None, :] - radius
|
|
@@ -160,7 +174,7 @@ def paint_point_track_gpu_scatter(
|
|
| 160 |
# frames_t[t] = frames_t[t] * (1 - weight) + accum
|
| 161 |
|
| 162 |
# alpha = weight.clamp(0, 1)
|
| 163 |
-
# alpha = weight.clamp(0, 1) * 0.
|
| 164 |
alpha = weight.clamp(0, 1) # transparency
|
| 165 |
accum = accum / (weight + 1e-6) # [3, H, W]
|
| 166 |
frames_t[t] = frames_t[t] * (1 - alpha) + accum * alpha
|
|
@@ -491,6 +505,25 @@ FRAME_LIMIT = 600 # Limit the number of frames to process
|
|
| 491 |
def choose_frame(frame_num, video_preview_array):
    """Return the frame stored at position ``frame_num``.

    Args:
        frame_num: Frame index. It is passed through ``int()`` before
            indexing, so float or numeric-string values are accepted and
            truncated toward zero.
        video_preview_array: Indexable collection of frames.

    Returns:
        ``video_preview_array[int(frame_num)]``.

    Raises:
        IndexError: If the index is out of range for the collection.
        ValueError: If ``frame_num`` cannot be converted to ``int``.
    """
    return video_preview_array[int(frame_num)]
|
| 493 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 494 |
|
| 495 |
def preprocess_video_input(video_path):
|
| 496 |
video_arr = mediapy.read_video(video_path)
|
|
@@ -509,7 +542,7 @@ def preprocess_video_input(video_path):
|
|
| 509 |
if height*width > 768*1024:
|
| 510 |
new_height = new_height*3//4
|
| 511 |
new_width = new_width*3//4
|
| 512 |
-
new_height, new_width = new_height//
|
| 513 |
|
| 514 |
|
| 515 |
preview_video = mediapy.resize_video(video_arr, (new_height, new_width))
|
|
@@ -541,8 +574,56 @@ def preprocess_video_input(video_path):
|
|
| 541 |
gr.update(interactive=interactive),
|
| 542 |
gr.update(interactive=interactive),
|
| 543 |
gr.update(interactive=True),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 544 |
)
|
| 545 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 546 |
@spaces.GPU
|
| 547 |
def track(
|
| 548 |
video_preview,
|
|
@@ -696,78 +777,86 @@ def track(
|
|
| 696 |
# traj_maps_e = traj_maps_e[:,:,:,::4,::4] # subsample
|
| 697 |
# visconf_maps_e = visconf_maps_e[:,:,:,::4,::4] # subsample
|
| 698 |
|
| 699 |
-
traj_maps_e = traj_maps_e[:,:,:,::2,::2] # subsample
|
| 700 |
-
visconf_maps_e = visconf_maps_e[:,:,:,::2,::2] # subsample
|
| 701 |
|
| 702 |
tracks = traj_maps_e.permute(0,3,4,1,2).reshape(-1,T,2).numpy()
|
| 703 |
visibs = visconf_maps_e.permute(0,3,4,1,2).reshape(-1,T,2)[:,:,0].numpy()
|
| 704 |
confs = visconf_maps_e.permute(0,3,4,1,2).reshape(-1,T,2)[:,:,0].numpy()
|
| 705 |
-
|
| 706 |
-
visibs = (visibs * confs) > 0.2 # N,T
|
| 707 |
# visibs = (confs) > 0.1 # N,T
|
| 708 |
|
| 709 |
|
| 710 |
# sc = (np.array([video_preview.shape[2], video_preview.shape[1]]) / np.array([VIDEO_INPUT_RESO[1], VIDEO_INPUT_RESO[0]])).reshape(1,1,2)
|
| 711 |
# print('sc', sc)
|
| 712 |
# tracks = tracks * sc
|
| 713 |
-
|
| 714 |
-
query_count = tracks.shape[0]
|
| 715 |
-
cmap = matplotlib.colormaps.get_cmap("gist_rainbow")
|
| 716 |
-
query_points_color = [[]]
|
| 717 |
-
for i in range(query_count):
|
| 718 |
-
# Choose the color for the point from matplotlib colormap
|
| 719 |
-
color = cmap(i / float(query_count))
|
| 720 |
-
color = (int(color[0] * 255), int(color[1] * 255), int(color[2] * 255))
|
| 721 |
-
query_points_color[0].append(color)
|
| 722 |
-
# make color array
|
| 723 |
-
colors = []
|
| 724 |
-
for frame_colors in query_points_color:
|
| 725 |
-
colors.extend(frame_colors)
|
| 726 |
-
colors = np.array(colors)
|
| 727 |
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
|
|
|
| 734 |
|
| 735 |
-
|
| 736 |
-
#
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
#
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
|
| 744 |
-
|
| 745 |
-
|
| 746 |
-
|
| 747 |
-
|
| 748 |
-
|
| 749 |
-
for ti in range(T):
|
| 750 |
-
temp_out_f = '%s/%03d.jpg' % (video_path, ti)
|
| 751 |
-
# temp_out_f = '%s/%03d.png' % (video_path, ti)
|
| 752 |
-
im = PIL.Image.fromarray(painted_video[ti])
|
| 753 |
-
# im.save(temp_out_f, "PNG", subsampling=0, quality=80)
|
| 754 |
-
im.save(temp_out_f)
|
| 755 |
-
print('saved', temp_out_f)
|
| 756 |
-
# os.system('/usr/bin/ffmpeg -y -hide_banner -loglevel error -f image2 -framerate %d -pattern_type glob -i "%s/*.png" -c:v libx264 -crf 20 -pix_fmt yuv420p %s' % (video_fps, video_path, video_file_path))
|
| 757 |
-
os.system('/usr/bin/ffmpeg -y -hide_banner -loglevel error -f image2 -framerate %d -pattern_type glob -i "%s/*.jpg" -c:v libx264 -crf 20 -pix_fmt yuv420p %s' % (video_fps, video_path, video_file_path))
|
| 758 |
-
print('saved', video_file_path)
|
| 759 |
-
for ti in range(T):
|
| 760 |
-
# temp_out_f = '%s/%03d.png' % (video_path, ti)
|
| 761 |
-
temp_out_f = '%s/%03d.jpg' % (video_path, ti)
|
| 762 |
-
os.remove(temp_out_f)
|
| 763 |
-
print('deleted', temp_out_f)
|
| 764 |
|
| 765 |
-
#
|
| 766 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 767 |
|
| 768 |
|
| 769 |
|
| 770 |
-
return video_file_path
|
| 771 |
|
| 772 |
|
| 773 |
with gr.Blocks() as demo:
|
|
@@ -782,13 +871,17 @@ with gr.Blocks() as demo:
|
|
| 782 |
is_tracked_query = gr.State([])
|
| 783 |
query_count = gr.State(0)
|
| 784 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 785 |
gr.Markdown("# ⚡ AllTracker: Efficient Dense Point Tracking at High Resolution")
|
| 786 |
gr.Markdown("<div style='text-align: left;'> \
|
| 787 |
<p>Welcome to <a href='https://alltracker.github.io/' target='_blank'>AllTracker</a>! This space demonstrates all-pixel tracking in videos.</p> \
|
| 788 |
<p>To get started, simply upload your <b>.mp4</b> video, or click on one of the example videos. The shorter the video, the faster the processing. We recommend submitting videos under 20 seconds long.</p> \
|
| 789 |
<p>After picking a video, click \"Submit\" to load the frames into the app, and optionally choose a frame (using the slider), and then click \"Track\".</p> \
|
| 790 |
<p>For full info on how this works, check out our <a href='https://github.com/aharley/alltracker/' target='_blank'>GitHub Repo</a>!</p> \
|
| 791 |
-
<p>Initial code for this Gradio app came from LocoTrack and CoTracker
|
| 792 |
</div>"
|
| 793 |
)
|
| 794 |
|
|
@@ -797,25 +890,26 @@ with gr.Blocks() as demo:
|
|
| 797 |
with gr.Row():
|
| 798 |
with gr.Column():
|
| 799 |
with gr.Row():
|
| 800 |
-
video_in = gr.Video(label="Video
|
| 801 |
with gr.Row():
|
| 802 |
submit = gr.Button("Submit")
|
| 803 |
with gr.Column():
|
| 804 |
# with gr.Accordion("Sample videos", open=True) as video_in_drawer:
|
| 805 |
with gr.Row():
|
| 806 |
-
|
| 807 |
monkey = os.path.join(os.path.dirname(__file__), "videos", "monkey_28.mp4")
|
|
|
|
| 808 |
apple = os.path.join(os.path.dirname(__file__), "videos", "apple.mp4")
|
|
|
|
|
|
|
| 809 |
bear = os.path.join(os.path.dirname(__file__), "videos", "bear.mp4")
|
| 810 |
-
paragliding_launch = os.path.join(
|
| 811 |
-
os.path.dirname(__file__), "videos", "paragliding-launch.mp4"
|
| 812 |
-
)
|
| 813 |
paragliding = os.path.join(os.path.dirname(__file__), "videos", "paragliding.mp4")
|
| 814 |
cat = os.path.join(os.path.dirname(__file__), "videos", "cat.mp4")
|
| 815 |
pillow = os.path.join(os.path.dirname(__file__), "videos", "pillow.mp4")
|
| 816 |
teddy = os.path.join(os.path.dirname(__file__), "videos", "teddy.mp4")
|
| 817 |
backpack = os.path.join(os.path.dirname(__file__), "videos", "backpack.mp4")
|
| 818 |
-
gr.Examples(examples=[
|
| 819 |
inputs = [
|
| 820 |
video_in
|
| 821 |
],
|
|
@@ -828,7 +922,7 @@ with gr.Blocks() as demo:
|
|
| 828 |
with gr.Column():
|
| 829 |
with gr.Row():
|
| 830 |
query_frame_slider = gr.Slider(
|
| 831 |
-
minimum=0, maximum=100, value=0, step=1, label="Choose
|
| 832 |
# with gr.Row():
|
| 833 |
# undo = gr.Button("Undo", interactive=False)
|
| 834 |
# clear_frame = gr.Button("Clear Frame", interactive=False)
|
|
@@ -846,16 +940,25 @@ with gr.Blocks() as demo:
|
|
| 846 |
track_button = gr.Button("Track", interactive=False)
|
| 847 |
|
| 848 |
with gr.Column():
|
|
|
|
| 849 |
# with gr.Row():
|
| 850 |
-
#
|
| 851 |
-
#
|
| 852 |
-
#
|
| 853 |
-
|
| 854 |
-
|
| 855 |
-
|
| 856 |
-
|
| 857 |
-
|
| 858 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 859 |
|
| 860 |
|
| 861 |
|
|
@@ -892,6 +995,8 @@ with gr.Blocks() as demo:
|
|
| 892 |
queue = False
|
| 893 |
)
|
| 894 |
|
|
|
|
|
|
|
| 895 |
# current_frame.select(
|
| 896 |
# fn = get_point,
|
| 897 |
# inputs = [
|
|
@@ -982,10 +1087,68 @@ with gr.Blocks() as demo:
|
|
| 982 |
],
|
| 983 |
outputs = [
|
| 984 |
output_video,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 985 |
],
|
| 986 |
queue = True,
|
| 987 |
)
|
| 988 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 989 |
|
| 990 |
|
| 991 |
|
|
|
|
| 103 |
point_tracks: np.ndarray,
|
| 104 |
visibles: np.ndarray,
|
| 105 |
colormap: Optional[List[Tuple[int, int, int]]] = None,
|
| 106 |
+
rate: int = 1,
|
| 107 |
+
# sharpness: float = 0.1,
|
| 108 |
) -> np.ndarray:
|
| 109 |
print('starting vis')
|
| 110 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
| 117 |
if colormap is None:
|
| 118 |
colormap = get_colors(P)
|
| 119 |
colors = torch.tensor(colormap, dtype=torch.float32, device=device) # [P,3]
|
| 120 |
+
|
| 121 |
+
if rate==1:
|
| 122 |
+
radius = 1
|
| 123 |
+
elif rate==2:
|
| 124 |
+
radius = 1
|
| 125 |
+
elif rate== 4:
|
| 126 |
+
radius = 2
|
| 127 |
+
elif rate== 8:
|
| 128 |
+
radius = 4
|
| 129 |
+
else:
|
| 130 |
+
radius = 6
|
| 131 |
+
# radius = max(1, int(np.sqrt(rate)))
|
| 132 |
+
sharpness = 0.15 + 0.05 * np.log2(rate)
|
| 133 |
+
|
| 134 |
D = radius * 2 + 1
|
| 135 |
y = torch.arange(D, device=device).float()[:, None] - radius
|
| 136 |
x = torch.arange(D, device=device).float()[None, :] - radius
|
|
|
|
| 174 |
# frames_t[t] = frames_t[t] * (1 - weight) + accum
|
| 175 |
|
| 176 |
# alpha = weight.clamp(0, 1)
|
| 177 |
+
# alpha = weight.clamp(0, 1) * 0.9 # transparency
|
| 178 |
alpha = weight.clamp(0, 1) # transparency
|
| 179 |
accum = accum / (weight + 1e-6) # [3, H, W]
|
| 180 |
frames_t[t] = frames_t[t] * (1 - alpha) + accum * alpha
|
|
|
|
| 505 |
def choose_frame(frame_num, video_preview_array):
    """Return the frame stored at position ``frame_num``.

    Args:
        frame_num: Frame index. It is passed through ``int()`` before
            indexing, so float or numeric-string values are accepted and
            truncated toward zero.
        video_preview_array: Indexable collection of frames.

    Returns:
        ``video_preview_array[int(frame_num)]``.

    Raises:
        IndexError: If the index is out of range for the collection.
        ValueError: If ``frame_num`` cannot be converted to ``int``.
    """
    return video_preview_array[int(frame_num)]
|
| 507 |
|
| 508 |
+
def choose_rate1(video_preview, video_fps, tracks, visibs):
    """Call ``choose_rate`` with the subsampling rate fixed to 1.

    All arguments are forwarded unchanged; the return value is whatever
    ``choose_rate`` returns.
    """
    return choose_rate(1, video_preview, video_fps, tracks, visibs)
|
| 510 |
+
def choose_rate2(video_preview, video_fps, tracks, visibs):
    """Call ``choose_rate`` with the subsampling rate fixed to 2.

    All arguments are forwarded unchanged; the return value is whatever
    ``choose_rate`` returns.
    """
    return choose_rate(2, video_preview, video_fps, tracks, visibs)
|
| 512 |
+
def choose_rate4(video_preview, video_fps, tracks, visibs):
    """Call ``choose_rate`` with the subsampling rate fixed to 4.

    All arguments are forwarded unchanged; the return value is whatever
    ``choose_rate`` returns.
    """
    return choose_rate(4, video_preview, video_fps, tracks, visibs)
|
| 514 |
+
def choose_rate8(video_preview, video_fps, tracks, visibs):
    """Call ``choose_rate`` with the subsampling rate fixed to 8.

    All arguments are forwarded unchanged; the return value is whatever
    ``choose_rate`` returns.
    """
    return choose_rate(8, video_preview, video_fps, tracks, visibs)
|
| 516 |
+
# def choose_rate16(video_preview, video_fps, tracks, visibs):
|
| 517 |
+
# return choose_rate(16, video_preview, video_fps, tracks, visibs)
|
| 518 |
+
|
| 519 |
+
def choose_rate(rate, video_preview, video_fps, tracks, visibs):
    """Spatially subsample the dense tracks and re-render the output video.

    The tracks are treated as a dense grid with one track per pixel of
    ``video_preview``: they are reshaped to ``(H, W, T, ...)``, every
    ``rate``-th row and column is kept, and the result is flattened again
    before being handed to ``paint_video``.

    Args:
        rate: Positive integer stride applied along both spatial axes.
        video_preview: Array whose ``shape`` unpacks as ``(T, H, W, C)``.
        video_fps: Frame rate forwarded to ``paint_video``.
        tracks: Array-like with ``H * W * T * 2`` elements.
        visibs: Array-like with ``H * W * T`` elements.

    Returns:
        Whatever ``paint_video`` returns for the subsampled tracks.

    Raises:
        ValueError: If ``rate`` is smaller than 1, or if ``tracks`` /
            ``visibs`` do not hold exactly one entry per pixel per frame.
    """
    rate = int(rate)
    if rate < 1:
        raise ValueError('rate must be a positive integer, got %r' % (rate,))

    print('rate', rate)
    print('video_preview', video_preview.shape)
    T, H, W, _ = video_preview.shape

    # The stored state may be a plain list (its initial value) rather than an
    # ndarray, so coerce before using ndarray methods.
    tracks = np.asarray(tracks)
    visibs = np.asarray(visibs)
    if tracks.size != H * W * T * 2 or visibs.size != H * W * T:
        raise ValueError(
            'expected dense tracks for a %dx%d grid over %d frames, got '
            'tracks with %d elements and visibs with %d elements'
            % (H, W, T, tracks.size, visibs.size)
        )

    tracks_ = tracks.reshape(H, W, T, 2)[::rate, ::rate].reshape(-1, T, 2)
    visibs_ = visibs.reshape(H, W, T)[::rate, ::rate].reshape(-1, T)
    return paint_video(video_preview, video_fps, tracks_, visibs_, rate=rate)
|
| 527 |
|
| 528 |
def preprocess_video_input(video_path):
|
| 529 |
video_arr = mediapy.read_video(video_path)
|
|
|
|
| 542 |
if height*width > 768*1024:
|
| 543 |
new_height = new_height*3//4
|
| 544 |
new_width = new_width*3//4
|
| 545 |
+
new_height, new_width = new_height//16 * 16, new_width//16 * 16 # make it divisible by 16, partly to satisfy ffmpeg
|
| 546 |
|
| 547 |
|
| 548 |
preview_video = mediapy.resize_video(video_arr, (new_height, new_width))
|
|
|
|
| 574 |
gr.update(interactive=interactive),
|
| 575 |
gr.update(interactive=interactive),
|
| 576 |
gr.update(interactive=True),
|
| 577 |
+
# gr.update(interactive=True),
|
| 578 |
+
# gr.update(interactive=True),
|
| 579 |
+
# gr.update(interactive=True),
|
| 580 |
+
# gr.update(interactive=True),
|
| 581 |
)
|
| 582 |
|
| 583 |
+
|
| 584 |
+
def paint_video(video_preview, video_fps, tracks, visibs, rate=1):
    """Paint the tracks onto the preview frames and encode them as an MP4.

    Each track gets a colour from matplotlib's ``gist_rainbow`` colormap
    (track ``i`` of ``N`` is sampled at ``i / N``). The frames are painted by
    ``paint_point_track_gpu_scatter``, written out as JPEGs, and encoded with
    ``/usr/bin/ffmpeg`` (libx264, CRF 20, yuv420p).

    Args:
        video_preview: Array whose first axis is the frame axis.
        video_fps: Frame rate for the encoded video; formatted with ``%d``,
            so a fractional value is truncated.
        tracks: Array whose first axis indexes tracks.
        visibs: Per-track visibility, forwarded to the painter unchanged.
        rate: Forwarded to ``paint_point_track_gpu_scatter`` as ``rate``.

    Returns:
        Path of the written ``.mp4`` file, located in a ``tmp`` directory
        next to this source file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # Local imports: these modules are not otherwise required by this block.
    import subprocess
    import tempfile

    print('video_preview', video_preview.shape)
    T = video_preview.shape[0]

    # One RGB colour per track, computed in a single vectorised colormap call
    # instead of one Python-level call per point.
    query_count = tracks.shape[0]
    cmap = matplotlib.colormaps.get_cmap("gist_rainbow")
    rgba = cmap(np.arange(query_count) / float(query_count))
    colors = (rgba[:, :3] * 255).astype(int)

    painted_video = paint_point_track_gpu_scatter(video_preview, tracks, visibs, colors, rate=rate)

    # save video
    video_path = os.path.join(os.path.dirname(__file__), "tmp")
    os.makedirs(video_path, exist_ok=True)
    video_file_path = os.path.join(video_path, uuid.uuid4().hex + ".mp4")

    # Frames go into a private per-call directory so that concurrent requests
    # cannot pick up or delete each other's JPEGs; the directory is removed
    # on exit even when encoding fails.
    with tempfile.TemporaryDirectory(dir=video_path) as frame_dir:
        for ti in range(T):
            frame_file = os.path.join(frame_dir, '%06d.jpg' % ti)
            PIL.Image.fromarray(painted_video[ti]).save(frame_file)

        # Argument list (no shell); ffmpeg itself expands the glob pattern.
        subprocess.run(
            [
                '/usr/bin/ffmpeg', '-y', '-hide_banner', '-loglevel', 'error',
                '-f', 'image2',
                '-framerate', '%d' % video_fps,
                '-pattern_type', 'glob',
                '-i', os.path.join(frame_dir, '*.jpg'),
                '-c:v', 'libx264', '-crf', '20', '-pix_fmt', 'yuv420p',
                video_file_path,
            ],
            check=True,
        )

    print('saved', video_file_path)
    return video_file_path
|
| 625 |
+
|
| 626 |
+
|
| 627 |
@spaces.GPU
|
| 628 |
def track(
|
| 629 |
video_preview,
|
|
|
|
| 777 |
# traj_maps_e = traj_maps_e[:,:,:,::4,::4] # subsample
|
| 778 |
# visconf_maps_e = visconf_maps_e[:,:,:,::4,::4] # subsample
|
| 779 |
|
| 780 |
+
# traj_maps_e = traj_maps_e[:,:,:,::2,::2] # subsample
|
| 781 |
+
# visconf_maps_e = visconf_maps_e[:,:,:,::2,::2] # subsample
|
| 782 |
|
| 783 |
tracks = traj_maps_e.permute(0,3,4,1,2).reshape(-1,T,2).numpy()
|
| 784 |
visibs = visconf_maps_e.permute(0,3,4,1,2).reshape(-1,T,2)[:,:,0].numpy()
|
| 785 |
confs = visconf_maps_e.permute(0,3,4,1,2).reshape(-1,T,2)[:,:,0].numpy()
|
| 786 |
+
visibs = (visibs * confs) > 0.3 # N,T
|
|
|
|
| 787 |
# visibs = (confs) > 0.1 # N,T
|
| 788 |
|
| 789 |
|
| 790 |
# sc = (np.array([video_preview.shape[2], video_preview.shape[1]]) / np.array([VIDEO_INPUT_RESO[1], VIDEO_INPUT_RESO[0]])).reshape(1,1,2)
|
| 791 |
# print('sc', sc)
|
| 792 |
# tracks = tracks * sc
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 793 |
|
| 794 |
+
return paint_video(video_preview, video_fps, tracks, visibs), tracks, visibs, gr.update(interactive=True, value=1)
|
| 795 |
+
# gr.update(interactive=True),
|
| 796 |
+
# gr.update(interactive=True),
|
| 797 |
+
# gr.update(interactive=True),
|
| 798 |
+
# gr.update(interactive=True),
|
| 799 |
+
# gr.update(interactive=True))
|
| 800 |
+
# # query_count = tracks.shape[0]
|
| 801 |
|
| 802 |
+
|
| 803 |
+
# query_count = tracks.shape[0]
|
| 804 |
+
# cmap = matplotlib.colormaps.get_cmap("gist_rainbow")
|
| 805 |
+
# query_points_color = [[]]
|
| 806 |
+
# for i in range(query_count):
|
| 807 |
+
# # Choose the color for the point from matplotlib colormap
|
| 808 |
+
# color = cmap(i / float(query_count))
|
| 809 |
+
# color = (int(color[0] * 255), int(color[1] * 255), int(color[2] * 255))
|
| 810 |
+
# query_points_color[0].append(color)
|
| 811 |
+
# # make color array
|
| 812 |
+
# colors = []
|
| 813 |
+
# for frame_colors in query_points_color:
|
| 814 |
+
# colors.extend(frame_colors)
|
| 815 |
+
# colors = np.array(colors)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 816 |
|
| 817 |
+
# # visibs_ = visibs * 1.0
|
| 818 |
+
# # visibs_ = visibs_[:,1:] * visibs_[:,:-1]
|
| 819 |
+
# # inds = np.sum(visibs_, axis=1) >= min(T//4,8)
|
| 820 |
+
# # tracks = tracks[inds]
|
| 821 |
+
# # visibs = visibs[inds]
|
| 822 |
+
# # colors = colors[inds]
|
| 823 |
+
|
| 824 |
+
# # painted_video = paint_point_track_parallel(video_preview,tracks,visibs,colors)
|
| 825 |
+
# # painted_video = paint_point_track_gpu(video_preview,tracks,visibs,colors)
|
| 826 |
+
# painted_video = paint_point_track_gpu_scatter(video_preview,tracks,visibs,colors)
|
| 827 |
+
# print("7 torch.cuda.memory_allocated: %.1fGB"%(torch.cuda.memory_allocated(0)/1024/1024/1024))
|
| 828 |
+
|
| 829 |
+
# # save video
|
| 830 |
+
# video_file_name = uuid.uuid4().hex + ".mp4"
|
| 831 |
+
# video_path = os.path.join(os.path.dirname(__file__), "tmp")
|
| 832 |
+
# video_file_path = os.path.join(video_path, video_file_name)
|
| 833 |
+
|
| 834 |
+
# os.makedirs(video_path, exist_ok=True)
|
| 835 |
+
# if False:
|
| 836 |
+
# mediapy.write_video(video_file_path, painted_video, fps=video_fps)
|
| 837 |
+
# else:
|
| 838 |
+
# for ti in range(T):
|
| 839 |
+
# temp_out_f = '%s/%03d.jpg' % (video_path, ti)
|
| 840 |
+
# # temp_out_f = '%s/%03d.png' % (video_path, ti)
|
| 841 |
+
# im = PIL.Image.fromarray(painted_video[ti])
|
| 842 |
+
# # im.save(temp_out_f, "PNG", subsampling=0, quality=80)
|
| 843 |
+
# im.save(temp_out_f)
|
| 844 |
+
# print('saved', temp_out_f)
|
| 845 |
+
# # os.system('/usr/bin/ffmpeg -y -hide_banner -loglevel error -f image2 -framerate %d -pattern_type glob -i "%s/*.png" -c:v libx264 -crf 20 -pix_fmt yuv420p %s' % (video_fps, video_path, video_file_path))
|
| 846 |
+
# os.system('/usr/bin/ffmpeg -y -hide_banner -loglevel error -f image2 -framerate %d -pattern_type glob -i "%s/*.jpg" -c:v libx264 -crf 20 -pix_fmt yuv420p %s' % (video_fps, video_path, video_file_path))
|
| 847 |
+
# print('saved', video_file_path)
|
| 848 |
+
# for ti in range(T):
|
| 849 |
+
# # temp_out_f = '%s/%03d.png' % (video_path, ti)
|
| 850 |
+
# temp_out_f = '%s/%03d.jpg' % (video_path, ti)
|
| 851 |
+
# os.remove(temp_out_f)
|
| 852 |
+
# print('deleted', temp_out_f)
|
| 853 |
+
|
| 854 |
+
# # out_file = tempfile.NamedTemporaryFile(suffix="out.mp4", delete=False)
|
| 855 |
+
# # subprocess.run(f"ffmpeg -y -loglevel quiet -stats -i {painted_video} -c:v libx264 {out_file.name}".split())
|
| 856 |
|
| 857 |
|
| 858 |
|
| 859 |
+
# return video_file_path
|
| 860 |
|
| 861 |
|
| 862 |
with gr.Blocks() as demo:
|
|
|
|
| 871 |
is_tracked_query = gr.State([])
|
| 872 |
query_count = gr.State(0)
|
| 873 |
|
| 874 |
+
# rate = gr.State([])
|
| 875 |
+
tracks = gr.State([])
|
| 876 |
+
visibs = gr.State([])
|
| 877 |
+
|
| 878 |
gr.Markdown("# ⚡ AllTracker: Efficient Dense Point Tracking at High Resolution")
|
| 879 |
gr.Markdown("<div style='text-align: left;'> \
|
| 880 |
<p>Welcome to <a href='https://alltracker.github.io/' target='_blank'>AllTracker</a>! This space demonstrates all-pixel tracking in videos.</p> \
|
| 881 |
<p>To get started, simply upload your <b>.mp4</b> video, or click on one of the example videos. The shorter the video, the faster the processing. We recommend submitting videos under 20 seconds long.</p> \
|
| 882 |
<p>After picking a video, click \"Submit\" to load the frames into the app, and optionally choose a frame (using the slider), and then click \"Track\".</p> \
|
| 883 |
<p>For full info on how this works, check out our <a href='https://github.com/aharley/alltracker/' target='_blank'>GitHub Repo</a>!</p> \
|
| 884 |
+
<p>Initial code for this Gradio app came from LocoTrack and CoTracker -- big thanks to those authors!</p> \
|
| 885 |
</div>"
|
| 886 |
)
|
| 887 |
|
|
|
|
| 890 |
with gr.Row():
|
| 891 |
with gr.Column():
|
| 892 |
with gr.Row():
|
| 893 |
+
video_in = gr.Video(label="Video input", format="mp4")
|
| 894 |
with gr.Row():
|
| 895 |
submit = gr.Button("Submit")
|
| 896 |
with gr.Column():
|
| 897 |
# with gr.Accordion("Sample videos", open=True) as video_in_drawer:
|
| 898 |
with gr.Row():
|
| 899 |
+
butterfly = os.path.join(os.path.dirname(__file__), "videos", "butterfly_800.mp4")
|
| 900 |
monkey = os.path.join(os.path.dirname(__file__), "videos", "monkey_28.mp4")
|
| 901 |
+
groundbox = os.path.join(os.path.dirname(__file__), "videos", "ground-box-comp.mp4")
|
| 902 |
apple = os.path.join(os.path.dirname(__file__), "videos", "apple.mp4")
|
| 903 |
+
grasp_sponge_800 = os.path.join(os.path.dirname(__file__), "videos", "grasp_sponge_800.mp4")
|
| 904 |
+
# dog = os.path.join(os.path.dirname(__file__), "videos", "dog.mp4")
|
| 905 |
bear = os.path.join(os.path.dirname(__file__), "videos", "bear.mp4")
|
| 906 |
+
paragliding_launch = os.path.join(os.path.dirname(__file__), "videos", "paragliding-launch.mp4")
|
|
|
|
|
|
|
| 907 |
paragliding = os.path.join(os.path.dirname(__file__), "videos", "paragliding.mp4")
|
| 908 |
cat = os.path.join(os.path.dirname(__file__), "videos", "cat.mp4")
|
| 909 |
pillow = os.path.join(os.path.dirname(__file__), "videos", "pillow.mp4")
|
| 910 |
teddy = os.path.join(os.path.dirname(__file__), "videos", "teddy.mp4")
|
| 911 |
backpack = os.path.join(os.path.dirname(__file__), "videos", "backpack.mp4")
|
| 912 |
+
gr.Examples(examples=[butterfly, groundbox, monkey, grasp_sponge_800, bear, apple, paragliding, paragliding_launch, cat, pillow, teddy, backpack],
|
| 913 |
inputs = [
|
| 914 |
video_in
|
| 915 |
],
|
|
|
|
| 922 |
with gr.Column():
|
| 923 |
with gr.Row():
|
| 924 |
query_frame_slider = gr.Slider(
|
| 925 |
+
minimum=0, maximum=100, value=0, step=1, label="Choose frame", interactive=False)
|
| 926 |
# with gr.Row():
|
| 927 |
# undo = gr.Button("Undo", interactive=False)
|
| 928 |
# clear_frame = gr.Button("Clear Frame", interactive=False)
|
|
|
|
| 940 |
track_button = gr.Button("Track", interactive=False)
|
| 941 |
|
| 942 |
with gr.Column():
|
| 943 |
+
|
| 944 |
# with gr.Row():
|
| 945 |
+
# rate1_button = gr.Button("Subsampling", interactive=False)
|
| 946 |
+
# rate2_button = gr.Button("Stride 2", interactive=False)
|
| 947 |
+
# rate4_button = gr.Button("Rate 4", interactive=False)
|
| 948 |
+
# rate8_button = gr.Button("Rate 8", interactive=False)
|
| 949 |
+
# # rate16_button = gr.Button("Rate 16", interactive=False)
|
| 950 |
+
with gr.Row():
|
| 951 |
+
# rate_slider = gr.Slider(
|
| 952 |
+
# minimum=1, maximum=16, value=1, step=1, label="Choose subsampling rate", interactive=False)
|
| 953 |
+
rate_radio = gr.Radio([1, 2, 4, 8, 16], value=1, label="Choose visualization subsampling", interactive=False)
|
| 954 |
+
|
| 955 |
+
with gr.Row():
|
| 956 |
+
output_video = gr.Video(
|
| 957 |
+
label="Output video",
|
| 958 |
+
interactive=False,
|
| 959 |
+
autoplay=True,
|
| 960 |
+
loop=True,
|
| 961 |
+
)
|
| 962 |
|
| 963 |
|
| 964 |
|
|
|
|
| 995 |
queue = False
|
| 996 |
)
|
| 997 |
|
| 998 |
+
|
| 999 |
+
|
| 1000 |
# current_frame.select(
|
| 1001 |
# fn = get_point,
|
| 1002 |
# inputs = [
|
|
|
|
| 1087 |
],
|
| 1088 |
outputs = [
|
| 1089 |
output_video,
|
| 1090 |
+
tracks,
|
| 1091 |
+
visibs,
|
| 1092 |
+
rate_radio,
|
| 1093 |
+
# rate1_button,
|
| 1094 |
+
# rate2_button,
|
| 1095 |
+
# rate4_button,
|
| 1096 |
+
# rate8_button,
|
| 1097 |
+
# rate16_button,
|
| 1098 |
],
|
| 1099 |
queue = True,
|
| 1100 |
)
|
| 1101 |
|
| 1102 |
+
# rate_slider.change(
|
| 1103 |
+
# fn = choose_rate,
|
| 1104 |
+
# inputs = [rate_slider, video_preview, video_fps, tracks, visibs],
|
| 1105 |
+
# outputs = [
|
| 1106 |
+
# output_video,
|
| 1107 |
+
# ],
|
| 1108 |
+
# queue = False
|
| 1109 |
+
# )
|
| 1110 |
+
rate_radio.change(
|
| 1111 |
+
fn = choose_rate,
|
| 1112 |
+
inputs = [rate_radio, video_preview, video_fps, tracks, visibs],
|
| 1113 |
+
outputs = [
|
| 1114 |
+
output_video,
|
| 1115 |
+
],
|
| 1116 |
+
queue = False
|
| 1117 |
+
)
|
| 1118 |
+
|
| 1119 |
+
# rate1_button.click(
|
| 1120 |
+
# fn = choose_rate1,
|
| 1121 |
+
# inputs = [video_preview, video_fps, tracks, visibs],
|
| 1122 |
+
# outputs = [output_video],
|
| 1123 |
+
# queue = False,
|
| 1124 |
+
# )
|
| 1125 |
+
# rate2_button.click(
|
| 1126 |
+
# fn = choose_rate2,
|
| 1127 |
+
# inputs = [video_preview, video_fps, tracks, visibs],
|
| 1128 |
+
# outputs = [output_video],
|
| 1129 |
+
# queue = False,
|
| 1130 |
+
# )
|
| 1131 |
+
# rate4_button.click(
|
| 1132 |
+
# fn = choose_rate4,
|
| 1133 |
+
# inputs = [video_preview, video_fps, tracks, visibs],
|
| 1134 |
+
# outputs = [output_video],
|
| 1135 |
+
# queue = False,
|
| 1136 |
+
# )
|
| 1137 |
+
# rate8_button.click(
|
| 1138 |
+
# fn = choose_rate8,
|
| 1139 |
+
# inputs = [video_preview, video_fps, tracks, visibs],
|
| 1140 |
+
# outputs = [output_video],
|
| 1141 |
+
# queue = False,
|
| 1142 |
+
# )
|
| 1143 |
+
# rate16_button.click(
|
| 1144 |
+
# fn = choose_rate16,
|
| 1145 |
+
# inputs = [video_preview, video_fps, tracks, visibs],
|
| 1146 |
+
# outputs = [output_video],
|
| 1147 |
+
# queue = False,
|
| 1148 |
+
# )
|
| 1149 |
+
|
| 1150 |
+
|
| 1151 |
+
|
| 1152 |
|
| 1153 |
|
| 1154 |
|