toshas committed
Commit 1619d3a

Initial commit
.gitattributes ADDED
@@ -0,0 +1,38 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.jpg filter=lfs diff=lfs merge=lfs -text
+ *.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
+ .idea
+ .DS_Store
+ __pycache__
LICENSE.txt ADDED
@@ -0,0 +1,177 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
README.md ADDED
@@ -0,0 +1,24 @@
+ ---
+ title: Marigold Depth Completion
+ emoji: 🏵️
+ colorFrom: blue
+ colorTo: red
+ sdk: gradio
+ sdk_version: 4.44.1
+ app_file: app.py
+ pinned: true
+ license: apache-2.0
+ models:
+ - prs-eth/marigold-depth-v1-0
+ ---
+
+ This is a demo of the Marigold-DC monocular depth completion pipeline, which builds on the CVPR 2024 paper ["Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation"](https://arxiv.org/abs/2312.02145).
+
+ ```
+ @InProceedings{ke2023repurposing,
+   title={Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation},
+   author={Bingxin Ke and Anton Obukhov and Shengyu Huang and Nando Metzger and Rodrigo Caye Daudt and Konrad Schindler},
+   booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+   year={2024}
+ }
+ ```
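
A minimal sketch for running the pipeline outside the Space, assuming the LFS example files were fetched with `git lfs pull`; the checkpoint name and the 768 px processing cap follow `app.py`:

```python
import numpy as np
import torch
from PIL import Image

from marigold_dc import MarigoldDepthCompletionPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = MarigoldDepthCompletionPipeline.from_pretrained(
    "prs-eth/marigold-depth-v1-0"  # checkpoint name as used in app.py
).to(device)

image = Image.open("files/kitti_1.png")
sparse = np.load("files/kitti_1.npy")  # 2D float array, zeros at missing positions

# The pipeline is a generator: every denoising step yields the current metric
# depth estimate (a [1,1,H,W] tensor) and the RMSE against the sparse points.
for pred, rmse in pipe(
    image=image,
    sparse_depth=sparse,
    num_inference_steps=50,
    processing_resolution=768,
):
    print(f"RMSE vs. sparse guidance: {rmse:.4f}")
```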
app.py ADDED
@@ -0,0 +1,340 @@
+ # Copyright 2024 Anton Obukhov, ETH Zurich. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # --------------------------------------------------------------------------
+ # If you find this code useful, we kindly ask you to cite our paper in your work.
+ # Please find bibtex at: https://github.com/prs-eth/Marigold#-citation
+ # More information about the method can be found at https://marigoldmonodepth.github.io
+ # --------------------------------------------------------------------------
+
+ import functools
+ import os
+
+ import spaces
+ import gradio as gr
+ import numpy as np
+ import plotly.graph_objects as go
+ import torch
+ from PIL import Image
+ from scipy.ndimage import maximum_filter
+
+ from marigold_dc import MarigoldDepthCompletionPipeline
+
+ from gradio_imageslider import ImageSlider
+ from huggingface_hub import login
+
+ DRY_RUN = False
+
+
+ def dilate_rgb_image(image, kernel_size):
+     # Dilate each channel independently so the sparse depth dots
+     # become large enough to see in the visualization.
+     r_channel, g_channel, b_channel = image[..., 0], image[..., 1], image[..., 2]
+     r_dilated = maximum_filter(r_channel, size=kernel_size)
+     g_dilated = maximum_filter(g_channel, size=kernel_size)
+     b_dilated = maximum_filter(b_channel, size=kernel_size)
+     dilated_image = np.stack([r_dilated, g_dilated, b_dilated], axis=-1)
+     return dilated_image
+
+
+ def generate_rmse_plot(steps, metrics, denoise_steps):
+     y_min = min(metrics)
+     y_max = max(metrics)
+     fig = go.Figure()
+     fig.add_trace(
+         go.Scatter(
+             x=steps,
+             y=metrics,
+             mode="lines+markers",
+             line=dict(color="#af2928"),
+             name="RMSE",
+         )
+     )
+
+     if denoise_steps < 20:
+         x_dtick = 1
+     else:
+         x_dtick = 5
+
+     fig.update_layout(
+         autosize=False,
+         height=300,
+         xaxis_title="Steps",
+         xaxis_range=[0, denoise_steps + 1],
+         xaxis=dict(
+             scaleanchor="y",
+             scaleratio=1.5,
+             dtick=x_dtick,
+         ),
+         yaxis_title="RMSE",
+         yaxis_range=[np.log10(max(y_min - 0.1, 0.1)), np.log10(y_max + 1)],
+         yaxis=dict(
+             type="log",
+         ),
+         hovermode="x unified",
+         template="plotly_white",
+     )
+     return fig
+
+
+ def process(
+     pipe,
+     path_image,
+     path_sparse,
+     denoise_steps,
+ ):
+     image = Image.open(path_image)
+     sparse_depth = np.load(path_sparse)
+     sparse_depth_valid = sparse_depth[sparse_depth > 0]
+     sparse_depth_min = np.min(sparse_depth_valid)
+     sparse_depth_max = np.max(sparse_depth_valid)
+     width, height = image.size
+     max_dim = max(width, height)
+
+     # Cap the processing resolution of large inputs at 768 px.
+     processing_resolution = 0
+     if max_dim > 768:
+         processing_resolution = 768
+
+     metrics = []
+     steps = []
+
+     for step, (pred, rmse) in enumerate(
+         pipe(
+             image=image,
+             sparse_depth=sparse_depth,
+             num_inference_steps=denoise_steps + 1,
+             processing_resolution=processing_resolution,
+             dry_run=DRY_RUN,
+         )
+     ):
+         # Shared color range across the sparse input and the prediction.
+         min_both = min(sparse_depth_min, pred.min().item())
+         max_both = max(sparse_depth_max, pred.max().item())
+         metrics.append(rmse)
+         steps.append(step)
+
+         vis_pred = pipe.image_processor.visualize_depth(
+             pred, val_min=min_both, val_max=max_both
+         )[0]
+
+         vis_sparse = pipe.image_processor.visualize_depth(
+             sparse_depth, val_min=min_both, val_max=max_both
+         )[0]
+         vis_sparse = np.array(vis_sparse)
+         vis_sparse[sparse_depth <= 0] = (0, 0, 0)
+         vis_sparse = dilate_rgb_image(vis_sparse, kernel_size=5)
+         vis_sparse = Image.fromarray(vis_sparse)
+
+         plot = generate_rmse_plot(steps, metrics, denoise_steps)
+
+         yield (
+             [vis_sparse, vis_pred],
+             plot,
+         )
+
+
+ def run_demo_server(pipe):
+     process_pipe = spaces.GPU(functools.partial(process, pipe))
+     os.environ["GRADIO_ALLOW_FLAGGING"] = "never"
+
+     with gr.Blocks(
+         analytics_enabled=False,
+         title="Marigold Depth Completion",
+         css="""
+             #short {
+                 height: 130px;
+             }
+             .slider .inner {
+                 width: 4px;
+                 background: #FFF;
+             }
+             .slider .icon-wrap svg {
+                 fill: #FFF;
+                 stroke: #FFF;
+                 stroke-width: 3px;
+             }
+             #viewport {
+                 aspect-ratio: 4/3;
+             }
+             h1 {
+                 text-align: center;
+                 display: block;
+             }
+             h2 {
+                 text-align: center;
+                 display: block;
+             }
+             h3 {
+                 text-align: center;
+                 display: block;
+             }
+         """,
+     ) as demo:
+         gr.HTML(
+             """
+             <h1>⇆ Marigold-DC: Zero-Shot Monocular Depth Completion with Guided Diffusion</h1>
+             <p align="center">
+                 <a title="Website" href="https://MarigoldDepthCompletion.github.io/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+                     <img src="https://img.shields.io/badge/%F0%9F%A4%8D%20Project%20-Website-blue" alt="Website Badge">
+                 </a>
+                 <a title="arXiv" href="https://arxiv.org/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+                     <img src="https://img.shields.io/badge/%F0%9F%93%84%20Read%20-Paper-af2928" alt="arXiv Badge">
+                 </a>
+                 <a title="Github" href="https://github.com/prs-eth/marigold-dc" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+                     <img src="https://img.shields.io/github/stars/prs-eth/marigold-dc?label=GitHub&logo=github&color=C8C" alt="badge-github-stars">
+                 </a>
+                 <a title="Social" href="https://twitter.com/antonobukhov1" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+                     <img src="https://www.obukhov.ai/img/badges/badge-social.svg" alt="social">
+                 </a><br>
+                 Start exploring the interactive examples at the bottom of the page!
+             </p>
+             """
+         )
+
+         with gr.Row():
+             with gr.Column():
+                 input_image = gr.Image(
+                     label="Input Image",
+                     type="filepath",
+                 )
+                 input_sparse = gr.File(
+                     label="Input sparse depth (numpy file)",
+                     elem_id="short",
+                 )
+                 with gr.Accordion("Advanced options", open=False):
+                     denoise_steps = gr.Slider(
+                         label="Number of denoising steps",
+                         minimum=10,
+                         maximum=50,
+                         step=1,
+                         value=10,
+                     )
+                 with gr.Row():
+                     submit_btn = gr.Button(value="Compute Depth", variant="primary")
+                     clear_btn = gr.Button(value="Clear")
+             with gr.Column():
+                 output_slider = ImageSlider(
+                     label="Completed depth (red-near, blue-far)",
+                     type="filepath",
+                     show_download_button=True,
+                     show_share_button=True,
+                     interactive=False,
+                     elem_classes="slider",
+                     position=0.25,
+                 )
+                 plot = gr.Plot(
+                     label="RMSE between input and result",
+                     elem_id="viewport",
+                 )
+
+         inputs = [
+             input_image,
+             input_sparse,
+             denoise_steps,
+         ]
+         outputs = [
+             output_slider,
+             plot,
+         ]
+
+         def submit_depth_fn(path_image, path_sparse, denoise_steps):
+             yield from process_pipe(path_image, path_sparse, denoise_steps)
+
+         submit_btn.click(
+             fn=submit_depth_fn,
+             inputs=inputs,
+             outputs=outputs,
+         )
+
+         gr.Examples(
+             fn=submit_depth_fn,
+             examples=[
+                 [
+                     "files/kitti_1.png",
+                     "files/kitti_1.npy",
+                     10,  # denoise_steps
+                 ],
+                 [
+                     "files/kitti_2.png",
+                     "files/kitti_2.npy",
+                     10,  # denoise_steps
+                 ],
+                 [
+                     "files/teaser.png",
+                     "files/teaser_1000.npy",
+                     10,  # denoise_steps
+                 ],
+                 [
+                     "files/teaser.png",
+                     "files/teaser_100.npy",
+                     10,  # denoise_steps
+                 ],
+                 [
+                     "files/teaser.png",
+                     "files/teaser_10.npy",
+                     10,  # denoise_steps
+                 ],
+             ],
+             inputs=inputs,
+             outputs=outputs,
+             cache_examples="lazy",
+         )
+
+         def clear_fn():
+             return [
+                 gr.Image(value=None, interactive=True),
+                 gr.File(None, interactive=True),
+                 None,
+             ]
+
+         clear_btn.click(
+             fn=clear_fn,
+             inputs=[],
+             outputs=[
+                 input_image,
+                 input_sparse,
+                 output_slider,
+             ],
+         )
+
+     demo.queue(
+         api_open=False,
+     ).launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+     )
+
+
+ def main():
+     CHECKPOINT = "prs-eth/marigold-depth-v1-0"
+
+     os.system("pip freeze")  # log the environment for easier debugging
+
+     if "HF_TOKEN_LOGIN" in os.environ:
+         login(token=os.environ["HF_TOKEN_LOGIN"])
+
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+     pipe = MarigoldDepthCompletionPipeline.from_pretrained(CHECKPOINT)
+
+     try:
+         import xformers  # noqa: F401
+
+         pipe.enable_xformers_memory_efficient_attention()
+     except Exception:
+         pass  # run without xformers
+
+     pipe = pipe.to(device)
+     run_demo_server(pipe)
+
+
+ if __name__ == "__main__":
+     main()
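
`process` and `submit_depth_fn` above are generator functions; Gradio re-renders the bound outputs on every `yield`, which is what makes the depth preview and the RMSE plot update live during denoising. A minimal, self-contained sketch of that streaming pattern, with hypothetical component names not taken from this app:

```python
import time

import gradio as gr


def stream(n):
    # Each yield pushes an intermediate value to the UI,
    # just like process() yields once per denoising step.
    for i in range(int(n)):
        time.sleep(0.2)
        yield f"step {i + 1}/{int(n)}"


with gr.Blocks() as demo:
    steps = gr.Number(value=5, label="Steps")
    progress = gr.Textbox(label="Progress")
    gr.Button("Run").click(fn=stream, inputs=steps, outputs=progress)

if __name__ == "__main__":
    demo.queue().launch()
```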
files/kitti_1.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d7700e39fa4ccacd974ba2d76c3c4d94016e266f1cb99153a9d7ba89b4d46962
+ size 3424384
files/kitti_1.png ADDED

Git LFS Details
• SHA256: fde3b58a9c1dfde2dbeb464535df195880c972da1619dc00eaa7fe74fd0784ee
• Pointer size: 131 bytes
• Size of remote file: 728 kB
files/kitti_2.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a26a4c670640071c599e068f9b932e22a261150f3ecda1e46827751629c925f
+ size 3424384
files/kitti_2.png ADDED

Git LFS Details
• SHA256: 9e93cd1517f28597e0f2726d52f6054c31288a2d67ae0dfaf960db3605843215
• Pointer size: 131 bytes
• Size of remote file: 694 kB
files/teaser.png ADDED

Git LFS Details
• SHA256: 6218bd424d631e3f3e22905c900049f6b770e9a18e2562716fc4ad880af939f4
• Pointer size: 131 bytes
• Size of remote file: 521 kB
files/teaser_10.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:32e88cc8bf7a332d656e7c21996f0fc382072eb6a5a192fc6b03fa199842a65e
+ size 2457728
files/teaser_100.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:44bf100a969b99061d597850eb0ed039b1cf79a61f9b9aea40e51fff632a6743
+ size 2457728
files/teaser_1000.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:86c7ef075046d10dd5edee50cca19472b9a268b778a1b1dd01d4474f01b1f3d3
+ size 2457728
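
The `.npy` entries above are Git LFS pointers; once resolved with `git lfs pull`, each holds a 2D sparse depth map with zeros at missing positions, the format that `marigold_dc.py` validates. A quick inspection sketch:

```python
import numpy as np

sparse = np.load("files/kitti_1.npy")
valid = sparse > 0  # zeros mark pixels without a depth measurement

print(sparse.shape, sparse.dtype)
print(f"valid fraction: {valid.mean():.4%}")
print(f"depth range: {sparse[valid].min():.2f} to {sparse[valid].max():.2f}")
```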
marigold_dc.py ADDED
@@ -0,0 +1,186 @@
+ import logging
+ import warnings
+
+ import diffusers
+ import numpy as np
+ import torch
+ from diffusers import MarigoldDepthPipeline
+
+ warnings.simplefilter(action="ignore", category=FutureWarning)
+ diffusers.utils.logging.disable_progress_bar()
+
+
+ class MarigoldDepthCompletionPipeline(MarigoldDepthPipeline):
+     def __call__(
+         self,
+         image,
+         sparse_depth,
+         num_inference_steps=50,
+         processing_resolution=0,
+         seed=2024,
+         dry_run=False,
+     ):
+         # Resolving variables
+         device = self._execution_device
+         generator = torch.Generator(device=device).manual_seed(seed)
+
+         if dry_run:
+             logging.warning("Dry run mode")
+             for i in range(num_inference_steps):
+                 yield np.array(image)[:, :, 0].astype(float), float(np.log(i + 1))
+             return
+
+         # Check inputs.
+         if num_inference_steps is None:
+             raise ValueError("Invalid num_inference_steps")
+         if not isinstance(sparse_depth, np.ndarray) or sparse_depth.ndim != 2:
+             raise ValueError(
+                 "Sparse depth should be a 2D numpy ndarray with zeros at missing positions"
+             )
+
+         with torch.no_grad():
+             # Prepare empty text conditioning
+             if self.empty_text_embedding is None:
+                 prompt = ""
+                 text_inputs = self.tokenizer(
+                     prompt,
+                     padding="do_not_pad",
+                     max_length=self.tokenizer.model_max_length,
+                     truncation=True,
+                     return_tensors="pt",
+                 )
+                 text_input_ids = text_inputs.input_ids.to(device)
+                 self.empty_text_embedding = self.text_encoder(text_input_ids)[
+                     0
+                 ]  # [1,2,1024]
+
+             # Preprocess input images
+             image, padding, original_resolution = self.image_processor.preprocess(
+                 image,
+                 processing_resolution=processing_resolution,
+                 device=device,
+                 dtype=self.dtype,
+             )  # [N,3,PPH,PPW]
+
+         if sparse_depth.shape != original_resolution:
+             raise ValueError(
+                 f"Sparse depth dimensions ({sparse_depth.shape}) must match that of the image ({original_resolution})"
+             )
+         with torch.no_grad():
+             # Encode input image into latent space
+             image_latent, pred_latent = self.prepare_latents(
+                 image, None, generator, 1, 1
+             )  # [N*E,4,h,w], [N*E,4,h,w]
+             del image
+
+         # Preprocess sparse depth
+         sparse_depth = torch.from_numpy(sparse_depth)[None, None].float()
+         sparse_depth = sparse_depth.to(device)
+         sparse_mask = sparse_depth > 0
+
+         # Set up optimization targets: a squared (hence non-negative) scale and
+         # shift of the affine-invariant prediction, plus the depth latent itself.
+         scale = torch.nn.Parameter(torch.ones(1, device=device), requires_grad=True)
+         shift = torch.nn.Parameter(torch.ones(1, device=device), requires_grad=True)
+         pred_latent = torch.nn.Parameter(pred_latent, requires_grad=True)
+
+         sparse_range = (
+             sparse_depth[sparse_mask].max() - sparse_depth[sparse_mask].min()
+         ).item()
+         sparse_lower = (sparse_depth[sparse_mask].min()).item()
+
+         def affine_to_metric(depth):
+             return (scale**2) * sparse_range * depth + (shift**2) * sparse_lower
+
+         def latent_to_metric(latent):
+             affine_invariant_prediction = self.decode_prediction(
+                 latent
+             )  # [E,1,PPH,PPW]
+             prediction = affine_to_metric(affine_invariant_prediction)
+             prediction = self.image_processor.unpad_image(
+                 prediction, padding
+             )  # [E,1,PH,PW]
+             prediction = self.image_processor.resize_antialias(
+                 prediction, original_resolution, "bilinear", is_aa=False
+             )  # [1,1,H,W]
+             return prediction
+
+         def loss_l1l2(pred, target):
+             out_l1 = torch.nn.functional.l1_loss(pred, target)
+             out_l2 = torch.nn.functional.mse_loss(pred, target)
+             out = out_l1 + out_l2
+             return out, out_l2.sqrt()
+
+         optimizer = torch.optim.Adam(
+             [
+                 {"params": [scale, shift], "lr": 0.005},
+                 {"params": [pred_latent], "lr": 0.05},
+             ]
+         )
+
+         # Process the denoising loop
+         self.scheduler.set_timesteps(num_inference_steps, device=device)
+         for t in self.progress_bar(
+             self.scheduler.timesteps, desc=f"Marigold-DC steps ({str(device)})..."
+         ):
+             optimizer.zero_grad()
+
+             batch_latent = torch.cat([image_latent, pred_latent], dim=1)  # [1,8,h,w]
+             noise = self.unet(
+                 batch_latent,
+                 t,
+                 encoder_hidden_states=self.empty_text_embedding,
+                 return_dict=False,
+             )[
+                 0
+             ]  # [1,4,h,w]
+
+             # Compute pred_epsilon to later rescale the depth latent gradient
+             with torch.no_grad():
+                 alpha_prod_t = self.scheduler.alphas_cumprod[t]
+                 beta_prod_t = 1 - alpha_prod_t
+                 pred_epsilon = (alpha_prod_t**0.5) * noise + (
+                     beta_prod_t**0.5
+                 ) * pred_latent
+
+             step_output = self.scheduler.step(
+                 noise, t, pred_latent, generator=generator
+             )
+
+             # Preview the final output depth, compute loss with guidance, backprop
+             pred_original_sample = step_output.pred_original_sample
+             current_metric_estimate = latent_to_metric(pred_original_sample)
+             loss, rmse = loss_l1l2(
+                 current_metric_estimate[sparse_mask], sparse_depth[sparse_mask]
+             )
+             loss.backward()
+
+             # Scale gradients up to match the magnitude of the predicted noise
+             with torch.no_grad():
+                 pred_epsilon_norm = torch.linalg.norm(pred_epsilon).item()
+                 depth_latent_grad_norm = torch.linalg.norm(pred_latent.grad).item()
+                 scaling_factor = pred_epsilon_norm / max(depth_latent_grad_norm, 1e-8)
+                 pred_latent.grad *= scaling_factor
+
+             optimizer.step()
+
+             # Take the scheduler step with the freshly optimized latent
+             with torch.no_grad():
+                 pred_latent.data = self.scheduler.step(
+                     noise, t, pred_latent, generator=generator
+                 ).prev_sample
+
+             yield current_metric_estimate, rmse.item()
+
+         del (
+             pred_original_sample,
+             current_metric_estimate,
+             step_output,
+             pred_epsilon,
+             noise,
+         )
+         torch.cuda.empty_cache()
+
+         # Offload all models
+         self.maybe_free_model_hooks()
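
For reference, the mapping optimized in `affine_to_metric` above can be written as

```latex
\hat{D} = s^{2}\,\bigl(d_{\max} - d_{\min}\bigr)\,d + t^{2}\,d_{\min}
```

where $d$ is the affine-invariant prediction, $s$ and $t$ are the learned scale and shift parameters, and $d_{\min}$, $d_{\max}$ are the extremes over the valid sparse points; squaring $s$ and $t$ keeps both coefficients non-negative without explicit constraints.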
requirements.txt ADDED
@@ -0,0 +1,14 @@
+ diffusers==0.31.0
+ gradio==4.44.1
+ gradio-imageslider==0.0.20
+ accelerate
+ matplotlib
+ numpy
+ pillow
+ plotly
+ scipy
+ spaces
+ torch
+ transformers
+ xformers
+ pandas