Spaces:

vivien
/

depth-aware-caption

Sleeping

App Files Files Community

Vivien committed on May 5, 2022

Commit

b57c4d6

1 Parent(s): ac49d38

Recover previous version

Browse files

Files changed (1) hide show

app.py +238 -243

app.py CHANGED Viewed

@@ -1,243 +1,238 @@
-from streamlit import legacy_caching
-legacy_caching.clear_cache()
-# import numpy as np
-# from PIL import ImageDraw, Image, ImageFont
-# from transformers import DPTFeatureExtractor, DPTForDepthEstimation
-# import torch
-# import streamlit as st
-# FONTS = [
-#     "Font: Serif - EBGaramond",
-#     "Font: Serif - Cinzel",
-#     "Font: Sans - Roboto",
-#     "Font: Sans - Lato",
-#     "Font: Display - Lobster",
-#     "Font: Display - LilitaOne",
-#     "Font: Handwriting - GreatVibes",
-#     "Font: Handwriting - Pacifico",
-#     "Font: Mono - Inconsolata",
-#     "Font: Mono - Cutive",
-# ]
-# def hex_to_rgb(hex):
-#     rgb = []
-#     for i in (0, 2, 4):
-#         decimal = int(hex[i : i + 2], 16)
-#         rgb.append(decimal)
-#     return tuple(rgb)
-# @st.cache(allow_output_mutation=True)
-# def load():
-#     feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
-#     model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
-#     return model, feature_extractor
-# model, feature_extractor = load()
-# def compute_depth(image):
-#     inputs = feature_extractor(images=image, return_tensors="pt")
-#     with torch.no_grad():
-#         outputs = model(**inputs)
-#         predicted_depth = outputs.predicted_depth
-#     prediction = torch.nn.functional.interpolate(
-#         predicted_depth.unsqueeze(1),
-#         size=image.size[::-1],
-#         mode="bicubic",
-#         align_corners=False,
-#     )
-#     return prediction.cpu().numpy()[0, 0, :, :]
-# def get_mask1(
-#     shape, x, y, caption, font=None, font_size=0.08, color=(0, 0, 0), alpha=0.8
-# ):
-#     img_text = Image.new("RGBA", (shape[1], shape[0]), (0, 0, 0, 0))
-#     draw = ImageDraw.Draw(img_text)
-#     font = ImageFont.truetype(font, int(font_size * shape[1]))
-#     draw.text(
-#         (x * shape[1], (1 - y) * shape[0]),
-#         caption,
-#         fill=(*color, int(max(min(1, alpha), 0) * 255)),
-#         font=font,
-#     )
-#     text = np.array(img_text)
-#     mask1 = np.dot(np.expand_dims(text[:, :, -1] / 255, -1), np.ones((1, 3)))
-#     return text[:, :, :-1], mask1
-# def get_mask2(depth_map, depth):
-#     return np.expand_dims(
-#         (depth_map[:, :] < depth * np.min(depth_map) + (1 - depth) * np.max(depth_map)),
-#         -1,
-#     )
-# def add_caption(
-#     img,
-#     caption,
-#     depth_map=None,
-#     x=0.5,
-#     y=0.5,
-#     depth=0.5,
-#     font_size=50,
-#     color=(255, 255, 255),
-#     font="",
-#     alpha=1,
-# ):
-#     text, mask1 = get_mask1(
-#         img.shape,
-#         x,
-#         y,
-#         caption,
-#         font=font,
-#         font_size=font_size,
-#         color=color,
-#         alpha=alpha,
-#     )
-#     mask2 = get_mask2(depth_map, depth)
-#     mask = mask1 * np.dot(mask2, np.ones((1, 3)))
-#     return ((1 - mask) * img + mask * text).astype(np.uint8)
-# @st.cache(max_entries=30, show_spinner=False)
-# def load_img(uploaded_file):
-#     if uploaded_file is None:
-#         img = Image.open("pulp.jpg")
-#         default = True
-#     else:
-#         img = Image.open(uploaded_file)
-#         if img.size[0] > 800 or img.size[1] > 800:
-#             if img.size[0] < img.size[1]:
-#                 new_size = (int(800 * img.size[0] / img.size[1]), 800)
-#             else:
-#                 new_size = (800, int(800 * img.size[1] / img.size[0]))
-#             img = img.resize(new_size)
-#         default = False
-#     return np.array(img), compute_depth(img), default
-# def main():
-#     st.markdown(
-#         """
-#     <style>
-#         label{
-#             height: 0px !important;
-#             min-height: 0px !important;
-#             margin-bottom: 0px !important;
-#         }
-#     </style>
-#         """,
-#         unsafe_allow_html=True,
-#     )
-#     st.sidebar.markdown(
-#         """
-#     # Depth-aware text addition
-#     Add text ***inside*** an image!
-#     Upload an image, enter some text and adjust the ***depth*** where you want the text to be displayed. You can also define its location and appearance (font, color, transparency and size).
-#     Built with [PyTorch](https://pytorch.org/), Intel's [MiDaS model](https://pytorch.org/hub/intelisl_midas_v2/), [Streamlit](https://streamlit.io/), [pillow](https://python-pillow.org/) and inspired by the official [video](https://youtu.be/eTa1jHk1Lxc) of *Jenny of Oldstones* by Florence + the Machine
-#     """
-#     )
-#     uploaded_file = st.file_uploader("", type=["jpg", "jpeg"])
-#     with st.spinner("Analyzing the image - Please wait a few seconds"):
-#         img, depth_map, default = load_img(uploaded_file)
-#     if default:
-#         x0, y0, alpha0, font_size0, depth0, font0 = 0.02, 0.68, 0.99, 0.07, 0.12, 4
-#         text0 = "Pulp Fiction"
-#     else:
-#         x0, y0, alpha0, font_size0, depth0, font0 = 0.1, 0.9, 0.8, 0.08, 0.5, 0
-#         text0 = "Enter your text here"
-#     colA, colB, colC = st.columns((13, 1, 1))
-#     with colA:
-#         text = st.text_input("", text0)
-#     with colB:
-#         st.markdown("Color:")
-#     with colC:
-#         color = st.color_picker("", value="#FFFFFF")
-#     col1, _, col2 = st.columns((4, 1, 4))
-#     with col1:
-#         depth = st.select_slider(
-#             "",
-#             options=[i / 100 for i in range(101)],
-#             value=depth0,
-#             format_func=lambda x: "Foreground"
-#             if x == 0.0
-#             else "Background"
-#             if x == 1.0
-#             else "",
-#         )
-#         x = st.select_slider(
-#             "",
-#             options=[i / 100 for i in range(101)],
-#             value=x0,
-#             format_func=lambda x: "Left" if x == 0.0 else "Right" if x == 1.0 else "",
-#         )
-#         y = st.select_slider(
-#             "",
-#             options=[i / 100 for i in range(101)],
-#             value=y0,
-#             format_func=lambda x: "Bottom" if x == 0.0 else "Top" if x == 1.0 else "",
-#         )
-#     with col2:
-#         font_size = st.select_slider(
-#             "",
-#             options=[0.04 + i / 100 for i in range(0, 17)],
-#             value=font_size0,
-#             format_func=lambda x: "Small font"
-#             if x == 0.04
-#             else "Large font"
-#             if x == 0.2
-#             else "",
-#         )
-#         alpha = st.select_slider(
-#             "",
-#             options=[i / 100 for i in range(101)],
-#             value=alpha0,
-#             format_func=lambda x: "Transparent"
-#             if x == 0.0
-#             else "Opaque"
-#             if x == 1.0
-#             else "",
-#         )
-#         font = st.selectbox("", FONTS, index=font0)
-#     font = f"fonts/{font[6:]}.ttf"
-#     captioned = add_caption(
-#         img,
-#         text,
-#         x=x,
-#         y=y,
-#         depth=depth,
-#         depth_map=depth_map,
-#         font=font,
-#         font_size=font_size,
-#         alpha=alpha,
-#         color=hex_to_rgb(color[1:]),
-#     )
-#     st.image(captioned)
-# if __name__ == "__main__":
-#     main()

+import numpy as np
+from PIL import ImageDraw, Image, ImageFont
+from transformers import DPTFeatureExtractor, DPTForDepthEstimation
+import torch
+import streamlit as st
+# Labels offered in the font selector. main() drops the first six characters
+# of the chosen label (the "Font: " prefix) and opens "fonts/<remainder>.ttf",
+# so the remainder of every label doubles as a font file name.
+FONTS = [
+    "Font: Serif - EBGaramond",
+    "Font: Serif - Cinzel",
+    "Font: Sans - Roboto",
+    "Font: Sans - Lato",
+    "Font: Display - Lobster",
+    "Font: Display - LilitaOne",
+    "Font: Handwriting - GreatVibes",
+    "Font: Handwriting - Pacifico",
+    "Font: Mono - Inconsolata",
+    "Font: Mono - Cutive",
+]
+def hex_to_rgb(hex):
+    rgb = []
+    for i in (0, 2, 4):
+        decimal = int(hex[i : i + 2], 16)
+        rgb.append(decimal)
+    return tuple(rgb)
+@st.cache(allow_output_mutation=True)
+def load():
+    feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
+    model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
+    return model, feature_extractor
+model, feature_extractor = load()
+def compute_depth(image):
+    inputs = feature_extractor(images=image, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model(**inputs)
+        predicted_depth = outputs.predicted_depth
+    prediction = torch.nn.functional.interpolate(
+        predicted_depth.unsqueeze(1),
+        size=image.size[::-1],
+        mode="bicubic",
+        align_corners=False,
+    )
+    return prediction.cpu().numpy()[0, 0, :, :]
+def get_mask1(
+    shape, x, y, caption, font=None, font_size=0.08, color=(0, 0, 0), alpha=0.8
+):
+    img_text = Image.new("RGBA", (shape[1], shape[0]), (0, 0, 0, 0))
+    draw = ImageDraw.Draw(img_text)
+    font = ImageFont.truetype(font, int(font_size * shape[1]))
+    draw.text(
+        (x * shape[1], (1 - y) * shape[0]),
+        caption,
+        fill=(*color, int(max(min(1, alpha), 0) * 255)),
+        font=font,
+    )
+    text = np.array(img_text)
+    mask1 = np.dot(np.expand_dims(text[:, :, -1] / 255, -1), np.ones((1, 3)))
+    return text[:, :, :-1], mask1
+def get_mask2(depth_map, depth):
+    return np.expand_dims(
+        (depth_map[:, :] < depth * np.min(depth_map) + (1 - depth) * np.max(depth_map)),
+        -1,
+    )
+def add_caption(
+    img,
+    caption,
+    depth_map=None,
+    x=0.5,
+    y=0.5,
+    depth=0.5,
+    font_size=50,
+    color=(255, 255, 255),
+    font="",
+    alpha=1,
+):
+    text, mask1 = get_mask1(
+        img.shape,
+        x,
+        y,
+        caption,
+        font=font,
+        font_size=font_size,
+        color=color,
+        alpha=alpha,
+    )
+    mask2 = get_mask2(depth_map, depth)
+    mask = mask1 * np.dot(mask2, np.ones((1, 3)))
+    return ((1 - mask) * img + mask * text).astype(np.uint8)
+@st.cache(max_entries=30, show_spinner=False)
+def load_img(uploaded_file):
+    if uploaded_file is None:
+        img = Image.open("pulp.jpg")
+        default = True
+    else:
+        img = Image.open(uploaded_file)
+        if img.size[0] > 800 or img.size[1] > 800:
+            if img.size[0] < img.size[1]:
+                new_size = (int(800 * img.size[0] / img.size[1]), 800)
+            else:
+                new_size = (800, int(800 * img.size[1] / img.size[0]))
+            img = img.resize(new_size)
+        default = False
+    return np.array(img), compute_depth(img), default
+def main():
+    """Build the Streamlit page: upload, caption controls and the result image.
+
+    Widgets are created in the order they are called here, so statement order
+    determines the page layout.
+    """
+    # Collapse the space reserved for widget labels; every widget below is
+    # created with an empty label.
+    st.markdown(
+        """
+    <style>
+        label{
+            height: 0px !important;
+            min-height: 0px !important;
+            margin-bottom: 0px !important;
+        }
+    </style>
+        """,
+        unsafe_allow_html=True,
+    )
+    # Sidebar: short description and credits.
+    st.sidebar.markdown(
+        """
+    # Depth-aware text addition
+    Add text ***inside*** an image!
+    Upload an image, enter some text and adjust the ***depth*** where you want the text to be displayed. You can also define its location and appearance (font, color, transparency and size).
+    Built with [PyTorch](https://pytorch.org/), Intel's [MiDaS model](https://pytorch.org/hub/intelisl_midas_v2/), [Streamlit](https://streamlit.io/), [pillow](https://python-pillow.org/) and inspired by the official [video](https://youtu.be/eTa1jHk1Lxc) of *Jenny of Oldstones* by Florence + the Machine
+    """
+    )
+    uploaded_file = st.file_uploader("", type=["jpg", "jpeg"])
+    with st.spinner("Analyzing the image - Please wait a few seconds"):
+        img, depth_map, default = load_img(uploaded_file)
+    # Initial widget values: one preset for the bundled image, another for
+    # uploads. font0 is an index into FONTS.
+    if default:
+        x0, y0, alpha0, font_size0, depth0, font0 = 0.02, 0.68, 0.99, 0.07, 0.12, 4
+        text0 = "Pulp Fiction"
+    else:
+        x0, y0, alpha0, font_size0, depth0, font0 = 0.1, 0.9, 0.8, 0.08, 0.5, 0
+        text0 = "Enter your text here"
+    # Top row: caption text, a "Color:" label and the colour picker.
+    colA, colB, colC = st.columns((13, 1, 1))
+    with colA:
+        text = st.text_input("", text0)
+    with colB:
+        st.markdown("Color:")
+    with colC:
+        color = st.color_picker("", value="#FFFFFF")
+    # Two slider columns separated by a narrow spacer column.
+    col1, _, col2 = st.columns((4, 1, 4))
+    with col1:
+        # Only the two end positions of each slider get a text label.
+        depth = st.select_slider(
+            "",
+            options=[i / 100 for i in range(101)],
+            value=depth0,
+            format_func=lambda x: "Foreground"
+            if x == 0.0
+            else "Background"
+            if x == 1.0
+            else "",
+        )
+        x = st.select_slider(
+            "",
+            options=[i / 100 for i in range(101)],
+            value=x0,
+            format_func=lambda x: "Left" if x == 0.0 else "Right" if x == 1.0 else "",
+        )
+        y = st.select_slider(
+            "",
+            options=[i / 100 for i in range(101)],
+            value=y0,
+            format_func=lambda x: "Bottom" if x == 0.0 else "Top" if x == 1.0 else "",
+        )
+    with col2:
+        # NOTE(review): the options are computed as 0.04 + i / 100 while the
+        # labels and the initial value are matched with exact float equality
+        # (0.04, 0.2, font_size0) - confirm the sums hit those literals.
+        font_size = st.select_slider(
+            "",
+            options=[0.04 + i / 100 for i in range(0, 17)],
+            value=font_size0,
+            format_func=lambda x: "Small font"
+            if x == 0.04
+            else "Large font"
+            if x == 0.2
+            else "",
+        )
+        alpha = st.select_slider(
+            "",
+            options=[i / 100 for i in range(101)],
+            value=alpha0,
+            format_func=lambda x: "Transparent"
+            if x == 0.0
+            else "Opaque"
+            if x == 1.0
+            else "",
+        )
+        font = st.selectbox("", FONTS, index=font0)
+    # Drop the six-character "Font: " prefix to obtain the font file name.
+    font = f"fonts/{font[6:]}.ttf"
+    captioned = add_caption(
+        img,
+        text,
+        x=x,
+        y=y,
+        depth=depth,
+        depth_map=depth_map,
+        font=font,
+        font_size=font_size,
+        alpha=alpha,
+        # The colour picker value starts with "#"; strip it before parsing.
+        color=hex_to_rgb(color[1:]),
+    )
+    st.image(captioned)
+if __name__ == "__main__":
+    main()