import gradio as gr
from PIL import Image
import torch
import cv2
import numpy as np
from torchvision import transforms
from transformers import AutoModelForImageSegmentation, DepthProImageProcessorFast, DepthProForDepthEstimation

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load BiRefNet for foreground segmentation (fp16 inference, assumes a CUDA device).
birefnet = AutoModelForImageSegmentation.from_pretrained('ZhengPeng7/BiRefNet', trust_remote_code=True)
torch.set_float32_matmul_precision('high')
birefnet.to(device)
birefnet.eval()
birefnet.half()


def extract_object(image, t1, t2):
    """Return a bokeh image (single-plane background blur from the
    segmentation mask) and a lens-blur image (three depth layers blurred
    with increasing sigma)."""
    # Data settings
    image_size = (1024, 1024)
    transform_image = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # Work at a fixed 512x512 resolution.
    imageResized = image.convert("RGB").resize((512, 512))
    image1 = imageResized.copy()
    input_images = transform_image(image1).unsqueeze(0).to(device).half()

    # Segmentation prediction
    with torch.no_grad():
        preds = birefnet(input_images)[-1].sigmoid().cpu()
    pred = preds[0].squeeze()
    pred_pil = transforms.ToPILImage()(pred)
    mask = pred_pil.resize(image1.size)
    image1.putalpha(mask)

    # Bokeh: blur the whole frame, then composite the sharp foreground
    # back in using the binarized segmentation mask.
    blurredBg = cv2.GaussianBlur(np.array(imageResized), (0, 0), sigmaX=15, sigmaY=15)
    maskArr = np.array(mask.convert("L"))
    _, maskBinary = cv2.threshold(maskArr, 127, 255, cv2.THRESH_BINARY)
    img = cv2.cvtColor(np.array(imageResized), cv2.COLOR_RGB2BGR)
    maskInv = cv2.bitwise_not(maskBinary)
    maskInv3 = cv2.cvtColor(maskInv, cv2.COLOR_GRAY2BGR)
    foreground = cv2.bitwise_and(img, cv2.bitwise_not(maskInv3))
    background = cv2.bitwise_and(blurredBg, maskInv3)
    finalImg = cv2.add(cv2.cvtColor(foreground, cv2.COLOR_BGR2RGB), background)

    # Depth estimation with DepthPro (loaded here for simplicity; could be
    # hoisted to module scope to avoid reloading on every call).
    imageProcessor = DepthProImageProcessorFast.from_pretrained("apple/DepthPro-hf")
    model = DepthProForDepthEstimation.from_pretrained("apple/DepthPro-hf").to(device)
    inputs = imageProcessor(images=imageResized, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    post_processed_output = imageProcessor.post_process_depth_estimation(
        outputs, target_sizes=[(imageResized.height, imageResized.width)],
    )
    field_of_view = post_processed_output[0]["field_of_view"]  # extra DepthPro outputs
    focal_length = post_processed_output[0]["focal_length"]   # (not used here)
    depth = post_processed_output[0]["predicted_depth"]

    # Normalize depth to [0, 255].
    depth = (depth - depth.min()) / (depth.max() - depth.min())
    depth = depth * 255.
    depth = depth.detach().cpu().numpy()
    # depthImg = Image.fromarray(depth.astype("uint8"))  # depth map, useful for debugging

    # Map the 0-99 slider values onto the 0-255 depth range,
    # e.g. t1=33 -> ~85, t2=66 -> ~170.
    threshold1 = (t1 / 100) * 255
    threshold2 = (t2 / 100) * 255

    # Precompute blurred versions for each region
    img_foreground = img.copy()  # No blur for foreground
    img_middleground = cv2.GaussianBlur(img, (0, 0), sigmaX=7, sigmaY=7)
    img_background = cv2.GaussianBlur(img, (0, 0), sigmaX=15, sigmaY=15)

    # Create masks for each region (as float arrays for proper blending)
    mask_fg = (depth < threshold1).astype(np.float32)
    mask_mg = ((depth >= threshold1) & (depth < threshold2)).astype(np.float32)
    mask_bg = (depth >= threshold2).astype(np.float32)

    # Expand masks to 3 channels (H, W, 3)
    mask_fg = np.stack([mask_fg] * 3, axis=-1)
    mask_mg = np.stack([mask_mg] * 3, axis=-1)
    mask_bg = np.stack([mask_bg] * 3, axis=-1)

    # Combine the images using the masks in a vectorized manner.
    final_img = (img_foreground * mask_fg
                 + img_middleground * mask_mg
                 + img_background * mask_bg).astype(np.uint8)

    # Convert the layered result back to RGB and return both effects.
    final_img_rgb = cv2.cvtColor(final_img, cv2.COLOR_BGR2RGB)
    return Image.fromarray(finalImg), Image.fromarray(final_img_rgb)
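
# The hard 0/1 depth masks above can leave visible seams where the blur
# level jumps between layers. A minimal sketch of one common refinement
# (not wired into the app): feather each mask with a small Gaussian blur
# and renormalize so the per-pixel weights still sum to 1. The helper
# assumes BGR uint8 layers and 3-channel float masks in [0, 1], mirroring
# the variables inside extract_object; `feather_sigma` is a made-up knob.
def blend_with_feathering(layers, masks, feather_sigma=3):
    """Alpha-blend image layers using Gaussian-feathered float masks."""
    soft = [cv2.GaussianBlur(m, (0, 0), feather_sigma) for m in masks]
    total = np.maximum(sum(soft), 1e-6)  # avoid divide-by-zero after feathering
    out = sum(layer.astype(np.float32) * (s / total)
              for layer, s in zip(layers, soft))
    return out.astype(np.uint8)

# Example (inside extract_object, in place of the hard-mask blend):
#   final_img = blend_with_feathering(
#       [img_foreground, img_middleground, img_background],
#       [mask_fg, mask_mg, mask_bg])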

# Create the Gradio interface
title = "Gaussian Blur Background App"
description = (
    "Upload an image to apply a realistic background blur effect. "
    "The app segments the foreground with BiRefNet and applies a Gaussian "
    "blur (sigma=15) to the background, simulating a video-conferencing "
    "blur. It also estimates depth with DepthPro and renders a three-layer "
    "lens blur; the sliders set the depth cutoffs between foreground, "
    "middleground, and background."
)

iface = gr.Interface(
    fn=extract_object,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Slider(minimum=0, maximum=40, step=1, value=33, label="Middleground"),
        gr.Slider(minimum=40, maximum=99, step=1, value=66, label="Background"),
    ],
    outputs=[
        gr.Image(type="pil", label="Bokeh Image"),
        gr.Image(type="pil", label="Lens Blur Image"),
    ],
    title=title,
    description=description,
    allow_flagging="never",
)

iface.queue(default_concurrency_limit=1)
iface.launch()
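
# Note: when running somewhere localhost is unreachable (e.g. Colab),
# Gradio can tunnel a temporary public link instead -- a standard launch
# option, not specific to this app:
#   iface.launch(share=True)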