Spaces:
Runtime error
Runtime error
| # Hugging Face Space: 2D to 3D Stereo Pair Generator using Depth + LaMa Inpainting | |
| import gradio as gr | |
| import torch | |
| import numpy as np | |
| import cv2 | |
| from PIL import Image | |
| from transformers import DPTForDepthEstimation, DPTFeatureExtractor | |
| import requests | |
| import tempfile | |
| import subprocess | |
| import os | |
# === DEVICE ===
# Use the GPU when CUDA is available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


# === DEPTH MODEL ===
def load_depth_model():
    """Load the DPT depth model and its matching preprocessor.

    Both objects are created from the same pretrained checkpoint, and the
    model is moved to the module-level ``device``.

    Returns:
        A ``(model, processor)`` tuple.
    """
    checkpoint = "Intel/dpt-hybrid-midas"
    network = DPTForDepthEstimation.from_pretrained(checkpoint)
    network = network.to(device)
    preprocessor = DPTFeatureExtractor.from_pretrained(checkpoint)
    return network, preprocessor
def estimate_depth(image: Image.Image, model, processor):
    """Predict a depth map for ``image``, normalized to the range [0, 1].

    The image is resized to 384x384 before preprocessing, and the predicted
    depth is then resampled back to the size of the image that was passed in,
    so the result can be indexed with the same pixel coordinates as ``image``.

    Args:
        image: Input PIL image.
        model: Depth model; called with the processor's output and expected
            to expose ``predicted_depth`` on its result and a ``device``
            attribute.
        processor: Callable turning the image into model inputs.

    Returns:
        A 2-D NumPy array of shape ``(height, width)`` with values scaled so
        the smallest prediction is 0 and the largest is 1. If every predicted
        value is identical, an all-zero array is returned.
    """
    # PIL reports (width, height); remember it before shrinking the image.
    original_size = image.size
    resized = image.resize((384, 384))

    # Send the inputs to wherever the model actually lives.
    inputs = processor(images=resized, return_tensors="pt").to(model.device)

    # Inference only: skip building an autograd graph.
    with torch.no_grad():
        predicted = model(**inputs).predicted_depth
        depth = torch.nn.functional.interpolate(
            predicted.unsqueeze(1),
            size=original_size[::-1],  # interpolate expects (height, width)
            mode="bicubic",
            align_corners=False,
        )

    # Drop the batch and channel axes explicitly so a 1-pixel-wide or
    # 1-pixel-tall image still yields a 2-D array.
    depth = depth[0, 0].cpu().numpy()

    depth_min, depth_max = depth.min(), depth.max()
    span = depth_max - depth_min
    if span <= 0:
        # Constant prediction: normalizing would divide by zero.
        return np.zeros_like(depth)
    return (depth - depth_min) / span
def depth_to_disparity(depth, max_disp=32):
    """Convert depth values into horizontal pixel shifts.

    The mapping is linear and inverted: a depth of 0 maps to ``max_disp``
    and a depth of 1 maps to 0.

    Args:
        depth: Depth value or array.
            NOTE(review): presumably normalized to [0, 1] - confirm with caller.
        max_disp: Shift produced for a depth of 0.

    Returns:
        ``(1 - depth) * max_disp``, with the same shape as ``depth``.
    """
    inverted = 1.0 - depth
    return inverted * max_disp
def generate_right_and_mask(image, disparity):
    """Forward-warp ``image`` horizontally and report which pixels stay empty.

    Each source pixel ``(y, x)`` is copied to ``(y, x - d)``, where ``d`` is
    its disparity rounded to the nearest integer (halves round to even).
    Pixels whose target falls outside the image are dropped. When several
    source pixels land on the same target, the one with the largest ``x``
    wins.

    Args:
        image: Array of shape ``(h, w)`` or ``(h, w, channels)``.
        disparity: Per-pixel horizontal shift covering at least ``(h, w)``;
            rows and columns beyond the image size are ignored. NaN entries
            are skipped, and shifts too large to land inside the image
            (including infinities) are dropped.

    Returns:
        ``(right, mask)``: the warped image (zeros where nothing landed) and
        a ``uint8`` mask that is 1 at empty pixels and 0 at filled ones.
    """
    h, w = image.shape[:2]
    right = np.zeros_like(image)
    mask = np.ones((h, w), dtype=np.uint8)

    disp = np.asarray(disparity, dtype=np.float64)[:h, :w]
    unusable = np.isnan(disp)
    # Any shift of magnitude >= w lands outside the image, so clipping to
    # [-w, w] keeps the integer conversion safe without changing the result.
    shifts = np.rint(np.clip(np.where(unusable, 0.0, disp), -w, w)).astype(np.intp)

    rows = np.arange(h)
    # Walk columns left to right so later columns overwrite earlier ones;
    # all rows of a column are moved at once. Within one column every row
    # has a distinct target, so the vectorized assignment is unambiguous.
    for x in range(w):
        x_r = x - shifts[:, x]
        keep = (x_r >= 0) & (x_r < w) & ~unusable[:, x]
        ys = rows[keep]
        xs = x_r[keep]
        right[ys, xs] = image[ys, x]
        mask[ys, xs] = 0
    return right, mask
# === LAMA INPAINTING ===
# NOTE(review): this is the address of the Space's web page; confirm that
# appending "/run/predict" really reaches an endpoint that serves predictions
# and that it accepts multipart file uploads.
LAMA_API = "https://huggingface.co/spaces/saic-mdal/lama-inpainting"


def run_lama_inpainting(image_bgr, mask):
    """Fill the masked areas of an image by calling the remote LaMa service.

    The image and mask are written as PNG files to a temporary directory,
    uploaded in a single POST request, and the result is downloaded from the
    location given in the JSON reply. The temporary directory and all file
    handles are released before returning.

    Args:
        image_bgr: Image array in BGR channel order.
        mask: Array that is multiplied by 255 before upload, so entries equal
            to 1 are sent as white.

    Returns:
        The inpainted image as an array in BGR channel order.

    Raises:
        RuntimeError: If the service does not answer with HTTP 200.
        requests.RequestException: On network errors, timeouts, or a failed
            download of the result image.
    """
    timeout_s = 120  # never hang a request forever on an unresponsive service

    img = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
    mask_img = Image.fromarray(mask * 255).convert("RGB")

    # The context managers guarantee the upload handles are closed and the
    # temporary files are deleted even if the request raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        img_path = os.path.join(tmp_dir, "input.png")
        mask_path = os.path.join(tmp_dir, "mask.png")
        img.save(img_path)
        mask_img.save(mask_path)

        with open(img_path, "rb") as img_file, open(mask_path, "rb") as mask_file:
            response = requests.post(
                f"{LAMA_API}/run/predict",
                files={"image": img_file, "mask": mask_file},
                timeout=timeout_s,
            )

    if response.status_code != 200:
        raise RuntimeError(
            f"LAMA inpainting failed (HTTP {response.status_code})"
        )

    result_url = response.json()["data"][0]["name"]
    with requests.get(result_url, stream=True, timeout=timeout_s) as download:
        download.raise_for_status()
        # Have the raw stream undo any gzip/deflate transfer encoding.
        download.raw.decode_content = True
        # convert() reads the whole image, so the connection can be closed.
        result = Image.open(download.raw).convert("RGB")

    return cv2.cvtColor(np.array(result), cv2.COLOR_RGB2BGR)
# === APP LOGIC ===
# Load the depth model once at import time so every request reuses it.
depth_model, depth_processor = load_depth_model()


def stereo_pipeline(image_pil):
    """Build a left/right stereo pair from a single uploaded image.

    Steps: estimate depth, convert it to per-pixel disparity, warp the image
    to produce the right-eye view, then inpaint the holes left by the warp.

    Args:
        image_pil: The uploaded PIL image, or ``None`` when nothing was
            uploaded.

    Returns:
        ``(left, right)``: the image exactly as uploaded, and the generated
        right-eye view as an RGB PIL image.

    Raises:
        gr.Error: If no image was provided.
    """
    if image_pil is None:
        # Submitting the form without an image passes None; report it to the
        # user instead of failing on the attribute access below.
        raise gr.Error("Please upload an image first.")

    image = image_pil.convert("RGB")
    image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

    depth = estimate_depth(image, depth_model, depth_processor)
    height, width = image_cv.shape[:2]
    if depth.shape[:2] != (height, width):
        # The warp indexes disparity with image pixel coordinates, so the
        # depth map must have the image's size. cv2.resize takes (w, h).
        depth = cv2.resize(
            depth.astype(np.float32), (width, height), interpolation=cv2.INTER_CUBIC
        )

    disparity = depth_to_disparity(depth)
    right_img, mask = generate_right_and_mask(image_cv, disparity)
    right_filled = run_lama_inpainting(right_img, mask)

    right = Image.fromarray(cv2.cvtColor(right_filled, cv2.COLOR_BGR2RGB))
    return image_pil, right
# === GRADIO UI ===
# One uploaded image goes in; two images come out: the original as the left
# eye and the generated view as the right eye.
demo = gr.Interface(
    title="2D to 3D Stereo Generator with LaMa Inpainting",
    description=(
        "Generates a stereo pair from a 2D image using depth estimation "
        "and LaMa AI inpainting to handle occluded pixels in the "
        "right-eye view."
    ),
    fn=stereo_pipeline,
    inputs=gr.Image(label="Upload 2D Image", type="pil"),
    outputs=[
        gr.Image(label=eye_label)
        for eye_label in ("Left Eye (Original)", "Right Eye (AI Generated)")
    ],
)

# Start the web server for the app.
demo.launch()