Spaces:

Sutirtha
/

dadai

Runtime error

App Files Files Community

Sutirtha committed on Apr 23

Commit

f18b0fd

verified ·

1 Parent(s): 8ded0c3

Updated Bria

Browse files

Files changed (1) hide show

app.py +154 -113

app.py CHANGED Viewed

@@ -1,116 +1,157 @@
-import os
-import io
-import base64
-import requests
-import numpy as np
 import gradio as gr
 from PIL import Image
-import onnxruntime
-import cv2
-# ——————————————————————————————————————————————————————————————
-#  Configuration
-# ——————————————————————————————————————————————————————————————
-HF_TOKEN = os.environ["HF_TOKEN_API_DEMO"]
-AUTH_HEADERS = {"api_token": HF_TOKEN}
-BRIA_API_URL = "http://engine.prod.bria-api.com/v1/gen_fill"
-# List your local ONNX upscaler model names (without .ort extension)
-UPSCALE_MODELS = ["modelx2", "modelx4"]
-# ——————————————————————————————————————————————————————————————
-#  Helper Functions
-# ——————————————————————————————————————————————————————————————
-def pil_to_base64(img: Image.Image) -> str:
-    """Convert a PIL image to a base64 string prefixed with a comma."""
-    buf = io.BytesIO()
-    img.save(buf, format="PNG")
-    b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
-    return f",{b64}"
-def download_pil_image(url: str) -> Image.Image:
-    r = requests.get(url)
-    return Image.open(io.BytesIO(r.content)).convert("RGB")
-def gen_fill(image: Image.Image, mask: Image.Image, prompt: str) -> Image.Image:
-    """Call the BRIA Generative Fill API."""
-    payload = {
-        "file": pil_to_base64(image),
-        "mask_file": pil_to_base64(mask),
-        "prompt": prompt,
-        "steps_num": 12,
-        "sync": True,
-    }
-    res = requests.post(BRIA_API_URL, json=payload, headers=AUTH_HEADERS).json()
-    return download_pil_image(res["urls"][0])
-def to_onnx_input(img: np.ndarray) -> np.ndarray:
-    img = img[:, :, :3]  # BGR or RGB first three channels
-    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # ensure RGB
-    img = img.astype(np.float32) / 255.0
-    img = np.transpose(img, (2, 0, 1))[None, ...]
-    return img
-def from_onnx_output(arr: np.ndarray) -> np.ndarray:
-    arr = np.squeeze(arr, axis=0)
-    arr = np.clip(arr, 0, 1) * 255
-    arr = np.transpose(arr, (1, 2, 0)).astype(np.uint8)
-    return arr
-def upscale_image(img: Image.Image, model_name: str) -> Image.Image:
-    """Run ONNX upscaler on a PIL image."""
-    model_path = f"models/{model_name}.ort"
-    sess = onnxruntime.InferenceSession(model_path, sess_options=onnxruntime.SessionOptions())
-    inp = to_onnx_input(np.array(img)[:, :, ::-1])  # PIL is RGB, convert to BGR
-    out = sess.run(None, {sess.get_inputs()[0].name: inp})[0]
-    arr = from_onnx_output(out)
-    # The ONNX model outputs BGR; convert back to RGB
-    rgb = cv2.cvtColor(arr, cv2.COLOR_BGR2RGB)
-    return Image.fromarray(rgb)
-# ——————————————————————————————————————————————————————————————
-#  Gradio Interface
-# ——————————————————————————————————————————————————————————————
-with gr.Blocks(css="""
-    .gradio-container {max-width: 900px;}
-    #run_button {width:100%; height:48px;}
-    #image_editor img {object-fit: contain; width:100%; height:auto;}
-    #output_col img {object-fit: contain; width:100%; height:auto;}
-""") as demo:
-    gr.Markdown("## BRIA Generative Fill + ONNX Upscaler")
-    gr.Markdown("1. Upload your image and draw a mask.  2. Enter a prompt.  3. Choose an upscaler and click **Run**.")
     with gr.Row():
-        with gr.Column(scale=1):
-            editor = gr.ImageEditor(
-                label="Input Image & Mask",
-                tool="editor", brush=gr.Brush(color_mode="binary"),
-                height=400
-            )
-            prompt = gr.Textbox(label="Prompt", placeholder="e.g. “Add a sunset sky”")
-            upscaler = gr.Radio(
-                choices=UPSCALE_MODELS,
-                label="Select Upscaler Model",
-                value=UPSCALE_MODELS[0]
-            )
-            btn = gr.Button("Run", elem_id="run_button")
-        with gr.Column(scale=1, elem_id="output_col"):
-            output = gr.Image(label="High-Def Output", height=400)
-    def run_pipeline(ed_img, txt, model_name):
-        # ed_img is a RGBA numpy array: [:,:,0:3] = image, [:,:,3] = mask
-        pil_in = Image.fromarray(ed_img[:, :, :3], "RGB")
-        pil_mask = Image.fromarray(ed_img[:, :, 3], "L")
-        filled = gen_fill(pil_in, pil_mask, txt)
-        up_img = upscale_image(filled, model_name)
-        return up_img
-    btn.click(fn=run_pipeline, inputs=[editor, prompt, upscaler], outputs=[output])
-demo.launch()

 import gradio as gr
+import torch
+import torch.nn.functional as F
+import numpy as np
 from PIL import Image
+from io import BytesIO
+import requests
+from torchvision import transforms
+from diffusers import AutoencoderKL, LCMScheduler
+from pipeline_controlnet_sd_xl import StableDiffusionXLControlNetPipeline
+from controlnet import ControlNetModel
+# -- Utility Functions --
+def resize_image_to_retain_ratio(image: Image.Image) -> Image.Image:
+    """Resize ``image`` to roughly one megapixel while keeping its aspect ratio.
+
+    Both output dimensions are snapped down to a multiple of 8 and are never
+    allowed to fall below 8. Without that floor, an extremely narrow or
+    extremely wide input rounds one dimension down to 0, which raises
+    ``ZeroDivisionError`` (width 0) or makes ``Image.resize`` fail (height 0).
+
+    Args:
+        image: Source image; its ``width``, ``height`` and ``resize`` are used.
+
+    Returns:
+        A resized copy of ``image``.
+    """
+    pixel_number = 1024 * 1024  # target area in pixels
+    granularity = 8  # output dimensions are multiples of this value
+    ratio = image.width / image.height
+    width = int((pixel_number * ratio) ** 0.5)
+    # Snap down to the grid, but keep at least one grid cell.
+    width = max(granularity, width - width % granularity)
+    height = int(pixel_number / width)
+    height = max(granularity, height - height % granularity)
+    return image.resize((width, height))
+def prepare_mask(image: Image.Image, mask: Image.Image) -> Image.Image:
+    """Return ``mask`` as a single-channel ("L") image sized to match ``image``."""
+    grayscale = mask.convert("L")
+    target_size = image.size
+    return grayscale.resize(target_size)
+def download_image(url: str, timeout: float = 30.0) -> Image.Image:
+    """Download an image over HTTP and return it as an RGB PIL image.
+
+    Args:
+        url: Location of the image to fetch.
+        timeout: Seconds to wait for the server before giving up, so a stalled
+            connection cannot hang the app indefinitely.
+
+    Returns:
+        The downloaded image converted to "RGB".
+
+    Raises:
+        requests.HTTPError: If the server answers with a 4xx/5xx status.
+        requests.Timeout: If the server does not respond within ``timeout``.
+    """
+    resp = requests.get(url, timeout=timeout)
+    # Fail loudly on an error response instead of trying to decode an
+    # error page as image data.
+    resp.raise_for_status()
+    return Image.open(BytesIO(resp.content)).convert("RGB")
+# -- Model & Pipeline Initialization --
+# Run on the GPU when one is available, otherwise fall back to the CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Load ControlNet model
+controlnet = (
+    ControlNetModel.from_pretrained(
+        "briaai/BRIA-2.3-ControlNet-Generative-Fill", torch_dtype=torch.float16
+    )
+    .to(device)
+)
+# Load VAE
+vae = (
+    AutoencoderKL.from_pretrained(
+        "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
+    )
+    .to(device)
+)
+# Load Stable Diffusion XL with ControlNet
+pipe = (
+    StableDiffusionXLControlNetPipeline.from_pretrained(
+        "briaai/BRIA-2.3",
+        controlnet=controlnet,
+        torch_dtype=torch.float16,
+        vae=vae,
+    )
+    .to(device)
+)
+# Swap in the LCM scheduler and fuse the FAST LoRA weights into the pipeline.
+pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
+pipe.load_lora_weights("briaai/BRIA-2.3-FAST-LORA")
+pipe.fuse_lora()
+# xformers attention is a GPU feature; only enable it when running on CUDA so
+# the CPU fallback selected above does not fail during start-up.
+# NOTE(review): this still requires the xformers package to be installed.
+if device.type == "cuda":
+    pipe.enable_xformers_memory_efficient_attention()
+# Tensor transform
+to_tensor = transforms.ToTensor()
+# -- Inference Function --
+def generative_fill(
+    image: Image.Image,
+    mask: Image.Image,
+    prompt: str,
+    negative_prompt: str = "blurry",
+    num_inference_steps: int = 12,
+    controlnet_conditioning_scale: float = 1.0,
+    guidance_scale: float = 1.2,
+    seed: int = 123456,
+) -> Image.Image:
+    """Fill the masked region of ``image`` according to ``prompt``.
+
+    The image is resized to about one megapixel, the masked pixels are greyed
+    out, the result is encoded with the pipeline's VAE, and the latents plus
+    the down-sampled mask are passed to the pipeline as the control image.
+
+    Args:
+        image: Input picture.
+        mask: Mask picture; values above 50% brightness mark the area to fill.
+        prompt: Text describing the desired content.
+        negative_prompt: Text describing what to avoid.
+        num_inference_steps: Number of denoising steps (coerced to ``int``).
+        controlnet_conditioning_scale: Strength of the ControlNet guidance.
+        guidance_scale: Classifier-free guidance scale.
+        seed: Seed for the random generator (coerced to ``int``).
+
+    Returns:
+        The first image produced by the pipeline.
+
+    Raises:
+        gr.Error: If ``image`` or ``mask`` is missing.
+    """
+    if image is None or mask is None:
+        raise gr.Error("Please provide both an input image and a mask.")
+    # UI widgets can deliver numeric values as floats, while the generator
+    # seed and the step count must be integers.
+    num_inference_steps = int(num_inference_steps)
+    seed = int(seed)
+    # Preprocess image & mask
+    image = resize_image_to_retain_ratio(image.convert("RGB"))
+    width, height = image.size
+    mask = prepare_mask(image, mask)
+    # Create masked version: pixels under the mask are set to mid-grey
+    img_arr = np.array(image).astype(np.float32) / 255.0
+    mask_arr = np.array(mask).astype(np.float32) / 255.0
+    masked_arr = img_arr.copy()
+    masked_arr[mask_arr > 0.5] = 0.5
+    masked_pil = Image.fromarray((masked_arr * 255).astype(np.uint8))
+    # Encode latents; input is scaled from [0, 1] to [-1, 1] first
+    input_tensor = (to_tensor(masked_pil) - 0.5) / 0.5
+    # Match the VAE's dtype: the VAE is loaded in float16, and feeding it a
+    # float32 tensor fails with a dtype mismatch.
+    input_tensor = input_tensor.unsqueeze(0).to(device=device, dtype=pipe.vae.dtype)
+    # Inference only: skip autograd bookkeeping while encoding.
+    with torch.no_grad():
+        latents = pipe.vae.encode(input_tensor[:, :3]).latent_dist.sample()
+    latents = latents * pipe.vae.config.scaling_factor
+    # Prepare mask tensor with batch and channel axes, at latent resolution
+    mask_tensor = torch.tensor(mask_arr[None, None], dtype=torch.float32, device=device)
+    mask_resized = F.interpolate(
+        mask_tensor, size=(latents.shape[2], latents.shape[3]), mode="nearest"
+    )
+    # Combine latents & mask along the channel axis for ControlNet
+    control_image = torch.cat([latents, mask_resized], dim=1)
+    # Generate
+    generator = torch.Generator(device=device).manual_seed(seed)
+    output = pipe(
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        num_inference_steps=num_inference_steps,
+        height=height,
+        width=width,
+        image=control_image,
+        init_image=image,
+        mask_image=mask_tensor,
+        controlnet_conditioning_scale=controlnet_conditioning_scale,
+        guidance_scale=guidance_scale,
+        generator=generator,
+    ).images[0]
+    return output
+# -- Gradio Interface --
+# Build the UI: two image inputs, the generation settings, and one output.
+with gr.Blocks() as demo:
+    gr.Markdown("## BRIA 2.3 ControlNet Generative Fill")
     with gr.Row():
+        inp_image = gr.Image(type="pil", label="Input Image")
+        inp_mask = gr.Image(type="pil", label="Mask (white = fill area)")
+    prompt_input = gr.Textbox(label="Prompt", placeholder="Describe what to fill...")
+    neg_prompt_input = gr.Textbox(label="Negative Prompt", value="blurry")
+    steps = gr.Slider(1, 50, value=12, step=1, label="Inference Steps")
+    c_scale = gr.Slider(0.0, 2.0, value=1.0, step=0.1, label="ControlNet Scale")
+    g_scale = gr.Slider(0.0, 20.0, value=1.2, step=0.1, label="Guidance Scale")
+    # precision=0 makes the component return an int; the seed is passed to
+    # torch.Generator.manual_seed, which does not accept a float.
+    seed_input = gr.Number(value=123456, label="Seed", precision=0)
+    run_btn = gr.Button("Generate")
+    output_image = gr.Image(type="pil", label="Generated Image")
+    # Inputs are listed in the same order as generative_fill's parameters.
+    run_btn.click(
+        generative_fill,
+        inputs=[
+            inp_image,
+            inp_mask,
+            prompt_input,
+            neg_prompt_input,
+            steps,
+            c_scale,
+            g_scale,
+            seed_input,
+        ],
+        outputs=output_image,
+    )
+    gr.Markdown("Model by BRIA AI | [Hugging Face](https://huggingface.co/briaai/BRIA-2.3-ControlNet-Generative-Fill)")
+demo.launch(server_name="0.0.0.0", share=True)