fluxInpaint-testing

Runtime error

App Files Files Community

Himanshu-AT committed on Feb 1

Commit

3534d80

1 Parent(s): 2f6f08a

update titles in README and requirements, add opencv-python

Browse files

Files changed (4) hide show

.DS_Store +0 -0
app.py +175 -92
readme.md +1 -1
requirements.txt +1 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

app.py CHANGED Viewed

@@ -1,61 +1,148 @@
 import gradio as gr
 import numpy as np
-import os
-import spaces
-import random
-import json
-# from image_gen_aux import DepthPreprocessor
-from PIL import Image
 import torch
-from torchvision import transforms
-from diffusers import FluxFillPipeline, AutoencoderKL
 from PIL import Image
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 2048
-pipe = FluxFillPipeline.from_pretrained("black-forest-labs/FLUX.1-Fill-dev", torch_dtype=torch.bfloat16).to("cuda")
-# pipe.load_lora_weights("Himanshu806/testLora")
-# pipe.enable_lora()
-with open("lora_models.json", "r") as f:
-    lora_models = json.load(f)
-def download_model(model_name, model_path):
-    print(f"Downloading model: {model_name} from {model_path}")
-    try:
-        pipe.load_lora_weights(model_path)
-        print(f"Successfully downloaded model: {model_name}")
-    except Exception as e:
-        print(f"Failed to download model: {model_name}. Error: {e}")
-# Iterate through the models and download each one
-for model_name, model_path in lora_models.items():
-    download_model(model_name, model_path)
-lora_models["None"] = None
-@spaces.GPU(durations=300)
-def infer(edit_images, prompt, width, height, lora_model, seed=42, randomize_seed=False, guidance_scale=3.5, num_inference_steps=28, progress=gr.Progress(track_tqdm=True)):
-    # pipe.enable_xformers_memory_efficient_attention()
-    if lora_model != "None":
-        pipe.load_lora_weights(lora_models[lora_model])
-        pipe.enable_lora()
     image = edit_images["background"]
-    # width, height = calculate_optimal_dimensions(image)
-    mask = edit_images["layers"][0]
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
-    # controlImage = processor(image)
-    image = pipe(
-        # mask_image_latent=vae.encode(controlImage),
         prompt=prompt,
-        prompt_2=prompt,
         image=image,
         mask_image=mask,
         height=height,
@@ -63,23 +150,14 @@ def infer(edit_images, prompt, width, height, lora_model, seed=42, randomize_see
         guidance_scale=guidance_scale,
         num_inference_steps=num_inference_steps,
         generator=torch.Generator(device='cuda').manual_seed(seed),
-        # lora_scale=0.75 // not supported in this version
     ).images[0]
-    output_image_jpg = image.convert("RGB")
     output_image_jpg.save("output.jpg", "JPEG")
     return output_image_jpg, seed
-    # return image, seed
-examples = [
-    "photography of a young woman,  accent lighting,  (front view:1.4),  "
-    # "a tiny astronaut hatching from an egg on the moon",
-    # "a cat holding a sign that says hello world",
-    # "an anime illustration of a wiener schnitzel",
-]
-css="""
 #col-container {
     margin: 0 auto;
     max-width: 1000px;
@@ -87,41 +165,51 @@ css="""
 """
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(f"""# FLUX.1 [dev]
-        """)
         with gr.Row():
             with gr.Column():
                 edit_image = gr.ImageEditor(
-                    label='Upload and draw mask for inpainting',
                     type='pil',
                     sources=["upload", "webcam"],
                     image_mode='RGB',
-                    layers=False,
                     brush=gr.Brush(colors=["#FFFFFF"]),
-                    # height=600
                 )
                 prompt = gr.Text(
-                    label="Prompt",
                     show_label=False,
                     max_lines=2,
-                    placeholder="Enter your prompt",
                     container=False,
                 )
-                lora_model = gr.Dropdown(
-                    label="Select LoRA Model",
-                    choices=list(lora_models.keys()),
-                    value="None",
                 )
                 run_button = gr.Button("Run")
             result = gr.Image(label="Result", show_label=False)
         with gr.Accordion("Advanced Settings", open=False):
             seed = gr.Slider(
                 label="Seed",
                 minimum=0,
@@ -129,50 +217,45 @@ with gr.Blocks(css=css) as demo:
                 step=1,
                 value=0,
             )
             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
             with gr.Row():
                 guidance_scale = gr.Slider(
                     label="Guidance Scale",
                     minimum=1,
                     maximum=30,
                     step=0.5,
-                    value=50,
                 )
                 num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
                     minimum=1,
                     maximum=50,
                     step=1,
                     value=28,
                 )
-            with gr.Row():
-                width = gr.Slider(
-                    label="width",
-                    minimum=512,
-                    maximum=3072,
-                    step=1,
-                    value=1024,
-                )
-                height = gr.Slider(
-                    label="height",
-                    minimum=512,
-                    maximum=3072,
-                    step=1,
-                    value=1024,
-                )
     gr.on(
         triggers=[run_button.click, prompt.submit],
-        fn = infer,
-        inputs = [edit_image, prompt, width, height, lora_model, seed, randomize_seed, guidance_scale, num_inference_steps],
-        outputs = [result, seed]
     )
 # demo.launch()

 import gradio as gr
 import numpy as np
 import torch
+import random
 from PIL import Image
+import cv2
+import spaces
+# ------------------ Inpainting Pipeline Setup ------------------ #
+from diffusers import FluxFillPipeline
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 2048
# Load the FLUX fill (inpainting) pipeline in bfloat16 and place it on the GPU.
# infer() seeds a torch.Generator(device='cuda'), so the pipeline has to live
# on the same device; leaving it on the CPU makes the generator/device
# mismatch surface at the first inference call.
pipe = FluxFillPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-Fill-dev", torch_dtype=torch.bfloat16
).to("cuda")
# Attach the LoRA adapter once at start-up so every request reuses it.
pipe.load_lora_weights("alvdansen/flux-koda")
pipe.enable_lora()
def calculate_optimal_dimensions(image: Image.Image):
    """Pick an output ``(width, height)`` for the inpainting pipeline.

    The longer side of ``image`` is mapped to 1024 px and the shorter side is
    scaled to keep the aspect ratio; both are rounded down to a multiple of 8.
    If the result is wider than 16:9 or taller than 9:16, the longer side is
    shrunk until the ratio falls back inside that range.

    Args:
        image: Object exposing a PIL-style ``size`` tuple ``(width, height)``.

    Returns:
        A ``(width, height)`` tuple of ints, each a positive multiple of 8.

    Raises:
        ValueError: If either source dimension is not positive.
    """
    MIN_ASPECT_RATIO = 9 / 16
    MAX_ASPECT_RATIO = 16 / 9
    FIXED_DIMENSION = 1024
    MULTIPLE = 8

    original_width, original_height = image.size
    if original_width <= 0 or original_height <= 0:
        raise ValueError(
            f"Image dimensions must be positive, got {original_width}x{original_height}."
        )

    original_aspect_ratio = original_width / original_height

    # Pin the longer side to FIXED_DIMENSION and scale the other one.
    if original_aspect_ratio > 1:  # Wider than tall
        width = FIXED_DIMENSION
        height = round(FIXED_DIMENSION / original_aspect_ratio)
    else:  # Taller than wide (or square)
        height = FIXED_DIMENSION
        width = round(FIXED_DIMENSION * original_aspect_ratio)

    # Round down to a multiple of 8, but never to 0: an extreme aspect ratio
    # would otherwise produce a zero side and a division by zero below.
    width = max(width // MULTIPLE * MULTIPLE, MULTIPLE)
    height = max(height // MULTIPLE * MULTIPLE, MULTIPLE)

    # Enforce the aspect-ratio limits. The arithmetic goes through floats, so
    # convert back to int explicitly -- the pipeline needs integer sizes.
    calculated_aspect_ratio = width / height
    if calculated_aspect_ratio > MAX_ASPECT_RATIO:
        width = int(height * MAX_ASPECT_RATIO // MULTIPLE) * MULTIPLE
    elif calculated_aspect_ratio < MIN_ASPECT_RATIO:
        height = int(width / MIN_ASPECT_RATIO // MULTIPLE) * MULTIPLE

    return width, height
+# ------------------ SAM (Transformers) Imports and Initialization ------------------ #
+from transformers import SamModel, SamProcessor
+# Load the model and processor from Hugging Face.
+sam_model = SamModel.from_pretrained("facebook/sam-vit-base")
+sam_processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
# NOTE(review): the ZeroGPU decorator keyword is documented as `duration`
# (singular); the previous `durations=` spelling is not a documented option.
@spaces.GPU(duration=300)
def generate_mask_with_sam(image: Image.Image, mask_prompt: str):
    """Generate a binary segmentation mask with SAM from a single point prompt.

    Args:
        image: The PIL image to segment.
        mask_prompt: Comma-separated ``"x,y"`` pixel coordinate inside
            ``image`` (e.g. ``"450,600"``) marking the object to select.

    Returns:
        A single-channel PIL image with the same size as ``image`` whose
        pixels are 255 inside the selected object and 0 elsewhere.

    Raises:
        ValueError: If the prompt is empty, is not two integers, or points
            outside the image.
    """
    if not mask_prompt or not mask_prompt.strip():
        raise ValueError("No mask prompt provided.")
    try:
        # Parse the mask_prompt into an [x, y] coordinate.
        coords = [int(part.strip()) for part in mask_prompt.split(",")]
        if len(coords) != 2:
            raise ValueError("Expected two comma-separated integers (x,y).")
    except ValueError as e:
        raise ValueError(
            "Invalid mask prompt. Please provide coordinates as 'x,y'. Error: " + str(e)
        ) from e

    x, y = coords
    img_width, img_height = image.size
    if not (0 <= x < img_width and 0 <= y < img_height):
        raise ValueError(
            f"Mask prompt ({x},{y}) lies outside the {img_width}x{img_height} image."
        )

    # One image, one foreground point: points are nested as
    # [image][point][x, y] and labels as [image][point] (1 = foreground).
    inputs = sam_processor(
        images=image,
        input_points=[[coords]],
        input_labels=[[1]],
        return_tensors="pt",
    )
    # Keep the size metadata before moving tensors; it is needed to map the
    # low-resolution mask logits back to the original image resolution.
    original_sizes = inputs["original_sizes"].cpu()
    reshaped_input_sizes = inputs["reshaped_input_sizes"].cpu()

    # Move tensors to the same device as the model.
    device = next(sam_model.parameters()).device
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = sam_model(**inputs)

    # pred_masks holds low-resolution logits for several candidate masks per
    # point; post_process_masks upsamples them to the original image size and
    # binarizes them, returning one tensor per image.
    masks = sam_processor.image_processor.post_process_masks(
        outputs.pred_masks.cpu(), original_sizes, reshaped_input_sizes
    )
    # Pick the candidate SAM itself scores highest for this point.
    best = int(outputs.iou_scores[0, 0].argmax().item())
    mask = masks[0][0, best].numpy()

    # Convert the boolean mask to an 8-bit image (0 or 255).
    mask_bin = mask.astype(np.uint8) * 255
    return Image.fromarray(mask_bin)
+# ------------------ Inference Function ------------------ #
+@spaces.GPU(durations=300)
+def infer(edit_images, prompt, mask_prompt,
+          seed=42, randomize_seed=False, width=1024, height=1024,
+          guidance_scale=3.5, num_inference_steps=28, progress=gr.Progress(track_tqdm=True)):
+    # Get the base image from the "background" layer.
     image = edit_images["background"]
+    width, height = calculate_optimal_dimensions(image)
+    # If a mask prompt is provided, use the SAM-based mask generator.
+    if mask_prompt and mask_prompt.strip() != "":
+        try:
+            mask = generate_mask_with_sam(image, mask_prompt)
+        except Exception as e:
+            raise ValueError("Error generating mask from prompt: " + str(e))
+    else:
+        # Fall back to using a manually drawn mask (from the first layer).
+        try:
+            mask = edit_images["layers"][0]
+        except (TypeError, IndexError):
+            raise ValueError("No mask provided. Please either draw a mask or supply a mask prompt.")
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
+    # Run the inpainting diffusion pipeline with the provided prompt and mask.
+    image_out = pipe(
         prompt=prompt,
         image=image,
         mask_image=mask,
         height=height,
         guidance_scale=guidance_scale,
         num_inference_steps=num_inference_steps,
         generator=torch.Generator(device='cuda').manual_seed(seed),
     ).images[0]
+    output_image_jpg = image_out.convert("RGB")
     output_image_jpg.save("output.jpg", "JPEG")
     return output_image_jpg, seed
+# ------------------ Gradio UI ------------------ #
+css = """
 #col-container {
     margin: 0 auto;
     max-width: 1000px;
 """
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
+        gr.Markdown("# FLUX.1 [dev] with SAM (Transformers) Mask Generation")
         with gr.Row():
             with gr.Column():
+                # The image editor now allows you to optionally draw a mask.
                 edit_image = gr.ImageEditor(
+                    label='Upload Image (and optionally draw a mask)',
                     type='pil',
                     sources=["upload", "webcam"],
                     image_mode='RGB',
+                    layers=False,  # We will generate a mask automatically if needed.
                     brush=gr.Brush(colors=["#FFFFFF"]),
                 )
                 prompt = gr.Text(
+                    label="Inpainting Prompt",
                     show_label=False,
                     max_lines=2,
+                    placeholder="Enter your inpainting prompt",
                     container=False,
                 )
+                mask_prompt = gr.Text(
+                    label="Mask Prompt (enter a coordinate as 'x,y')",
+                    show_label=True,
+                    placeholder="E.g. 450,600",
+                    container=True,
                 )
+                generate_mask_btn = gr.Button("Generate Mask")
+                mask_preview = gr.Image(label="Mask Preview", show_label=True)
                 run_button = gr.Button("Run")
             result = gr.Image(label="Result", show_label=False)
+        # Button to preview the generated mask.
+        def on_generate_mask(image, mask_prompt):
+            if image is None or mask_prompt.strip() == "":
+                return None
+            mask = generate_mask_with_sam(image, mask_prompt)
+            return mask
+        generate_mask_btn.click(
+            fn=on_generate_mask,
+            inputs=[edit_image, mask_prompt],
+            outputs=[mask_preview]
+        )
         with gr.Accordion("Advanced Settings", open=False):
             seed = gr.Slider(
                 label="Seed",
                 minimum=0,
                 step=1,
                 value=0,
             )
             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
             with gr.Row():
+                width = gr.Slider(
+                    label="Width",
+                    minimum=256,
+                    maximum=MAX_IMAGE_SIZE,
+                    step=32,
+                    value=1024,
+                    visible=False
+                )
+                height = gr.Slider(
+                    label="Height",
+                    minimum=256,
+                    maximum=MAX_IMAGE_SIZE,
+                    step=32,
+                    value=1024,
+                    visible=False
+                )
+            with gr.Row():
                 guidance_scale = gr.Slider(
                     label="Guidance Scale",
                     minimum=1,
                     maximum=30,
                     step=0.5,
+                    value=3.5,
                 )
                 num_inference_steps = gr.Slider(
+                    label="Number of Inference Steps",
                     minimum=1,
                     maximum=50,
                     step=1,
                     value=28,
                 )
     gr.on(
         triggers=[run_button.click, prompt.submit],
+        fn=infer,
+        inputs=[edit_image, prompt, mask_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
+        outputs=[result, seed]
     )
 # demo.launch()

readme.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: Inpainting
 emoji: 🏆
 colorFrom: blue
 colorTo: purple

 ---
+title: Inpainting test
 emoji: 🏆
 colorFrom: blue
 colorTo: purple

requirements.txt CHANGED Viewed

@@ -8,3 +8,4 @@ peft
 xformers
 torchvision
 torch

 xformers
 torchvision
 torch
+opencv-python