add inpaint option

#1 opened by linoyts (HF Staff)
Files changed (1)
  1. app.py +29 -4
app.py CHANGED
@@ -361,8 +361,33 @@ def process(input_image, prompt,
     if t2v:
         default_height, default_width = 640, 640
         input_image = np.ones((default_height, default_width, 3), dtype=np.uint8) * 255
-        print("No input image provided. Using a blank white image.")
-
+        print("No input image provided. Using a blank white image.")
+    else:
+        composite_rgba_uint8 = input_image["composite"]
+
+        # rgb_uint8 will be (H, W, 3), dtype uint8
+        rgb_uint8 = composite_rgba_uint8[:, :, :3]
+        # mask_uint8 will be (H, W), dtype uint8
+        mask_uint8 = composite_rgba_uint8[:, :, 3]
+
+        # Create background
+        h, w = rgb_uint8.shape[:2]
+        # White background, (H, W, 3), dtype uint8
+        background_uint8 = np.full((h, w, 3), 255, dtype=np.uint8)
+
+        # Normalize mask to range [0.0, 1.0].
+        alpha_normalized_float32 = mask_uint8.astype(np.float32) / 255.0
+
+        # Expand alpha to 3 channels to match RGB images for broadcasting.
+        # alpha_mask_float32 will have shape (H, W, 3)
+        alpha_mask_float32 = np.stack([alpha_normalized_float32] * 3, axis=2)
+
+        # alpha blending
+        blended_image_float32 = rgb_uint8.astype(np.float32) * alpha_mask_float32 + \
+                                background_uint8.astype(np.float32) * (1.0 - alpha_mask_float32)
+
+        input_image = np.clip(blended_image_float32, 0, 255).astype(np.uint8)
+
     yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True)

     stream = AsyncStream()
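The new `else` branch treats the alpha channel of the editor's RGBA composite as a mask and blends the RGB channels over a white background: fully opaque pixels keep their colour, transparent pixels become white, matching the blank-white canvas used by the text-to-video path. A minimal standalone sketch of the same blend, using an illustrative `flatten_onto_white` helper (not part of this PR) and (H, W, 1) broadcasting instead of stacking the alpha into three channels:

```python
import numpy as np

def flatten_onto_white(composite_rgba_uint8: np.ndarray) -> np.ndarray:
    """Alpha-blend an (H, W, 4) uint8 RGBA image over a white background."""
    rgb = composite_rgba_uint8[:, :, :3].astype(np.float32)
    alpha = composite_rgba_uint8[:, :, 3:4].astype(np.float32) / 255.0  # (H, W, 1), broadcasts over RGB
    white = np.full_like(rgb, 255.0)
    blended = rgb * alpha + white * (1.0 - alpha)
    return np.clip(blended, 0, 255).astype(np.uint8)

# Quick check: an opaque red pixel stays red, a fully transparent pixel becomes white.
rgba = np.zeros((2, 1, 4), dtype=np.uint8)
rgba[0, 0] = [255, 0, 0, 255]  # opaque red
rgba[1, 0] = [0, 255, 0, 0]    # fully transparent green
out = flatten_onto_white(rgba)
assert (out[0, 0] == [255, 0, 0]).all()
assert (out[1, 0] == [255, 255, 255]).all()
```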
 
@@ -404,12 +429,12 @@ with block:
     gr.Markdown('# FramePack-F1')
     gr.Markdown(f"""### Video diffusion, but feels like image diffusion
     *FramePack F1 - a FramePack model that only predicts future frames from history frames*
-
+    ### *beta* FramePack Fill πŸ–‹οΈ- draw a mask over the input image to inpaint the video output
     adapted from the officical code repo [FramePack](https://github.com/lllyasviel/FramePack) by [lllyasviel](lllyasviel/FramePack_F1_I2V_HY_20250503) and [FramePack Studio](https://github.com/colinurbs/FramePack-Studio) πŸ™ŒπŸ»
     """)
     with gr.Row():
         with gr.Column():
-            input_image = gr.Image(type="numpy", label="Image", height=320)
+            input_image = gr.ImageEditor(type="numpy", label="Image", height=320)
             prompt = gr.Textbox(label="Prompt", value='')
             t2v = gr.Checkbox(label="do text-to-video", value=False)
             example_quick_prompts = gr.Dataset(samples=quick_prompts, label='Quick List', samples_per_page=1000, components=[prompt])
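Swapping `gr.Image` for `gr.ImageEditor` changes what `process` receives: instead of a plain (H, W, 3) array, the editor passes a dict whose `"composite"` entry is the user's drawing merged onto the background, which is why the new branch reads `input_image["composite"]`. A hedged wiring sketch, assuming the Gradio 4.x `ImageEditor` value format of `{"background", "layers", "composite"}` (the `describe_editor_value` demo is illustrative, not part of the Space):

```python
import gradio as gr

def describe_editor_value(editor_value):
    # With type="numpy", the editor value is a dict of numpy arrays:
    # {"background": ..., "layers": [...], "composite": ...}.
    if editor_value is None or editor_value.get("composite") is None:
        return "editor is empty"
    composite = editor_value["composite"]  # expected (H, W, 4) RGBA uint8
    return f"composite: shape={composite.shape}, dtype={composite.dtype}"

with gr.Blocks() as demo:
    editor = gr.ImageEditor(type="numpy", label="Image", height=320)
    info = gr.Textbox(label="Editor value")
    editor.change(describe_editor_value, inputs=editor, outputs=info)

if __name__ == "__main__":
    demo.launch()
```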