import gradio as gr
from PIL import Image
import numpy as np
import mlx.core as mx
from stable_diffusion import StableDiffusion

def generate_images(prompt, n_images=4, steps=50, cfg=7.5, negative_prompt="", n_rows=1):
    sd = StableDiffusion()

    # Generate the latent vectors using diffusion
    latents = sd.generate_latents(
        prompt,
        n_images=n_images,
        cfg_weight=cfg,
        num_steps=steps,
        negative_text=negative_prompt,
    )
    for x_t in latents:
        mx.simplify(x_t)
        mx.simplify(x_t)
        mx.eval(x_t)

    # Decode them into images
    decoded = []
    for i in range(0, n_images):
        decoded_img = sd.decode(x_t[i:i+1])
        mx.eval(decoded_img)
        decoded.append(decoded_img)

    # Arrange them on a grid
    x = mx.concatenate(decoded, axis=0)
    x = mx.pad(x, [(0, 0), (8, 8), (8, 8), (0, 0)])
    B, H, W, C = x.shape
    x = x.reshape(n_rows, B // n_rows, H, W, C).transpose(0, 2, 1, 3, 4)
    x = x.reshape(n_rows * H, B // n_rows * W, C)
    x = (x * 255).astype(mx.uint8)

    # Convert to PIL Image
    return Image.fromarray(x.__array__())

iface = gr.Interface(
    fn=generate_images,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Slider(minimum=1, maximum=10, step=1, value=4, label="Number of Images"),
        gr.Slider(minimum=20, maximum=100, step=1, value=50, label="Steps"),
        gr.Slider(minimum=0.0, maximum=10.0, step=0.1, value=7.5, label="CFG Weight"),
        gr.Textbox(default="", label="Negative Prompt"),
        gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Number of Rows")
    ],
    outputs="image",
    title="Stable Diffusion Image Generator",
    description="Generate images from a textual prompt using Stable Diffusion"
)

iface.launch()