JuanjoSG5 committed on
Commit 7af9a4a · 1 Parent(s): 7325704

feat: imported the project to the org

.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ __pycache__/
2
+ .env
3
+ test_agent.py
README.md CHANGED
@@ -1,13 +1,16 @@
1
  ---
2
- title: Mage Utilities Mcp
3
- emoji: 🏆
4
- colorFrom: red
5
- colorTo: purple
6
  sdk: gradio
7
- sdk_version: 5.33.1
8
  app_file: app.py
9
  pinned: false
10
- short_description: Some tools for image processing and generation.
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
  ---
2
+ title: ImageUtilitiesMCP
3
+ emoji: 🖼️
4
+ colorFrom: pink
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 5.32.0
8
  app_file: app.py
9
  pinned: false
10
+ short_description: Useful tools for image editing.
11
+ tags: [mcp-server-track]
12
  ---
13
 
14
+ This project has been created by: [RafaelJaime](https://huggingface.co/RafaelJaime), [ItzRoBeerT](https://huggingface.co/ItzRoBeerT) and [JuanjoJ55](https://huggingface.co/JuanjoJ55).
15
+
16
+ The original space with all the commits can be found here: [Original Space](https://huggingface.co/spaces/RafaelJaime/image_utilities_mcp)
app.py ADDED
@@ -0,0 +1,393 @@
1
+ import gradio as gr
2
+ from src.utils.change_format import change_format
3
+ from src.utils.remove_background import remove_background
4
+ from src.utils.generate_image import generate_image
5
+ from src.utils.add_text import add_text_to_image_base64
6
+ from src.utils.compress import compress_image_memory
8
+ from src.utils.apply_filter import apply_filter_direct
9
+ from src.utils.watermark import add_watermark, remove_watermark
10
+ from src.utils.describe import describe_image
11
+ import base64
12
+ from PIL import Image
13
+ import io
14
+ import requests
15
+ from io import BytesIO
16
+ from typing import Union
17
+
18
+ def change_format(image: Union[str, BytesIO], target_format: str) -> str:
19
+ """
20
+ Change the format of an image from a URL to the specified target format.
21
+ """
22
+
23
+ if not isinstance(image, BytesIO):
24
+ response = requests.get(image, timeout=30)
25
+ response.raise_for_status()
26
+ img = Image.open(BytesIO(response.content))
27
+ else:
28
+ img = Image.open(image)
29
+
30
+ output = BytesIO()
31
+ img.save(output, format=target_format)
32
+ output.seek(0)
33
+
34
+ encoded_image = base64.b64encode(output.getvalue()).decode('utf-8')
35
+
36
+ return encoded_image
37
+
38
+ def image_to_base64(image):
39
+ if image is None:
40
+ return None
41
+ buffer = io.BytesIO()
42
+ image.save(buffer, format="PNG")
43
+ return base64.b64encode(buffer.getvalue()).decode()
44
+
45
+ def base64_to_image(base64_str):
46
+ if not base64_str:
47
+ return None
48
+
49
+ # Remove data URI prefix if present (e.g., "data:image/png;base64,")
50
+ if isinstance(base64_str, str) and "base64," in base64_str:
51
+ base64_str = base64_str.split("base64,", 1)[1]
52
+
53
+ try:
54
+ # Strip any whitespace that might be in the base64 string
55
+ if isinstance(base64_str, str):
56
+ base64_str = base64_str.strip()
57
+
58
+ # Decode the base64 data
59
+ image_data = base64.b64decode(base64_str)
60
+
61
+ # Check if we have data
62
+ if not image_data:
63
+ print("Decoded base64 data is empty")
64
+ return None
65
+
66
+ # Attempt to open the image
67
+ image = Image.open(io.BytesIO(image_data))
68
+
69
+ # Convert the image to ensure it's valid
70
+ return image.copy()
71
+
72
+ except base64.binascii.Error as e:
73
+ print(f"Base64 decoding error: {str(e)}")
74
+ if isinstance(base64_str, str):
75
+ preview = base64_str[:30] + "..." if len(base64_str) > 30 else base64_str
76
+ print(f"Base64 preview: {preview}")
77
+ return None
78
+
79
+ except Exception as e:
80
+ print(f"Error converting base64 to image: {str(e)}")
81
+
82
+ # Print preview of the base64 string for debugging
83
+ if isinstance(base64_str, str):
84
+ preview = base64_str[:30] + "..." if len(base64_str) > 30 else base64_str
85
+ print(f"Base64 preview: {preview}")
86
+
87
+ # Additional debug information
88
+ if 'image_data' in locals() and image_data:
89
+ try:
90
+ magic_bytes = image_data[:12].hex()
91
+ print(f"First 12 bytes: {magic_bytes}")
92
+ except:
93
+ pass
94
+
95
+ return None
96
+
97
+ def url_to_base64(url):
98
+ response = requests.get(url)
99
+ return base64.b64encode(response.content).decode()
100
+
101
+ def gradio_remove_background(image):
102
+ if image is None:
103
+ return None
104
+ base64_img = image_to_base64(image)
105
+ result = remove_background(f"data:image/png;base64,{base64_img}")
106
+
107
+ # Check if the result is directly a base64 string or has an image_data key
108
+ if isinstance(result, str):
109
+ return base64_to_image(result)
110
+ elif isinstance(result, dict) and "image_data" in result:
111
+ # If image_data contains a data URI prefix
112
+ if isinstance(result["image_data"], str) and result["image_data"].startswith("data:"):
113
+ # The response already contains the full data URI
114
+ return base64_to_image(result["image_data"])
115
+ else:
116
+ # Try to process it as a regular base64 string
117
+ try:
118
+ return base64_to_image(result["image_data"])
119
+ except Exception as e:
120
+ print(f"Error processing image data: {e}")
121
+ return None
122
+ else:
123
+ print(f"Unexpected response format from remove_background: {type(result)}")
124
+ return None
125
+
126
+ def gradio_describe_image(image):
127
+ if image is None:
128
+ return "No image provided"
129
+ try:
130
+ base64_img = image_to_base64(image)
131
+ return describe_image(base64_img)
132
+ except Exception as e:
133
+ print(f"Error describing image: {e}")
134
+ return f"Error: {str(e)}"
135
+
136
+ def gradio_change_format(image, format_type):
137
+ if image is None:
138
+ return None
139
+ try:
140
+ # change_format expects a URL or a BytesIO object, so pass the image as an in-memory PNG buffer
+ buffer = io.BytesIO()
+ image.save(buffer, format="PNG")
+ buffer.seek(0)
+ result = change_format(buffer, format_type)
142
+ return base64_to_image(result)
143
+ except Exception as e:
144
+ print(f"Error changing format: {e}")
145
+ return image
146
+
147
+ def gradio_generate_image(prompt, width=512, height=512):
148
+ result = generate_image(prompt, width, height)
149
+ return base64_to_image(result["b64"])
150
+
151
+ def gradio_apply_filter(image, filter_type, intensity=1.0):
152
+ if image is None:
153
+ print("No image provided")
154
+ return None
155
+
156
+ return apply_filter_direct(image, filter_type, intensity)
157
+
158
+ def update_text_image(image, text, centered, x, y, font_size, color):
159
+ if image is None:
160
+ return None
161
+ if not text or text.strip() == "":
162
+ return image
163
+
164
+ result = add_text_to_image_base64(image, text, int(x), int(y), int(font_size), color, centered)
165
+ return result
166
+
167
+ def toggle_position_fields(centered):
168
+ return (
169
+ gr.Number(interactive=not centered),
170
+ gr.Number(interactive=not centered)
171
+ )
172
+
173
+ def toggle_intensity_slider(filter_type):
174
+ intensity_filters = ['blur', 'brightness', 'contrast', 'saturation']
175
+ return gr.Slider(interactive=filter_type in intensity_filters)
176
+
177
+ def gradio_add_watermark(image, watermark_text, opacity=0.5):
178
+ if image is None:
179
+ return None
180
+ try:
181
+ # add_watermark takes and returns PIL images directly, so no base64 round-trip is needed
+ return add_watermark(image, watermark_text, opacity)
184
+ except Exception as e:
185
+ print(f"Error adding watermark: {e}")
186
+ return image
187
+
188
+ def gradio_remove_watermark(image):
189
+ if image is None:
190
+ return None
191
+ try:
192
+ import tempfile
+ # remove_watermark operates on image files, so round-trip through a temporary PNG
+ with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
+ tmp_path = tmp.name
+ image.save(tmp_path, format="PNG")
+ result = remove_watermark(tmp_path)
+ if result.get("success"):
+ return Image.open(result["output_path"])
+ return image
195
+ except Exception as e:
196
+ print(f"Error removing watermark: {e}")
197
+ return image
198
+
199
+ def gradio_compress_image(image, quality=80):
200
+ """
201
+ Compress image for Gradio interface
202
+ """
203
+ if image is None:
204
+ return None
205
+ try:
206
+ compressed_image = compress_image_memory(image, quality, "JPEG")
207
+ return compressed_image
208
+ except Exception as e:
209
+ print(f"Error compressing image: {e}")
210
+ return image
211
+
212
+ def create_gradio_interface():
213
+ with gr.Blocks(title="ImageUtilitiesMCP", theme=gr.themes.Soft()) as demo:
214
+ gr.Markdown("# 🖼️ ImageUtilitiesMCP")
215
+ gr.Markdown("A complete set of image-processing tools")
216
+
217
+ with gr.Tabs():
218
+ with gr.Tab("🎨 Generate Image"):
219
+ with gr.Row():
220
+ prompt_input = gr.Textbox(label="Prompt", placeholder="Describe the image you want to generate")
221
+ with gr.Column():
222
+ width_input = gr.Slider(256, 1024, 512, label="Width")
223
+ height_input = gr.Slider(256, 1024, 512, label="Height")
224
+ generate_btn = gr.Button("Generate", variant="primary")
225
+ generated_output = gr.Image(label="Generated Image")
226
+
227
+ generate_btn.click(
228
+ gradio_generate_image,
229
+ [prompt_input, width_input, height_input],
230
+ generated_output
231
+ )
232
+
233
+ with gr.Tab("🔍 Describe Image"):
234
+ with gr.Row():
235
+ describe_input = gr.Image(label="Upload Image", type="pil")
236
+ description_output = gr.Textbox(label="Description", lines=4)
237
+
238
+ describe_input.change(gradio_describe_image, describe_input, description_output)
239
+
240
+ with gr.Tab("✂️ Remove Background"):
241
+ with gr.Row():
242
+ bg_input = gr.Image(label="Upload Image", type="pil")
243
+ bg_output = gr.Image(label="Background Removed")
244
+
245
+ bg_input.change(gradio_remove_background, bg_input, bg_output)
246
+
247
+ with gr.Tab("🎭 Apply Filters"):
248
+ with gr.Row():
249
+ filter_input = gr.Image(label="Upload Image", type="pil")
250
+ with gr.Column():
251
+ filter_type = gr.Dropdown(
252
+ ["blur", "sharpen", "vintage", "black_white", "sepia", "emboss", "edge", "smooth", "brightness", "contrast", "saturation", "grayscale"],
253
+ label="Filter Type",
254
+ value="blur"
255
+ )
256
+
257
+ intensity_slider = gr.Slider(
258
+ minimum=0.1,
259
+ maximum=300.0,
260
+ value=1.0,
261
+ step=0.1,
262
+ label="Intensity",
263
+ interactive=True
264
+ )
265
+
266
+ filter_output = gr.Image(label="Filtered Image")
267
+
268
+ filter_type.change(
269
+ toggle_intensity_slider,
270
+ filter_type,
271
+ intensity_slider
272
+ )
273
+
274
+ filter_inputs = [filter_input, filter_type, intensity_slider]
275
+
276
+ for inp in filter_inputs:
277
+ inp.change(gradio_apply_filter, filter_inputs, filter_output)
278
+
279
+ with gr.Tab("📝 Add Text"):
280
+ with gr.Row():
281
+ text_input = gr.Image(label="Upload Image", type="pil")
282
+ with gr.Column():
283
+ text_content = gr.Textbox(
284
+ label="Text",
285
+ placeholder="Enter text to add",
286
+ value=""
287
+ )
288
+ text_centered = gr.Checkbox(label="Center Text", value=False)
289
+
290
+ with gr.Row():
291
+ text_x = gr.Number(
292
+ label="X Position",
293
+ value=50,
294
+ interactive=True,
295
+ minimum=0
296
+ )
297
+ text_y = gr.Number(
298
+ label="Y Position",
299
+ value=50,
300
+ interactive=True,
301
+ minimum=0
302
+ )
303
+
304
+ with gr.Row():
305
+ font_size = gr.Slider(
306
+ minimum=10,
307
+ maximum=100,
308
+ value=20,
309
+ label="Font Size"
310
+ )
311
+ text_color = gr.ColorPicker(
312
+ label="Color",
313
+ value="#FFFFFF"
314
+ )
315
+
316
+ add_text_btn = gr.Button("Add Text", variant="primary")
317
+ text_output = gr.Image(label="Image with Text")
318
+
319
+ text_centered.change(
320
+ toggle_position_fields,
321
+ text_centered,
322
+ [text_x, text_y]
323
+ )
324
+
325
+ inputs = [text_input, text_content, text_centered, text_x, text_y, font_size, text_color]
326
+
327
+ add_text_btn.click(
328
+ update_text_image,
329
+ inputs,
330
+ text_output
331
+ )
332
+
333
+ for inp in inputs:
334
+ inp.change(update_text_image, inputs, text_output)
335
+
336
+ with gr.Tab("💧 Watermark"):
337
+ with gr.Tabs():
338
+ with gr.Tab("Add Watermark"):
339
+ with gr.Row():
340
+ watermark_input = gr.Image(label="Upload Image", type="pil")
341
+ with gr.Column():
342
+ watermark_text = gr.Textbox(label="Watermark Text")
343
+ watermark_opacity = gr.Slider(0.1, 1.0, 0.5, label="Opacity")
344
+ watermark_output = gr.Image(label="Watermarked Image")
345
+
346
+ inputs = [watermark_input, watermark_text, watermark_opacity]
347
+ for inp in inputs:
348
+ inp.change(gradio_add_watermark, inputs, watermark_output)
349
+
350
+ with gr.Tab("Remove Watermark"):
351
+ with gr.Row():
352
+ unwatermark_input = gr.Image(label="Upload Image", type="pil")
353
+ unwatermark_output = gr.Image(label="Watermark Removed")
354
+
355
+ unwatermark_input.change(gradio_remove_watermark, unwatermark_input, unwatermark_output)
356
+
357
+ with gr.Tab("🗜️ Compress"):
358
+ with gr.Row():
359
+ compress_input = gr.Image(label="Upload Image", type="pil")
360
+ with gr.Column():
361
+ quality_slider = gr.Slider(0, 100, 80, label="Quality %")
362
+ compress_output = gr.Image(label="Compressed Image")
363
+
364
+ compress_input.change(gradio_compress_image, [compress_input, quality_slider], compress_output)
365
+ quality_slider.change(gradio_compress_image, [compress_input, quality_slider], compress_output)
366
+
367
+ with gr.Tab("🔄 Change Format"):
368
+ with gr.Row():
369
+ format_input = gr.Image(label="Upload Image", type="pil")
370
+ with gr.Column():
371
+ format_type = gr.Dropdown(
372
+ ["PNG", "JPEG", "WEBP", "BMP"],
373
+ label="Output Format",
374
+ value="PNG"
375
+ )
376
+ format_output = gr.Image(label="Converted Image")
377
+
378
+ format_input.change(gradio_change_format, [format_input, format_type], format_output)
379
+ format_type.change(gradio_change_format, [format_input, format_type], format_output)
380
+
381
+ gr.Markdown("---")
382
+ gr.Markdown("💡 **Status**: Active | Real-time image processing")
383
+
384
+ return demo
385
+
386
+ if __name__ == "__main__":
387
+ demo = create_gradio_interface()
388
+ demo.launch(
389
+ mcp_server=True,
390
+ server_name="0.0.0.0",
391
+ server_port=7860,
392
+ show_error=True
393
+ )
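For reference, here is a minimal, self-contained sketch of the base64 round-trip that the Gradio wrapper functions above rely on. It uses only Pillow; the image and variable names are illustrative, not part of the committed code.

```python
import base64
import io

from PIL import Image

# Stand-in for an uploaded image; any PIL image works here
img = Image.new("RGB", (64, 64), "red")

# What image_to_base64() does: serialize the image to PNG bytes, then base64-encode
buffer = io.BytesIO()
img.save(buffer, format="PNG")
b64 = base64.b64encode(buffer.getvalue()).decode()

# What base64_to_image() does: decode and reopen as a PIL image
restored = Image.open(io.BytesIO(base64.b64decode(b64)))
assert restored.size == img.size
```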
gradio_interface/app.py ADDED
@@ -0,0 +1,204 @@
1
+ import os
2
+ import gradio as gr
3
+ from os import getenv
4
+ import base64
5
+ from io import BytesIO
6
+ from dotenv import load_dotenv
7
+ import requests
8
+ import socket
9
+ import logging
10
+ import json
11
+
12
+ from langchain_openai import ChatOpenAI
13
+ from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
14
+ from langchain_core.callbacks import StreamingStdOutCallbackHandler
15
+
16
+ # Load environment
17
+ dotenv_path = os.path.join(os.path.dirname(__file__), '.env')
18
+ load_dotenv(dotenv_path=dotenv_path)
19
+
20
+ # Connectivity test
21
+ def test_connectivity(url="https://openrouter.helicone.ai/api/v1"):
22
+ try:
23
+ return requests.get(url, timeout=5).status_code == 200
24
+ except (requests.RequestException, socket.error):
25
+ return False
26
+
27
+ # Helper to make direct API calls to OpenRouter when LangChain fails
28
+ def direct_api_call(messages, api_key, base_url):
29
+ headers = {
30
+ "Content-Type": "application/json",
31
+ "Authorization": f"Bearer {api_key}",
32
+ "HTTP-Referer": "https://your-app-domain.com", # Add your domain
33
+ "X-Title": "Image Analysis App"
34
+ }
35
+
36
+ if getenv("HELICONE_API_KEY"):
37
+ headers["Helicone-Auth"] = f"Bearer {getenv('HELICONE_API_KEY')}"
38
+
39
+ payload = {
40
+ "model": "google/gemini-flash-1.5",
41
+ "messages": messages,
42
+ "stream": False,
43
+ }
44
+
45
+ try:
46
+ response = requests.post(
47
+ f"{base_url}/chat/completions",
48
+ headers=headers,
49
+ json=payload,
50
+ timeout=30
51
+ )
52
+ response.raise_for_status()
53
+ return response.json()["choices"][0]["message"]["content"]
54
+ except Exception as e:
55
+ return f"Error: {str(e)}"
56
+
57
+ # Initialize LLM with streaming and retry logic
58
+ def init_llm():
59
+ if not test_connectivity():
60
+ raise RuntimeError("No connection to OpenRouter. Check your network and API keys.")
61
+ return ChatOpenAI(
62
+ openai_api_key=getenv("OPENROUTER_API_KEY"),
63
+ openai_api_base=getenv("OPENROUTER_BASE_URL"),
64
+ model_name="google/gemini-flash-1.5",
65
+ streaming=True,
66
+ callbacks=[StreamingStdOutCallbackHandler()],
67
+ model_kwargs={
68
+ "extra_headers": {"Helicone-Auth": f"Bearer {getenv('HELICONE_API_KEY')}"}
69
+ },
70
+ )
71
+
72
+ # Try to initialize LLM but handle failures gracefully
73
+ try:
74
+ llm = init_llm()
75
+ except Exception as e:
76
+ llm = None
77
+
78
+ # Helpers
79
+ def encode_image_to_base64(pil_image):
80
+ buffer = BytesIO()
81
+ pil_image.save(buffer, format="PNG")
82
+ return base64.b64encode(buffer.getvalue()).decode()
83
+
84
+ # Core logic
85
+ def generate_response(message, chat_history, image):
86
+ # Convert chat history to standard format
87
+ formatted_history = []
88
+ for msg in chat_history:
89
+ role = msg.get('role')
90
+ content = msg.get('content')
91
+ if role == 'user':
92
+ formatted_history.append({"role": "user", "content": content})
93
+ else:
94
+ formatted_history.append({"role": "assistant", "content": content})
95
+
96
+ # Prepare system message
97
+ system_msg = {"role": "system", "content": "You are an expert image analysis assistant. Answer succinctly."}
98
+
99
+ # Prepare the latest message with image if provided
100
+ if image:
101
+ base64_image = encode_image_to_base64(image)
102
+
103
+ # Format for direct API call (OpenRouter/OpenAI format)
104
+ api_messages = [system_msg] + formatted_history + [{
105
+ "role": "user",
106
+ "content": [
107
+ {"type": "text", "text": message},
108
+ {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
109
+ ]
110
+ }]
111
+
112
+ # For LangChain format
113
+ content_for_langchain = [
114
+ {"type": "text", "text": message},
115
+ {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
116
+ ]
117
+ else:
118
+ api_messages = [system_msg] + formatted_history + [{"role": "user", "content": message}]
119
+ content_for_langchain = message
120
+
121
+ # Build LangChain messages
122
+ lc_messages = [SystemMessage(content="You are an expert image analysis assistant. Answer succinctly.")]
123
+ for msg in chat_history:
124
+ role = msg.get('role')
125
+ content = msg.get('content')
126
+ if role == 'user':
127
+ lc_messages.append(HumanMessage(content=content))
128
+ else:
129
+ lc_messages.append(AIMessage(content=content))
130
+
131
+ lc_messages.append(HumanMessage(content=content_for_langchain))
132
+
133
+ try:
134
+ # First try with LangChain
135
+ if llm:
136
+ try:
137
+ try:
138
+ stream_iter = llm.stream(lc_messages)
139
+ partial = ""
140
+ for chunk in stream_iter:
141
+ if chunk is None:
142
+ continue
143
+ content = getattr(chunk, 'content', None)
144
+ if content is None:
145
+ continue
146
+ partial += content
147
+ yield partial
148
+
149
+ # If we got this far, streaming worked
150
+ return
151
+ except Exception as e:
152
+ print(f"Streaming failed: {e}. Falling back to non-streaming mode")
153
+
154
+ # Try non-streaming
155
+ try:
156
+ response = llm.invoke(lc_messages)
157
+ yield response.content
158
+ return
159
+ except Exception as e:
160
+ raise e
161
+ except Exception as e:
162
+ raise e
163
+
164
+ response_text = direct_api_call(
165
+ api_messages,
166
+ getenv("OPENROUTER_API_KEY"),
167
+ getenv("OPENROUTER_BASE_URL")
168
+ )
169
+ yield response_text
170
+
171
+ except Exception as e:
172
+ import traceback
173
+ error_trace = traceback.format_exc()
174
+ yield f"⚠️ Error generating the response: {str(e)}. Please try again later."
175
+
176
+ # Gradio interface
177
+ def process_message(message, chat_history, image):
178
+ if chat_history is None:
179
+ chat_history = []
180
+ if image is None:
181
+ chat_history.append({'role':'assistant','content':'Please upload an image.'})
182
+ return "", chat_history
183
+ chat_history.append({'role':'user','content':message})
184
+ chat_history.append({'role':'assistant','content':'⏳ Processing...'})
185
+ yield "", chat_history
186
+ for chunk in generate_response(message, chat_history, image):
187
+ chat_history[-1]['content'] = chunk
188
+ yield "", chat_history
189
+ return "", chat_history
190
+
191
+ with gr.Blocks() as demo:
192
+ with gr.Row():
193
+ with gr.Column(scale=2):
194
+ chatbot = gr.Chatbot(type='messages', height=600)
195
+ msg = gr.Textbox(label="Message", placeholder="Type your question...")
196
+ clear = gr.ClearButton([msg, chatbot])
197
+ with gr.Column(scale=1):
198
+ image_input = gr.Image(type="pil", label="Upload Image")
199
+ info = gr.Textbox(label="Image Info", interactive=False)
200
+
201
+ msg.submit(process_message, [msg, chatbot, image_input], [msg, chatbot])
202
+ image_input.change(lambda img: f"Size: {img.size}" if img else "No image.", [image_input], [info])
203
+
204
+ demo.launch()
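To illustrate the request shape that direct_api_call sends when an image is attached, here is a hedged sketch of the messages payload. The base64 placeholder and question text are hypothetical; the model and endpoint come from the code above.

```python
from os import getenv

# Multimodal message list in the OpenAI/OpenRouter chat-completions format
messages = [
    {"role": "system", "content": "You are an expert image analysis assistant. Answer succinctly."},
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this picture?"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,<BASE64_IMAGE>"}},
        ],
    },
]

# Would be sent with:
# direct_api_call(messages, getenv("OPENROUTER_API_KEY"), getenv("OPENROUTER_BASE_URL"))
```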
requirements.txt ADDED
@@ -0,0 +1,8 @@
1
+ fastmcp
2
+ requests
3
+ Pillow
4
+ rembg
5
+ onnxruntime
6
+ openai
7
+ opencv-python
8
+ langchain_openai
src/utils/add_text.py ADDED
@@ -0,0 +1,62 @@
1
+ from PIL import Image, ImageDraw, ImageFont
2
+ import os
3
+ from typing import Optional, Tuple, Dict, Any
4
+
5
+ def parse_color(color_str):
6
+ if color_str.startswith('rgba('):
7
+ values = color_str[5:-1].split(',')
8
+ r = int(float(values[0]))
9
+ g = int(float(values[1]))
10
+ b = int(float(values[2]))
11
+ return (r, g, b)
12
+ elif color_str.startswith('rgb('):
13
+ values = color_str[4:-1].split(',')
14
+ r = int(float(values[0]))
15
+ g = int(float(values[1]))
16
+ b = int(float(values[2]))
17
+ return (r, g, b)
18
+ elif color_str.startswith('#'):
19
+ return color_str
20
+ else:
21
+ return color_str
22
+
23
+ def add_text_to_image_base64(image, text, x, y, font_size, color, centered=False):
24
+ """
25
+ Draw the given text onto a PIL image and return the edited image.
+ If centered is True, the x and y coordinates are ignored and the text is centred on the image.
+ If no TrueType font is available, Pillow's default font is used.
+
+ Args:
+ image: PIL Image to draw on.
+ text: Text to write on the image.
+ x, y: Pixel coordinates of the text (ignored when centered is True).
+ font_size: Font size in points.
+ color: Text color as a hex string or an rgb()/rgba() string.
+ centered: Whether to centre the text on the image.
+
+ Returns:
+ The edited PIL Image, or None if image is None.
37
+ """
38
+ if image is None:
39
+ return None
40
+
41
+ img = image.copy()
42
+ draw = ImageDraw.Draw(img)
43
+
44
+ try:
45
+ font = ImageFont.truetype("arial.ttf", font_size)
46
+ except:
47
+ try:
48
+ font = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", font_size)
49
+ except:
50
+ font = ImageFont.load_default()
51
+
52
+ parsed_color = parse_color(color)
53
+
54
+ if centered:
55
+ bbox = draw.textbbox((0, 0), text, font=font)
56
+ text_width = bbox[2] - bbox[0]
57
+ text_height = bbox[3] - bbox[1]
58
+ x = (img.width - text_width) // 2
59
+ y = (img.height - text_height) // 2
60
+
61
+ draw.text((x, y), text, fill=parsed_color, font=font)
62
+ return img
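A minimal usage sketch of add_text_to_image_base64 (despite its name it takes and returns PIL images; the file names are hypothetical):

```python
from PIL import Image

from src.utils.add_text import add_text_to_image_base64

img = Image.open("photo.png")  # hypothetical input file
labelled = add_text_to_image_base64(img, "Hello!", x=10, y=10, font_size=32, color="#FF0000", centered=True)
labelled.save("photo_labelled.png")
```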
src/utils/apply_filter.py ADDED
@@ -0,0 +1,129 @@
1
+ from PIL import Image, ImageFilter, ImageEnhance
2
+ from io import BytesIO
3
+ import requests
4
+ import base64
5
+
6
+ def apply_filter_direct(image, filter_type, intensity=1.0):
7
+ """
8
+ Apply filters directly to PIL image without base64 conversion
9
+ """
10
+ if image is None:
11
+ return None
12
+
13
+ try:
14
+ print(f"Applying filter: {filter_type} with intensity: {intensity}")
15
+ img = image.copy()
16
+
17
+ if img.mode != 'RGB':
18
+ img = img.convert('RGB')
19
+
20
+ if filter_type == 'blur':
21
+ img = img.filter(ImageFilter.GaussianBlur(radius=max(0.1, intensity)))
22
+ elif filter_type == 'sharpen':
23
+ if intensity <= 1.0:
24
+ img = img.filter(ImageFilter.SHARPEN)
25
+ else:
26
+ img = img.filter(ImageFilter.UnsharpMask(radius=2, percent=int(intensity * 150), threshold=3))
27
+ elif filter_type == 'emboss':
28
+ img = img.filter(ImageFilter.EMBOSS)
29
+ elif filter_type == 'edge':
30
+ img = img.filter(ImageFilter.FIND_EDGES)
31
+ elif filter_type == 'smooth':
32
+ img = img.filter(ImageFilter.SMOOTH_MORE)
33
+ elif filter_type == 'brightness':
34
+ enhancer = ImageEnhance.Brightness(img)
35
+ img = enhancer.enhance(max(0.1, intensity))
36
+ elif filter_type == 'contrast':
37
+ enhancer = ImageEnhance.Contrast(img)
38
+ img = enhancer.enhance(max(0.1, intensity))
39
+ elif filter_type == 'saturation':
40
+ enhancer = ImageEnhance.Color(img)
41
+ img = enhancer.enhance(max(0.1, intensity))
42
+ elif filter_type == 'sepia':
43
+ img = apply_sepia_filter_direct(img)
44
+ elif filter_type == 'grayscale' or filter_type == 'black_white':
45
+ img = img.convert('L').convert('RGB')
46
+ elif filter_type == 'vintage':
47
+ img = apply_vintage_effect_direct(img)
48
+ else:
49
+ print(f"Unknown filter type: {filter_type}")
50
+ return image
51
+
52
+ print(f"Filter applied successfully")
53
+ return img
54
+
55
+ except Exception as e:
56
+ print(f"Error applying filter: {e}")
57
+ import traceback
58
+ traceback.print_exc()
59
+ return image
60
+
61
+ def apply_sepia_filter_direct(img):
62
+ """Apply sepia tone effect to an image."""
63
+ width, height = img.size
64
+ pixels = img.load()
65
+
66
+ for y in range(height):
67
+ for x in range(width):
68
+ r, g, b = pixels[x, y]
69
+
70
+ tr = int(0.393 * r + 0.769 * g + 0.189 * b)
71
+ tg = int(0.349 * r + 0.686 * g + 0.168 * b)
72
+ tb = int(0.272 * r + 0.534 * g + 0.131 * b)
73
+
74
+ tr = min(255, tr)
75
+ tg = min(255, tg)
76
+ tb = min(255, tb)
77
+
78
+ pixels[x, y] = (tr, tg, tb)
79
+
80
+ return img
81
+
82
+ def apply_vintage_effect_direct(img):
83
+ """Apply a vintage effect combining multiple filters."""
84
+ contrast_enhancer = ImageEnhance.Contrast(img)
85
+ img = contrast_enhancer.enhance(0.8)
86
+
87
+ brightness_enhancer = ImageEnhance.Brightness(img)
88
+ img = brightness_enhancer.enhance(1.1)
89
+
90
+ img = apply_sepia_filter_direct(img)
91
+
92
+ img = img.filter(ImageFilter.GaussianBlur(radius=0.5))
93
+
94
+ return img
95
+
96
+ def apply_sepia_filter_direct(img):
97
+ """Apply sepia tone effect to an image."""
98
+ width, height = img.size
99
+ pixels = img.load()
100
+
101
+ for y in range(height):
102
+ for x in range(width):
103
+ r, g, b = pixels[x, y]
104
+
105
+ tr = int(0.393 * r + 0.769 * g + 0.189 * b)
106
+ tg = int(0.349 * r + 0.686 * g + 0.168 * b)
107
+ tb = int(0.272 * r + 0.534 * g + 0.131 * b)
108
+
109
+ tr = min(255, tr)
110
+ tg = min(255, tg)
111
+ tb = min(255, tb)
112
+
113
+ pixels[x, y] = (tr, tg, tb)
114
+
115
+ return img
116
+
117
+ def apply_vintage_effect_direct(img):
118
+ """Apply a vintage effect combining multiple filters."""
119
+ contrast_enhancer = ImageEnhance.Contrast(img)
120
+ img = contrast_enhancer.enhance(0.8)
121
+
122
+ brightness_enhancer = ImageEnhance.Brightness(img)
123
+ img = brightness_enhancer.enhance(1.1)
124
+
125
+ img = apply_sepia_filter_direct(img)
126
+
127
+ img = img.filter(ImageFilter.GaussianBlur(radius=0.5))
128
+
129
+ return img
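A short usage sketch of apply_filter_direct (input path is hypothetical; intensity only affects blur, sharpen, brightness, contrast and saturation, matching the slider logic in app.py):

```python
from PIL import Image

from src.utils.apply_filter import apply_filter_direct

img = Image.open("photo.png")              # hypothetical input file
blurred = apply_filter_direct(img, "blur", intensity=2.0)
sepia = apply_filter_direct(img, "sepia")  # intensity is ignored for this filter
blurred.save("photo_blurred.png")
```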
src/utils/change_format.py ADDED
@@ -0,0 +1,45 @@
1
+ from PIL import Image
2
+ from io import BytesIO
3
+ import requests
4
+ import base64
5
+ from typing import Union
6
+
7
+ def change_format(image: Union[str, BytesIO], target_format: str) -> str:
8
+ """
9
+ Change the format of an image from a URL to the specified target format.
10
+
11
+ Args:
12
+ image: The URL of the input image, or a BytesIO object containing the image data.
13
+ target_format: The desired output format (e.g., 'JPEG', 'PNG').
14
+
15
+ Returns:
16
+ The image converted to the target format as a base64-encoded string.
17
+ """
18
+
19
+ if not isinstance(image, BytesIO):
20
+ response = requests.get(image, timeout=30)
21
+ response.raise_for_status()
22
+
23
+ # Open the image from bytes
24
+ img = Image.open(BytesIO(response.content))
25
+
26
+ # Convert the image to the target format
27
+ output = BytesIO()
28
+ img.save(output, format=target_format)
29
+ output.seek(0)
30
+
31
+ # Convert to base64 string for JSON serialization
32
+ encoded_image = base64.b64encode(output.getvalue()).decode('utf-8')
33
+
34
+ return encoded_image # Return base64 encoded string that can be serialized to JSON
35
+ else:
36
+ img = Image.open(image)
37
+
38
+ output = BytesIO()
39
+ img.save(output, format=target_format)
40
+ output.seek(0)
41
+
42
+ # Convert to base64 string for JSON serialization
43
+ encoded_image = base64.b64encode(output.getvalue()).decode('utf-8')
44
+
45
+ return encoded_image
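A usage sketch of change_format, assuming a reachable image URL (the URL and output file name are placeholders):

```python
import base64

from src.utils.change_format import change_format

b64_jpeg = change_format("https://example.com/photo.png", "JPEG")  # placeholder URL
with open("photo.jpg", "wb") as f:
    f.write(base64.b64decode(b64_jpeg))
```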
src/utils/compress.py ADDED
@@ -0,0 +1,59 @@
1
+ from PIL import Image
2
+ import os
3
+ from io import BytesIO
+ from typing import Literal, Optional
4
+
5
+ def compress_image_file(
6
+ input_path: str,
7
+ output_path: str,
8
+ quality: int = 85,
9
+ format: Literal["JPEG", "PNG", "WEBP"] = "JPEG",
10
+ max_width: Optional[int] = None,
11
+ max_height: Optional[int] = None
12
+ ) -> str:
13
+ """
14
+ Compress an image file from disk.
15
+ """
16
+ try:
17
+ if not os.path.splitext(output_path)[1]:
18
+ extension_map = {"JPEG": ".jpg", "PNG": ".png", "WEBP": ".webp"}
19
+ output_path = output_path + extension_map[format]
20
+
21
+ with Image.open(input_path) as img:
22
+ if format == "JPEG" and img.mode in ("RGBA", "P"):
23
+ img = img.convert("RGB")
24
+
25
+ if max_width or max_height:
26
+ img.thumbnail((max_width or img.width, max_height or img.height), Image.Resampling.LANCZOS)
27
+
28
+ save_kwargs = {"format": format, "optimize": True}
29
+ if format in ["JPEG", "WEBP"]:
30
+ save_kwargs["quality"] = quality
31
+
32
+ img.save(output_path, **save_kwargs)
33
+
34
+ original_size = os.path.getsize(input_path) / 1024 / 1024
35
+ compressed_size = os.path.getsize(output_path) / 1024 / 1024
36
+ reduction = (1 - compressed_size/original_size) * 100
37
+
38
+ return f"✅ Compressed successfully!\nOriginal: {original_size:.2f}MB → Compressed: {compressed_size:.2f}MB\nReduction: {reduction:.1f}%"
39
+
40
+ except Exception as e:
41
+ return f"❌ Error: {str(e)}"
42
+
43
+ def compress_image_memory(image: Image.Image, quality: int = 80, format: str = "JPEG") -> Image.Image:
44
+ """
45
+ Compress an image in memory and return the compressed image.
46
+ """
47
+ if format == "JPEG" and image.mode in ("RGBA", "P"):
48
+ image = image.convert("RGB")
49
+
50
+ output = BytesIO()
51
+ save_kwargs = {"format": format, "optimize": True}
52
+
53
+ if format in ["JPEG", "WEBP"]:
54
+ save_kwargs["quality"] = quality
55
+
56
+ image.save(output, **save_kwargs)
57
+ output.seek(0)
58
+
59
+ return Image.open(output)
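A usage sketch of the two compression helpers (paths are placeholders; compress_image_file returns a human-readable summary string, while compress_image_memory returns a PIL image):

```python
from PIL import Image

from src.utils.compress import compress_image_file, compress_image_memory

# On-disk compression with optional downscaling
print(compress_image_file("photo.png", "photo_small", quality=70, format="JPEG", max_width=1280))

# In-memory compression, as used by the Compress tab in app.py
smaller = compress_image_memory(Image.open("photo.png"), quality=60, format="JPEG")
```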
src/utils/describe.py ADDED
@@ -0,0 +1,112 @@
1
+ import os
2
+ import base64
3
+ import requests
4
+ from pathlib import Path
5
+ from openai import OpenAI
6
+ from urllib.parse import urlparse
7
+ from dotenv import load_dotenv
8
+
9
+
10
+ def describe_image(image_path: str) -> str:
11
+ """
12
+ Generate a description of the image at the given path or URL.
13
+
14
+ Args:
15
+ image_path: Path to local image file OR URL to image
16
+
17
+ Returns:
18
+ A string description of the image
+ """
19
+ load_dotenv()
20
+
21
+ # Check if API key is available
22
+ api_key = os.getenv("NEBIUS_API_KEY")
23
+ if not api_key:
24
+ return "Error: NEBIUS_API_KEY environment variable not set"
25
+
26
+ try:
27
+ # Determine if it's a URL or local file path
28
+ parsed = urlparse(image_path)
29
+ is_url = bool(parsed.scheme and parsed.netloc)
30
+
31
+ if is_url:
32
+ # Handle URL
33
+ print(f"📡 Downloading image from URL: {image_path}")
34
+ response = requests.get(image_path, timeout=30)
35
+ response.raise_for_status()
36
+ image_data = response.content
37
+
38
+ # Determine content type from response headers
39
+ content_type = response.headers.get('content-type', '')
40
+ if 'image' not in content_type:
41
+ return f"Error: URL does not appear to contain an image. Content-Type: {content_type}"
42
+
43
+ else:
44
+ # Handle local file
45
+ image_path = Path(image_path)
46
+
47
+ if not image_path.exists():
48
+ return f"Error: Local file not found: {image_path}"
49
+
50
+ # Check if it's an image file
51
+ valid_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp'}
52
+ if image_path.suffix.lower() not in valid_extensions:
53
+ return f"Error: Unsupported file type '{image_path.suffix}'. Supported: {valid_extensions}"
54
+
55
+ print(f"📁 Reading local image: {image_path}")
56
+ with open(image_path, "rb") as f:
57
+ image_data = f.read()
58
+
59
+ # Encode image to base64
60
+ base64_image = base64.b64encode(image_data).decode('utf-8')
61
+
62
+ # Create OpenAI client
63
+ client = OpenAI(
64
+ base_url="https://api.studio.nebius.com/v1/",
65
+ api_key=api_key
66
+ )
67
+
68
+ # Make API call with proper vision format
69
+ response = client.chat.completions.create(
70
+ model="mistralai/Mistral-Small-3.1-24B-Instruct-2503",
71
+ messages=[
72
+ {
73
+ "role": "system",
74
+ "content": "You are a helpful assistant that provides detailed descriptions of images. Focus on the main subjects, colors, composition, and any notable details."
75
+ },
76
+ {
77
+ "role": "user",
78
+ "content": [
79
+ {
80
+ "type": "text",
81
+ "text": "Please provide a detailed description of this image."
82
+ },
83
+ {
84
+ "type": "image_url",
85
+ "image_url": {
86
+ "url": f"data:image/jpeg;base64,{base64_image}"
87
+ }
88
+ }
89
+ ]
90
+ }
91
+ ],
92
+ max_tokens=500
93
+ )
94
+
95
+ description = response.choices[0].message.content.strip()
96
+ return description
97
+
98
+ except requests.RequestException as e:
99
+ return f"Error downloading image from URL: {str(e)}"
100
+ except FileNotFoundError:
101
+ return f"Error: File not found: {image_path}"
102
+ except Exception as e:
103
+ error_msg = str(e)
104
+
105
+ if "vision" in error_msg.lower() or "image" in error_msg.lower():
106
+ return f"Error: This model may not support vision capabilities. Try a vision-enabled model. Details: {error_msg}"
107
+ elif "401" in error_msg or "unauthorized" in error_msg.lower():
108
+ return "Error: Invalid API key or insufficient permissions"
109
+ elif "rate" in error_msg.lower() or "quota" in error_msg.lower():
110
+ return f"Error: API rate limit or quota exceeded: {error_msg}"
111
+ else:
112
+ return f"Error processing image: {error_msg}"
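A usage sketch of describe_image; it needs NEBIUS_API_KEY in the environment (or a .env file) and accepts either a local path or a URL (both shown inputs are placeholders):

```python
from src.utils.describe import describe_image

print(describe_image("photo.jpg"))                       # local file
print(describe_image("https://example.com/photo.jpg"))   # remote URL
```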
src/utils/generate_image.py ADDED
@@ -0,0 +1,79 @@
1
+ import os
2
+ import base64
3
+ from typing import Dict, Any
4
+ from openai import OpenAI
5
+
6
+ def generate_image(
7
+ prompt: str,
8
+ width: int = 1024,
9
+ height: int = 1024,
10
+ num_inference_steps: int = 28,
11
+ negative_prompt: str = "",
12
+ seed: int = -1
13
+ ) -> Dict[str, Any]:
14
+ """
15
+ Generate an image using Nebius API.
16
+
17
+ Args:
18
+ prompt: Text prompt for image generation
20
+ width: Image width
21
+ height: Image height
22
+ num_inference_steps: Number of inference steps
23
+ negative_prompt: Negative prompt for generation
24
+ seed: Random seed (-1 for random)
25
+
26
+ Returns:
27
+ Dictionary with result information
28
+ """
29
+
30
+ try:
31
+ client = OpenAI(
32
+ base_url="https://api.studio.nebius.com/v1/",
33
+ api_key=os.environ.get("NEBIUS_API_KEY")
34
+ )
35
+
36
+ response = client.images.generate(
37
+ model="black-forest-labs/flux-dev",
38
+ response_format="b64_json",
39
+ extra_body={
40
+ "response_extension": "png",
41
+ "width": width,
42
+ "height": height,
43
+ "num_inference_steps": num_inference_steps,
44
+ "negative_prompt": negative_prompt,
45
+ "seed": seed
46
+ },
47
+ prompt=prompt
48
+ )
49
+
50
+ image_data = response.data[0].b64_json  # keep the base64 string so the result stays JSON-serializable
51
+
52
+
53
+ return {
54
+ "success": True,
55
+ "message": "Image generated successfully",
56
+ "prompt": prompt,
57
+ "b64": image_data,
58
+ "generation_params": {
59
+ "width": width,
60
+ "height": height,
61
+ "num_inference_steps": num_inference_steps,
62
+ "negative_prompt": negative_prompt,
63
+ "seed": seed
64
+ }
65
+ }
66
+
67
+ except Exception as e:
68
+ if "NEBIUS_API_KEY" in str(e) or not os.environ.get("NEBIUS_API_KEY"):
69
+ return {
70
+ "success": False,
71
+ "error": "NEBIUS_API_KEY environment variable not set",
72
+ "output_path": None,
73
+ 'user': os.environ.get("USER")
74
+ }
75
+ return {
76
+ "success": False,
77
+ "error": f"Failed to generate image: {str(e)}",
78
+ "output_path": None
79
+ }
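A usage sketch of generate_image, assuming NEBIUS_API_KEY is set; the returned "b64" field is a base64 string that can be decoded and written to disk (the prompt and output path are placeholders):

```python
import base64

from src.utils.generate_image import generate_image

result = generate_image("a watercolor painting of a fox", width=512, height=512)
if result["success"]:
    with open("fox.png", "wb") as f:          # placeholder output path
        f.write(base64.b64decode(result["b64"]))
else:
    print(result["error"])
```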
src/utils/remove_background.py ADDED
@@ -0,0 +1,100 @@
1
+ import requests
2
+ from typing import Optional, Dict, Any, Union
3
+ import os
4
+ import rembg
5
+ import numpy as np
6
+ from PIL import Image
7
+ import io
8
+ import base64
9
+ import re
10
+
11
+ def remove_background(
12
+ image_input: Union[str, bytes, np.ndarray, Image.Image],
13
+ model_name: str = "u2net"
14
+ ) -> Dict[str, Any]:
15
+ """
16
+ Remove background from an image.
17
+
18
+ Args:
19
+ image_input: Can be one of:
20
+ - URL string
21
+ - Data URL string (base64 encoded)
22
+ - Image bytes
23
+ - NumPy array
24
+ - PIL Image
25
+ model_name: Background removal model to use
26
+
27
+ Returns:
28
+ Dictionary with result information and processed image data
29
+ """
30
+
31
+ try:
32
+ # Initialize session
33
+ session = rembg.new_session(model_name=model_name)
34
+
35
+ # Handle different input types
36
+ if isinstance(image_input, str):
37
+ if image_input.startswith('http://') or image_input.startswith('https://'):
38
+ # If input is a URL, download the image
39
+ response = requests.get(image_input, timeout=30)
40
+ response.raise_for_status()
41
+ input_data = response.content
42
+ source_info = f"URL: {image_input}"
43
+ elif image_input.startswith('data:'):
44
+ # If input is a data URL (base64 encoded string)
45
+ # Extract the base64 part after the comma
46
+ base64_data = re.sub('^data:image/.+;base64,', '', image_input)
47
+ input_data = base64.b64decode(base64_data)
48
+ source_info = "data URL"
49
+ else:
50
+ return {
51
+ "success": False,
52
+ "error": f"Unsupported string input format: {image_input[:30]}...",
53
+ "image_data": None
54
+ }
55
+ elif isinstance(image_input, bytes):
56
+ # If input is bytes, use directly
57
+ input_data = image_input
58
+ source_info = "image bytes"
59
+ elif isinstance(image_input, np.ndarray):
60
+ # If input is numpy array, convert to bytes
61
+ pil_img = Image.fromarray(image_input)
62
+ buffer = io.BytesIO()
63
+ pil_img.save(buffer, format="PNG")
64
+ input_data = buffer.getvalue()
65
+ source_info = "numpy array"
66
+ elif isinstance(image_input, Image.Image):
67
+ # If input is PIL Image, convert to bytes
68
+ buffer = io.BytesIO()
69
+ image_input.save(buffer, format="PNG")
70
+ input_data = buffer.getvalue()
71
+ source_info = "PIL Image"
72
+ else:
73
+ return {
74
+ "success": False,
75
+ "error": f"Unsupported input type: {type(image_input)}",
76
+ "image_data": None
77
+ }
78
+
79
+ # Remove background
80
+ output_data = rembg.remove(input_data, session=session)
81
+
82
+ return {
83
+ "success": True,
84
+ "message": f"Background removed from {source_info} using {model_name} model",
85
+ "image_data": output_data,
86
+ "model_used": model_name
87
+ }
88
+
89
+ except requests.RequestException as e:
90
+ return {
91
+ "success": False,
92
+ "error": f"Failed to download image: {str(e)}",
93
+ "image_data": None
94
+ }
95
+ except Exception as e:
96
+ return {
97
+ "success": False,
98
+ "error": f"Failed to process image: {str(e)}",
99
+ "image_data": None
100
+ }
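A usage sketch of remove_background; on success, image_data holds raw PNG bytes with an alpha channel (URL and output path are placeholders):

```python
from src.utils.remove_background import remove_background

result = remove_background("https://example.com/photo.jpg")   # placeholder URL
if result["success"]:
    with open("photo_nobg.png", "wb") as f:
        f.write(result["image_data"])
else:
    print(result["error"])
```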
src/utils/resize_image.py ADDED
@@ -0,0 +1,56 @@
1
+ from PIL import Image
2
+ from io import BytesIO
3
+ import requests
4
+ import base64
5
+ from typing import Union, Tuple
6
+
7
+ def resize_image(image_input: Union[str, BytesIO], target_size: Tuple[int, int], return_format: str = "base64") -> str:
8
+ """
9
+ Resize an image to the target size while maintaining aspect ratio.
10
+
11
+ Args:
12
+ image_input: URL, file path, base64 string, or BytesIO object
13
+ target_size: Tuple (width, height) for the target size
14
+ return_format: Format to return the image in ("base64" or "pil")
15
+
16
+ Returns:
17
+ Base64 encoded string of the resized image or PIL Image object
18
+ """
19
+ # Convert input to PIL Image
20
+ if isinstance(image_input, str):
21
+ if image_input.startswith(('http://', 'https://')):
22
+ # It's a URL
23
+ response = requests.get(image_input, timeout=10)
24
+ response.raise_for_status()
25
+ image = Image.open(BytesIO(response.content))
26
+ elif image_input.startswith('data:image'):
27
+ # It's a base64 data URI
28
+ base64_data = image_input.split(',')[1]
29
+ image = Image.open(BytesIO(base64.b64decode(base64_data)))
30
+ elif ';base64,' not in image_input and len(image_input) > 500:
31
+ # Likely a raw base64 string
32
+ image = Image.open(BytesIO(base64.b64decode(image_input)))
33
+ else:
34
+ # Assume it's a file path
35
+ image = Image.open(image_input)
36
+ elif isinstance(image_input, BytesIO):
37
+ image = Image.open(image_input)
38
+ else:
39
+ raise ValueError("Unsupported image input type")
40
+
41
+ # Calculate the aspect ratio
42
+ aspect_ratio = min(target_size[0] / image.width, target_size[1] / image.height)
43
+
44
+ # Calculate new size
45
+ new_size = (int(image.width * aspect_ratio), int(image.height * aspect_ratio))
46
+
47
+ # Resize the image using the proper resampling filter
48
+ resized_image = image.resize(new_size, Image.Resampling.LANCZOS)
49
+
50
+ # Return in requested format
51
+ if return_format.lower() == "base64":
52
+ buffer = BytesIO()
53
+ resized_image.save(buffer, format="PNG")
54
+ return base64.b64encode(buffer.getvalue()).decode('utf-8')
55
+ else:
56
+ return resized_image
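A usage sketch of resize_image: fit an image inside an 800×600 box while preserving aspect ratio (input path is a placeholder):

```python
from src.utils.resize_image import resize_image

b64_png = resize_image("photo.png", (800, 600))                       # base64-encoded PNG string
pil_img = resize_image("photo.png", (800, 600), return_format="pil")  # PIL image instead
```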
src/utils/visualize_image.py ADDED
@@ -0,0 +1,18 @@
1
+ import base64
2
+ from PIL import Image
3
+ from io import BytesIO
4
+
5
+ def visualize_base64_image(base64_string:str):
6
+ """
7
+ Visualize a base64-encoded image string.
8
+
9
+ Args:
10
+ base64_string: The base64-encoded image string.
11
+ """
12
+ # Decode the base64 string back to binary
13
+ image_data = base64.b64decode(base64_string)
14
+
15
+ # Create an image from the binary data
16
+ img = Image.open(BytesIO(image_data))
17
+
18
+ img.show()
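It combines naturally with the other helpers, for example to preview a resized image (a sketch; assumes resize_image returns a base64 string as above and that "photo.png" exists):

```python
from src.utils.resize_image import resize_image
from src.utils.visualize_image import visualize_base64_image

# Opens the decoded image in the system's default viewer
visualize_base64_image(resize_image("photo.png", (800, 600)))
```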
src/utils/watermark.py ADDED
@@ -0,0 +1,94 @@
1
+ from PIL import Image, ImageDraw, ImageFont
2
+ import os
3
+ from typing import Dict, Any
4
+ import cv2
5
+ import numpy as np
6
+
7
+ def add_watermark(image: Image.Image, watermark_text: str, opacity: float = 0.5) -> Image.Image:
8
+ """
9
+ Add a semi-transparent text watermark directly to a PIL Image.
10
+
11
+ Args:
12
+ image: PIL Image object to watermark
13
+ watermark_text: Text to use as watermark
14
+ opacity: Opacity of the watermark (0.1-1.0)
15
+
16
+ Returns:
17
+ PIL Image with watermark added
18
+ """
19
+ from PIL import ImageDraw, ImageFont
20
+
21
+ overlay = Image.new('RGBA', image.size, (255, 255, 255, 0))
22
+ draw = ImageDraw.Draw(overlay)
23
+
24
+ try:
25
+ font_size = min(image.width, image.height) // 20
26
+ font = ImageFont.truetype("arial.ttf", font_size)
27
+ except:
28
+ font = ImageFont.load_default()
29
+
30
+ bbox = draw.textbbox((0, 0), watermark_text, font=font)
31
+ text_width = bbox[2] - bbox[0]
32
+ text_height = bbox[3] - bbox[1]
33
+
34
+ x = (image.width - text_width) // 2
35
+ y = (image.height - text_height) // 2
36
+
37
+ alpha_value = int(255 * opacity)
38
+ text_color = (255, 255, 255, alpha_value)
39
+ shadow_color = (0, 0, 0, int(alpha_value * 0.5))
40
+
41
+ draw.text((x-2, y-2), watermark_text, fill=shadow_color, font=font)
42
+ draw.text((x, y), watermark_text, fill=text_color, font=font)
43
+
44
+ watermarked = Image.alpha_composite(image.convert('RGBA'), overlay)
45
+ return watermarked.convert('RGB')
46
+
47
+ def remove_watermark(image_path: str, alpha: float = 2.0, beta: float = -160) -> Dict[str, Any]:
48
+ """
49
+ Attempt to remove watermarks from an image using contrast and brightness adjustment.
50
+
51
+ Args:
52
+ image_path: The path to the input image file.
53
+ alpha: Contrast control (1.0-3.0, default 2.0). Higher values increase contrast.
54
+ beta: Brightness control (-255 to 255, default -160). Negative values decrease brightness.
55
+
56
+ Returns:
57
+ A dictionary containing success status, file paths, and operation details.
58
+ On success: success=True, input_path, output_path, output_size_bytes, alpha, beta, message.
59
+ On failure: success=False, error message, input_path, output_path=None.
60
+ """
61
+ try:
62
+ img = cv2.imread(image_path)
63
+
64
+ if img is None:
65
+ raise ValueError("Could not load image")
66
+
67
+ new = alpha * img + beta
68
+ new = np.clip(new, 0, 255).astype(np.uint8)
69
+
70
+ base_dir = os.path.dirname(image_path)
71
+ base_name, ext = os.path.splitext(os.path.basename(image_path))
72
+ new_filename = f"{base_name}_cleaned{ext}"
73
+ new_path = os.path.join(base_dir, new_filename)
74
+
75
+ cv2.imwrite(new_path, new)
76
+ output_size = os.path.getsize(new_path)
77
+
78
+ return {
79
+ "success": True,
80
+ "message": "Watermark removal attempted successfully",
81
+ "input_path": image_path,
82
+ "output_path": new_path,
83
+ "output_size_bytes": output_size,
84
+ "alpha": alpha,
85
+ "beta": beta
86
+ }
87
+
88
+ except Exception as e:
89
+ return {
90
+ "success": False,
91
+ "error": str(e),
92
+ "input_path": image_path,
93
+ "output_path": None
94
+ }
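A usage sketch of both watermark helpers (paths are placeholders; remove_watermark writes a *_cleaned file next to the input and returns a result dictionary):

```python
from PIL import Image

from src.utils.watermark import add_watermark, remove_watermark

marked = add_watermark(Image.open("photo.png"), "© Demo", opacity=0.4)
marked.save("photo_marked.png")

result = remove_watermark("photo_marked.png")
print(result["output_path"] if result["success"] else result["error"])
```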