fashxp committed
Commit 8f80642 · 1 Parent(s): afab4d9

added embedding endpoints

Files changed (5)
  1. Dockerfile +8 -2
  2. docker-compose.yaml +15 -2
  3. requirements.txt +1 -0
  4. src/embeddings.py +356 -0
  5. src/main.py +193 -1
Dockerfile CHANGED
@@ -4,14 +4,20 @@ RUN useradd -m -u 1000 user
 USER user
 
 ENV HOME=/home/user \
-    PATH=/home/user/.local/bin:$PATH
+    PATH=/home/user/.local/bin:$PATH \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
 
 WORKDIR $HOME/app
 
+# Copy requirements first for better caching
 COPY --chown=user requirements.txt requirements.txt
 
-RUN pip install --upgrade -r requirements.txt
+# Install dependencies with caching
+RUN pip install --upgrade pip && \
+    pip install --no-cache-dir --user -r requirements.txt
 
+# Copy application code
 COPY --chown=user . .
 
 CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "7860"]
docker-compose.yaml CHANGED
@@ -2,14 +2,27 @@ services:
   server:
     build:
       context: .
+      # Enable BuildKit for better caching
+      cache_from:
+        - python:3.9
     ports:
       - 7860:7860
     develop:
       watch:
+        # Only rebuild on requirements.txt changes, sync code changes otherwise
         - action: rebuild
-          path: .
+          path: ./requirements.txt
+        - action: sync
+          path: ./src
+          target: /home/user/app/src
+        - action: sync
+          path: ./README.md
+          target: /home/user/app/README.md
     volumes:
       - python-cache:/home/user/.cache
+      # Cache pip packages
+      - pip-cache:/home/user/.cache/pip
 
 volumes:
-  python-cache:
+  python-cache:
+  pip-cache:
requirements.txt CHANGED
@@ -6,4 +6,5 @@ sentencepiece
 sacremoses
 torch
 pillow
+protobuf
 # Optional dependencies for specific features
src/embeddings.py ADDED
@@ -0,0 +1,356 @@
+# -------------------------------------------------------------------
+# This source file is available under the terms of the
+# Pimcore Open Core License (POCL)
+# Full copyright and license information is available in
+# LICENSE.md which is distributed with this source code.
+#
+# @copyright Copyright (c) Pimcore GmbH (https://www.pimcore.com)
+# @license Pimcore Open Core License (POCL)
+# -------------------------------------------------------------------
+
+import torch
+import base64
+import io
+import logging
+from PIL import Image
+from pydantic import BaseModel
+from fastapi import Request, HTTPException
+import json
+from typing import Optional, Union, Dict, Any
+from transformers import AutoProcessor, AutoModel
+
+
+class EmbeddingRequest(BaseModel):
+    inputs: str
+    parameters: Optional[dict] = None
+
+
+class BaseEmbeddingTaskService:
+    """Base class for embedding services with common functionality"""
+
+    def __init__(self, logger: logging.Logger):
+        self._logger = logger
+        self._model_cache = {}
+        self._processor_cache = {}
+
+    async def get_embedding_request(self, request: Request) -> EmbeddingRequest:
+        """Parse request body into EmbeddingRequest"""
+        content_type = request.headers.get("content-type", "")
+        if content_type.startswith("application/json"):
+            data = await request.json()
+            return EmbeddingRequest(**data)
+        if content_type.startswith("application/x-www-form-urlencoded"):
+            raw = await request.body()
+            try:
+                data = json.loads(raw)
+                return EmbeddingRequest(**data)
+            except Exception:
+                try:
+                    data = json.loads(raw.decode("utf-8"))
+                    return EmbeddingRequest(**data)
+                except Exception:
+                    raise HTTPException(status_code=400, detail="Invalid request body")
+        raise HTTPException(status_code=400, detail="Unsupported content type")
+
+    def _get_device(self) -> torch.device:
+        """Get the appropriate device (GPU if available, otherwise CPU)"""
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self._logger.info(f"Using device: {device}")
+        return device
+
+    def _load_processor(self, model_name: str):
+        """Load and cache processor for the model using AutoProcessor"""
+        if model_name not in self._processor_cache:
+            try:
+                self._processor_cache[model_name] = AutoProcessor.from_pretrained(model_name)
+                self._logger.info(f"Loaded processor for model: {model_name}")
+            except Exception as e:
+                self._logger.error(f"Failed to load processor for model '{model_name}': {str(e)}")
+                raise HTTPException(
+                    status_code=404,
+                    detail=f"Processor for model '{model_name}' could not be loaded: {str(e)}"
+                )
+        return self._processor_cache[model_name]
+
+    def _load_model(self, model_name: str, cache_suffix: str = ""):
+        """Load and cache model using AutoModel"""
+        cache_key = f"{model_name}{cache_suffix}"
+        if cache_key not in self._model_cache:
+            try:
+                device = self._get_device()
+                model = AutoModel.from_pretrained(model_name)
+                model.to(device)
+                self._model_cache[cache_key] = model
+                self._logger.info(f"Loaded model: {model_name} on {device}")
+            except Exception as e:
+                self._logger.error(f"Failed to load model '{model_name}': {str(e)}")
+                raise HTTPException(
+                    status_code=404,
+                    detail=f"Model '{model_name}' could not be loaded: {str(e)}"
+                )
+        return self._model_cache[cache_key]
+
+    async def get_embedding_vector_size(self, model_name: str) -> dict:
+        """Get the vector size of embeddings for a given model"""
+        try:
+            # Load the model to get its configuration
+            model = self._load_model(model_name)
+
+            # Try to get the embedding dimension from the model configuration
+            used_attribute = None
+            if hasattr(model.config, 'hidden_size'):
+                vector_size = model.config.hidden_size
+                used_attribute = "hidden_size"
+            elif hasattr(model.config, 'projection_dim'):
+                vector_size = model.config.projection_dim
+                used_attribute = "projection_dim"
+            elif hasattr(model.config, 'd_model'):
+                vector_size = model.config.d_model
+                used_attribute = "d_model"
+            elif hasattr(model.config, 'text_config') and hasattr(model.config.text_config, 'hidden_size'):
+                vector_size = model.config.text_config.hidden_size
+                used_attribute = "text_config.hidden_size"
+            elif hasattr(model.config, 'vision_config') and hasattr(model.config.vision_config, 'hidden_size'):
+                vector_size = model.config.vision_config.hidden_size
+                used_attribute = "vision_config.hidden_size"
+            else:
+                # If we can't determine from config, we'll need to run a dummy inference
+                raise AttributeError("Could not determine vector size from model configuration")
+
+            self._logger.info(f"Model {model_name} has embedding vector size: {vector_size}")
+            return {
+                "model_name": model_name,
+                "vector_size": vector_size,
+                "config_attribute_used": used_attribute
+            }
+
+        except Exception as e:
+            self._logger.error(f"Failed to get vector size for model '{model_name}': {str(e)}")
+            raise HTTPException(
+                status_code=404,
+                detail=f"Could not determine vector size for model '{model_name}': {str(e)}"
+            )
+
+    def _extract_embeddings(self, model_output, model_name: str) -> torch.Tensor:
+        """Extract embeddings from model output with fallback strategies"""
+
+        # Try different embedding extraction methods in order of preference
+
+        # 1. Check for pooler_output (most common)
+        if hasattr(model_output, 'pooler_output') and model_output.pooler_output is not None:
+            self._logger.debug(f"Using pooler_output for {model_name}")
+            return model_output.pooler_output
+
+        # 2. Check for last_hidden_state and pool it
+        if hasattr(model_output, 'last_hidden_state') and model_output.last_hidden_state is not None:
+            self._logger.debug(f"Using pooled last_hidden_state for {model_name}")
+            # Mean pooling over sequence dimension
+            return model_output.last_hidden_state.mean(dim=1)
+
+        # 3. Check for image_embeds (CLIP-style models)
+        if hasattr(model_output, 'image_embeds') and model_output.image_embeds is not None:
+            self._logger.debug(f"Using image_embeds for {model_name}")
+            return model_output.image_embeds
+
+        # 4. Check for text_embeds (CLIP-style models)
+        if hasattr(model_output, 'text_embeds') and model_output.text_embeds is not None:
+            self._logger.debug(f"Using text_embeds for {model_name}")
+            return model_output.text_embeds
+
+        # 5. Fallback: try to use the output directly if it's a tensor
+        if isinstance(model_output, torch.Tensor):
+            self._logger.debug(f"Using direct tensor output for {model_name}")
+            return model_output
+
+        # 6. Last resort: check if output is a tuple and use the first element
+        if isinstance(model_output, tuple) and len(model_output) > 0:
+            self._logger.debug(f"Using first element of tuple output for {model_name}")
+            return model_output[0]
+
+        # If none of the above work, raise an error
+        raise HTTPException(
+            status_code=500,
+            detail=f"Could not extract embeddings from model output for {model_name}. "
+                   f"Available attributes: {dir(model_output) if hasattr(model_output, '__dict__') else 'Unknown'}"
+        )
+
+
+class ImageEmbeddingTaskService(BaseEmbeddingTaskService):
+    """Service for generating image embeddings"""
+
+    def _decode_base64_image(self, base64_string: str) -> Image.Image:
+        """Decode base64 string to PIL Image"""
+        try:
+            # Remove data URL prefix if present
+            if base64_string.startswith('data:image'):
+                base64_string = base64_string.split(',')[1]
+
+            image_data = base64.b64decode(base64_string)
+            image = Image.open(io.BytesIO(image_data))
+
+            # Convert to RGB if necessary
+            if image.mode != 'RGB':
+                image = image.convert('RGB')
+
+            return image
+        except Exception as e:
+            raise HTTPException(status_code=400, detail=f"Invalid image data: {str(e)}")
+
+    def _generate_image_embeddings(self, image: Image.Image, model, processor, model_name: str) -> list:
+        """Generate embeddings for an image"""
+        device = self._get_device()
+
+        # Process the image
+        inputs = processor(images=image, return_tensors="pt", padding=True)
+
+        # Move inputs to the same device as the model
+        inputs = {k: v.to(device) for k, v in inputs.items()}
+
+        # Get the embeddings
+        with torch.no_grad():
+            # Try using specialized methods first for CLIP-like models
+            if hasattr(model, 'get_image_features'):
+                self._logger.debug(f"Using get_image_features for {model_name}")
+                embeddings = model.get_image_features(pixel_values=inputs.get('pixel_values'))
+            elif hasattr(model, 'vision_model'):
+                self._logger.debug(f"Using vision_model for {model_name}")
+                vision_outputs = model.vision_model(**inputs)
+                embeddings = self._extract_embeddings(vision_outputs, model_name)
+            else:
+                self._logger.debug(f"Using full model for {model_name}")
+                outputs = model(**inputs)
+                embeddings = self._extract_embeddings(outputs, model_name)
+
+        self._logger.info(f"Image embedding shape: {embeddings.shape}")
+
+        # Move back to CPU before converting to numpy
+        embeddings_array = embeddings.cpu().numpy()
+
+        return embeddings_array[0].tolist()
+
+    async def generate_embedding(self, request: Request, model_name: str):
+        """Main method to generate image embeddings"""
+        embedding_request: EmbeddingRequest = await self.get_embedding_request(request)
+
+        self._logger.info(f"Generating image embedding for model: {model_name}")
+
+        # Load processor and model using auto-detection
+        processor = self._load_processor(model_name)
+        model = self._load_model(model_name, "_image")
+
+        # Decode image from base64
+        image = self._decode_base64_image(embedding_request.inputs)
+
+        try:
+            # Generate embeddings
+            embeddings = self._generate_image_embeddings(image, model, processor, model_name)
+
+            self._logger.info("Image embedding generation completed")
+            return {"embeddings": embeddings}
+
+        except Exception as e:
+            self._logger.error(f"Embedding generation failed for model '{model_name}': {str(e)}")
+            raise HTTPException(
+                status_code=500,
+                detail=f"Embedding generation failed: {str(e)}"
+            )
+
+    async def generate_embedding_from_upload(self, uploaded_file, model_name: str):
+        """Generate image embeddings from uploaded file"""
+        from fastapi import UploadFile
+
+        self._logger.info(f"Generating image embedding from uploaded file for model: {model_name}")
+
+        # Validate file type
+        if not uploaded_file.content_type.startswith('image/'):
+            raise HTTPException(
+                status_code=400,
+                detail=f"Invalid file type: {uploaded_file.content_type}. Only image files are supported."
+            )
+
+        try:
+            # Read file content
+            file_content = await uploaded_file.read()
+
+            # Convert to PIL Image
+            image = Image.open(io.BytesIO(file_content)).convert('RGB')
+
+            # Load processor and model using auto-detection
+            processor = self._load_processor(model_name)
+            model = self._load_model(model_name, "_image")
+
+            # Generate embeddings
+            embeddings = self._generate_image_embeddings(image, model, processor, model_name)
+
+            self._logger.info("Image embedding generation from upload completed")
+            return {"embeddings": embeddings}
+
+        except Exception as e:
+            self._logger.error(f"Embedding generation from upload failed for model '{model_name}': {str(e)}")
+            raise HTTPException(
+                status_code=500,
+                detail=f"Embedding generation from upload failed: {str(e)}"
+            )
+
+
+class TextEmbeddingTaskService(BaseEmbeddingTaskService):
+    """Service for generating text embeddings"""
+
+    def _generate_text_embeddings(self, text: str, model, processor, model_name: str) -> list:
+        """Generate embeddings for text"""
+        device = self._get_device()
+
+        # Process the text
+        inputs = processor(text=[text], return_tensors="pt", padding=True, truncation=True)
+
+        # Move inputs to the same device as the model
+        inputs = {k: v.to(device) for k, v in inputs.items()}
+
+        # Get the embeddings
+        with torch.no_grad():
+            # Try using specialized methods first for CLIP-like models
+            if hasattr(model, 'get_text_features'):
+                self._logger.debug(f"Using get_text_features for {model_name}")
+                embeddings = model.get_text_features(
+                    input_ids=inputs.get('input_ids'),
+                    attention_mask=inputs.get('attention_mask')
+                )
+            elif hasattr(model, 'text_model'):
+                self._logger.debug(f"Using text_model for {model_name}")
+                text_outputs = model.text_model(**inputs)
+                embeddings = self._extract_embeddings(text_outputs, model_name)
+            else:
+                self._logger.debug(f"Using full model for {model_name}")
+                outputs = model(**inputs)
+                embeddings = self._extract_embeddings(outputs, model_name)
+
+        self._logger.info(f"Text embedding shape: {embeddings.shape}")
+
+        # Move back to CPU before converting to numpy
+        embeddings_array = embeddings.cpu().numpy()
+
+        return embeddings_array[0].tolist()
+
+    async def generate_embedding(self, request: Request, model_name: str):
+        """Main method to generate text embeddings"""
+        embedding_request: EmbeddingRequest = await self.get_embedding_request(request)
+
+        self._logger.info(f"Generating text embedding for: {embedding_request.inputs[:50]}...")
+
+        # Load processor and model using auto-detection
+        processor = self._load_processor(model_name)
+        model = self._load_model(model_name, "_text")
+
+        try:
+            # Generate embeddings
+            embeddings = self._generate_text_embeddings(embedding_request.inputs, model, processor, model_name)
+
+            self._logger.info("Text embedding generation completed")
+            return {"embeddings": embeddings}
+
+        except Exception as e:
+            self._logger.error(f"Embedding generation failed for model '{model_name}': {str(e)}")
+            raise HTTPException(
+                status_code=500,
+                detail=f"Embedding generation failed: {str(e)}"
+            )
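
Note: the JSON endpoints wired up in src/main.py below send this file's EmbeddingRequest schema, i.e. a single "inputs" string. For image embeddings that string is the base64-encoded image, optionally as a data URL (the prefix is stripped by _decode_base64_image). A minimal client-side sketch of preparing that payload, using only the Python standard library; the file path "example.jpg" is a hypothetical placeholder, not part of this commit:

import base64
import json

# Hypothetical input file; replace with a real image path.
with open("example.jpg", "rb") as f:
    encoded = base64.b64encode(f.read()).decode("utf-8")

# A plain base64 string is accepted ...
payload = {"inputs": encoded}

# ... and so is a data URL, since the server strips the "data:image/...;base64," prefix.
payload_data_url = {"inputs": "data:image/jpeg;base64," + encoded}

print(json.dumps(payload)[:80])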
src/main.py CHANGED
@@ -10,13 +10,14 @@
 
 import torch
 
-from fastapi import FastAPI, Path, Request
+from fastapi import FastAPI, Path, Request, File, UploadFile
 import logging
 import sys
 
 from .translation_task import TranslationTaskService
 from .classification import ClassificationTaskService
 from .text_to_image import TextToImageTaskService
+from .embeddings import ImageEmbeddingTaskService, TextEmbeddingTaskService
 
 app = FastAPI(
     title="Pimcore Local Inference Service",
@@ -294,3 +295,194 @@ async def image_to_text(
     model_name = model_name.rstrip("/")
     imageToTextTask = TextToImageTaskService(logger)
     return await imageToTextTask.extract(request, model_name)
+
+
+# =========================
+# Image Embedding Task
+# =========================
+@app.post(
+    "/image-embedding/{model_name:path}",
+    openapi_extra={
+        "requestBody": {
+            "content": {
+                "application/json": {
+                    "example": {
+                        "inputs": "base64_encoded_image_string"
+                    }
+                }
+            }
+        }
+    }
+)
+async def image_embedding(
+    request: Request,
+    model_name: str = Path(
+        ...,
+        description="The name of the image embedding model. Supported models include: google/siglip-so400m-patch14-384, openai/clip-vit-large-patch14, openai/clip-vit-base-patch16, laion/CLIP-ViT-bigG-14-laion2B-39B-b160k, Salesforce/blip-itm-large-flickr",
+        example="google/siglip-so400m-patch14-384"
+    )
+):
+    """
+    Generate embedding vectors for image data.
+
+    The service supports multiple model types including SigLIP, CLIP, and BLIP models.
+    Returns a dense vector representation of the input image.
+
+    Returns:
+        list: The embedding vector as a list of float values.
+    """
+
+    model_name = model_name.rstrip("/")
+    imageEmbeddingTask = ImageEmbeddingTaskService(logger)
+    return await imageEmbeddingTask.generate_embedding(request, model_name)
+
+
+# =========================
+# Image Embedding Upload Task (Development/Testing)
+# =========================
+@app.post(
+    "/image-embedding-upload/{model_name:path}",
+    openapi_extra={
+        "requestBody": {
+            "content": {
+                "multipart/form-data": {
+                    "schema": {
+                        "type": "object",
+                        "properties": {
+                            "image": {
+                                "type": "string",
+                                "format": "binary",
+                                "description": "Image file to upload for embedding generation"
+                            }
+                        },
+                        "required": ["image"]
+                    }
+                }
+            }
+        },
+        "responses": {
+            "200": {
+                "description": "Image embedding vector",
+                "content": {
+                    "application/json": {
+                        "example": {
+                            "embeddings": [0.1, -0.2, 0.3, "..."]
+                        }
+                    }
+                }
+            }
+        }
+    }
+)
+async def image_embedding_upload(
+    image: UploadFile = File(..., description="Image file to generate embeddings for"),
+    model_name: str = Path(
+        ...,
+        description="The name of the image embedding model. Supported models include: google/siglip-so400m-patch14-384, openai/clip-vit-large-patch14, openai/clip-vit-base-patch16, laion/CLIP-ViT-bigG-14-laion2B-39B-b160k, Salesforce/blip-itm-large-flickr",
+        example="google/siglip-so400m-patch14-384"
+    )
+):
+    """
+    Generate embedding vectors for uploaded image data (Development/Testing endpoint).
+
+    This endpoint allows you to upload an image file directly through the Swagger UI
+    for development and testing purposes. The image is processed and converted to
+    embedding vectors using the specified model.
+
+    Supported formats: JPEG, PNG, GIF, BMP, TIFF
+
+    The service supports multiple model types including SigLIP, CLIP, and BLIP models.
+    Returns a dense vector representation of the uploaded image.
+
+    Returns:
+        dict: The embedding vector as a list of float values.
+    """
+
+    model_name = model_name.rstrip("/")
+    imageEmbeddingTask = ImageEmbeddingTaskService(logger)
+    return await imageEmbeddingTask.generate_embedding_from_upload(image, model_name)
+
+
+# =========================
+# Text Embedding Task
+# =========================
+@app.post(
+    "/text-embedding/{model_name:path}",
+    openapi_extra={
+        "requestBody": {
+            "content": {
+                "application/json": {
+                    "example": {
+                        "inputs": "text to embed"
+                    }
+                }
+            }
+        }
+    }
+)
+async def text_embedding(
+    request: Request,
+    model_name: str = Path(
+        ...,
+        description="The name of the text embedding model. Supported models include: google/siglip-so400m-patch14-384, openai/clip-vit-large-patch14, openai/clip-vit-base-patch16, laion/CLIP-ViT-bigG-14-laion2B-39B-b160k, Salesforce/blip-itm-large-flickr",
+        example="google/siglip-so400m-patch14-384"
+    )
+):
+    """
+    Generate embedding vectors for text data.
+
+    The service supports multiple model types including SigLIP, CLIP, and BLIP models.
+    Returns a dense vector representation of the input text.
+
+    Returns:
+        list: The embedding vector as a list of float values.
+    """
+
+    model_name = model_name.rstrip("/")
+    textEmbeddingTask = TextEmbeddingTaskService(logger)
+    return await textEmbeddingTask.generate_embedding(request, model_name)
+
+
+# =========================
+# Embedding Vector Size
+# =========================
+@app.get(
+    "/embedding-vector-size/{model_name:path}",
+    openapi_extra={
+        "responses": {
+            "200": {
+                "description": "Vector size information",
+                "content": {
+                    "application/json": {
+                        "example": {
+                            "model_name": "google/siglip-so400m-patch14-384",
+                            "vector_size": 1152,
+                            "config_attribute_used": "hidden_size"
+                        }
+                    }
+                }
+            }
+        }
+    }
+)
+async def embedding_vector_size(
+    model_name: str = Path(
+        ...,
+        description="The name of the embedding model. Supported models include: google/siglip-so400m-patch14-384, openai/clip-vit-large-patch14, openai/clip-vit-base-patch16, laion/CLIP-ViT-bigG-14-laion2B-39B-b160k, Salesforce/blip-itm-large-flickr",
+        example="google/siglip-so400m-patch14-384"
+    )
+):
+    """
+    Get the vector size of embeddings for a given model.
+
+    This endpoint returns the dimensionality of the embedding vectors that the model produces.
+    Useful for understanding the output format before generating embeddings.
+
+    Returns:
+        dict: Information about the vector size including model name, vector size, and configuration attribute used.
+    """
+
+    model_name = model_name.rstrip("/")
+    # We can use either ImageEmbeddingTaskService or TextEmbeddingTaskService as they inherit from the same base class
+    embeddingTask = ImageEmbeddingTaskService(logger)
+    return await embeddingTask.get_embedding_vector_size(model_name)
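
Usage note for the endpoints added above: a minimal client sketch, assuming the service is running locally on port 7860 (the port used in the Dockerfile CMD) and that the requests package is installed; the chosen model and input text are illustrative examples only.

import requests

BASE_URL = "http://localhost:7860"  # assumption: local instance started via docker compose / uvicorn
MODEL = "openai/clip-vit-base-patch16"  # example model from the endpoint descriptions

# Query the embedding dimensionality reported by /embedding-vector-size.
size_info = requests.get(f"{BASE_URL}/embedding-vector-size/{MODEL}").json()
print(size_info)

# Generate a text embedding; the body follows the EmbeddingRequest schema ({"inputs": ...}).
response = requests.post(
    f"{BASE_URL}/text-embedding/{MODEL}",
    json={"inputs": "a photo of a cat"},
)
vector = response.json()["embeddings"]
print(len(vector))  # dimensionality of the returned embedding vector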