# ------------------------------------------------------------------- # This source file is available under the terms of the # Pimcore Open Core License (POCL) # Full copyright and license information is available in # LICENSE.md which is distributed with this source code. # # @copyright Copyright (c) Pimcore GmbH (https://www.pimcore.com) # @license Pimcore Open Core License (POCL) # ------------------------------------------------------------------- import torch from fastapi import FastAPI, Path, Request, File, UploadFile import logging import sys from .translation_task import TranslationTaskService from .classification import ClassificationTaskService from .text_to_image import TextToImageTaskService from .embeddings import ImageEmbeddingTaskService, TextEmbeddingTaskService app = FastAPI( title="Pimcore Local Inference Service", description="This services allows HF inference provider compatible inference to models which are not available at HF inference providers.", version="1.0.0" ) logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s') logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) class StreamToLogger(object): def __init__(self, logger, log_level): self.logger = logger self.log_level = log_level self.linebuf = '' def write(self, buf): for line in buf.rstrip().splitlines(): self.logger.log(self.log_level, line.rstrip()) def flush(self): pass sys.stdout = StreamToLogger(logger, logging.INFO) sys.stderr = StreamToLogger(logger, logging.ERROR) @app.get("/gpu_check") async def gpu_check(): """ Check if a GPU is available """ gpu = 'GPU not available' if torch.cuda.is_available(): gpu = 'GPU is available' print("GPU is available") else: print("GPU is not available") return {'success': True, 'gpu': gpu} # ========================= # Translation Task # ========================= @app.post( "/translation/{model_name:path}", openapi_extra={ "requestBody": { "content": { "application/json": { "example": { "inputs": "Hello, world! foo bar", "parameters": {"repetition_penalty": 1.6} } } } } } ) async def translate( request: Request, model_name: str = Path( ..., description="The name of the translation model (e.g. Helsinki-NLP/opus-mt-en-de)", example="Helsinki-NLP/opus-mt-en-de" ) ): """ Execute translation tasks. Returns: list: The translation result(s) as returned by the pipeline. """ model_name = model_name.rstrip("/") translationTaskService = TranslationTaskService(logger) return await translationTaskService.translate(request, model_name) # ========================= # Zero-Shot Image Classification Task # ========================= @app.post( "/zero-shot-image-classification/{model_name:path}", openapi_extra={ "requestBody": { "content": { "application/json": { "example": { "inputs": "base64_encoded_image_string", "parameters": {"candidate_labels": "green, yellow, blue, white, silver"} } } } } } ) async def zero_shot_image_classification( request: Request, model_name: str = Path( ..., description="The name of the zero-shot classification model (e.g., openai/clip-vit-large-patch14-336)", example="openai/clip-vit-large-patch14-336" ) ): """ Execute zero-shot image classification tasks. Returns: list: The classification result(s) as returned by the pipeline. """ model_name = model_name.rstrip("/") zeroShotTask = ClassificationTaskService(logger, 'zero-shot-image-classification') return await zeroShotTask.classify(request, model_name) # ========================= # Image Classification Task # ========================= @app.post( "/image-classification/{model_name:path}", openapi_extra={ "requestBody": { "content": { "application/json": { "example": { "inputs": "base64_encoded_image_string" } } } } } ) async def image_classification( request: Request, model_name: str = Path( ..., description="The name of the image classification model (e.g., pimcore/car-countries-classification)", example="pimcore/car-countries-classification" ) ): """ Execute image classification tasks. Returns: list: The classification result(s) as returned by the pipeline. """ model_name = model_name.rstrip("/") imageTask = ClassificationTaskService(logger, 'image-classification') return await imageTask.classify(request, model_name) # ========================= # Zero-Shot Text Classification Task # ========================= @app.post( "/zero-shot-text-classification/{model_name:path}", openapi_extra={ "requestBody": { "content": { "application/json": { "example": { "inputs": "text to classify", "parameters": {"candidate_labels": "green, yellow, blue, white, silver"} } } } } } ) async def zero_shot_text_classification( request: Request, model_name: str = Path( ..., description="The name of the zero-shot text classification model (e.g., facebook/bart-large-mnli)", example="facebook/bart-large-mnli" ) ): """ Execute zero-shot text classification tasks. Returns: list: The classification result(s) as returned by the pipeline. """ model_name = model_name.rstrip("/") zeroShotTask = ClassificationTaskService(logger, 'zero-shot-classification') return await zeroShotTask.classify(request, model_name) # ========================= # Text Classification Task # ========================= @app.post( "/text-classification/{model_name:path}", openapi_extra={ "requestBody": { "content": { "application/json": { "example": { "inputs": "text to classify" } } } } } ) async def text_classification( request: Request, model_name: str = Path( ..., description="The name of the text classification model (e.g., pimcore/car-class-classification)", example="pimcore/car-class-classification" ) ): """ Execute text classification tasks. Returns: list: The classification result(s) as returned by the pipeline. """ model_name = model_name.rstrip("/") textTask = ClassificationTaskService(logger, 'text-classification') return await textTask.classify(request, model_name) # ========================= # Image to Text Task # ========================= @app.post( "/image-to-text/{model_name:path}", openapi_extra={ "requestBody": { "content": { "multipart/form-data": { "schema": { "type": "object", "properties": { "image": { "type": "string", "format": "binary", "description": "Image file to upload" } }, "required": ["image"] } } } } } ) async def image_to_text( request: Request, model_name: str = Path( ..., description="The name of the image-to-text (e.g., Salesforce/blip-image-captioning-base)", example="Salesforce/blip-image-captioning-base" ) ): """ Execute image-to-text tasks. Returns: list: The generated text as returned by the pipeline. """ model_name = model_name.rstrip("/") imageToTextTask = TextToImageTaskService(logger) return await imageToTextTask.extract(request, model_name) # ========================= # Image Embedding Task # ========================= @app.post( "/image-embedding/{model_name:path}", openapi_extra={ "requestBody": { "content": { "application/json": { "example": { "inputs": "base64_encoded_image_string" } } } } } ) async def image_embedding( request: Request, model_name: str = Path( ..., description="The name of the image embedding model. Supported models include: google/siglip-so400m-patch14-384, openai/clip-vit-large-patch14, openai/clip-vit-base-patch16, laion/CLIP-ViT-bigG-14-laion2B-39B-b160k, Salesforce/blip-itm-large-flickr", example="google/siglip-so400m-patch14-384" ) ): """ Generate embedding vectors for image data. The service supports multiple model types including SigLIP, CLIP, and BLIP models. Returns a dense vector representation of the input image. Returns: list: The embedding vector as a list of float values. """ model_name = model_name.rstrip("/") imageEmbeddingTask = ImageEmbeddingTaskService(logger) return await imageEmbeddingTask.generate_embedding(request, model_name) # ========================= # Image Embedding Upload Task (Development/Testing) # ========================= @app.post( "/image-embedding-upload/{model_name:path}", openapi_extra={ "requestBody": { "content": { "multipart/form-data": { "schema": { "type": "object", "properties": { "image": { "type": "string", "format": "binary", "description": "Image file to upload for embedding generation" } }, "required": ["image"] } } } }, "responses": { "200": { "description": "Image embedding vector", "content": { "application/json": { "example": { "embeddings": [0.1, -0.2, 0.3, "..."] } } } } } } ) async def image_embedding_upload( image: UploadFile = File(..., description="Image file to generate embeddings for"), model_name: str = Path( ..., description="The name of the image embedding model. Supported models include: google/siglip-so400m-patch14-384, openai/clip-vit-large-patch14, openai/clip-vit-base-patch16, laion/CLIP-ViT-bigG-14-laion2B-39B-b160k, Salesforce/blip-itm-large-flickr", example="google/siglip-so400m-patch14-384" ) ): """ Generate embedding vectors for uploaded image data (Development/Testing endpoint). This endpoint allows you to upload an image file directly through the Swagger UI for development and testing purposes. The image is processed and converted to embedding vectors using the specified model. Supported formats: JPEG, PNG, GIF, BMP, TIFF The service supports multiple model types including SigLIP, CLIP, and BLIP models. Returns a dense vector representation of the uploaded image. Returns: dict: The embedding vector as a list of float values. """ model_name = model_name.rstrip("/") imageEmbeddingTask = ImageEmbeddingTaskService(logger) return await imageEmbeddingTask.generate_embedding_from_upload(image, model_name) # ========================= # Text Embedding Task # ========================= @app.post( "/text-embedding/{model_name:path}", openapi_extra={ "requestBody": { "content": { "application/json": { "example": { "inputs": "text to embed" } } } } } ) async def text_embedding( request: Request, model_name: str = Path( ..., description="The name of the text embedding model. Supported models include: google/siglip-so400m-patch14-384, openai/clip-vit-large-patch14, openai/clip-vit-base-patch16, laion/CLIP-ViT-bigG-14-laion2B-39B-b160k, Salesforce/blip-itm-large-flickr", example="google/siglip-so400m-patch14-384" ) ): """ Generate embedding vectors for text data. The service supports multiple model types including SigLIP, CLIP, and BLIP models. Returns a dense vector representation of the input text. Returns: list: The embedding vector as a list of float values. """ model_name = model_name.rstrip("/") textEmbeddingTask = TextEmbeddingTaskService(logger) return await textEmbeddingTask.generate_embedding(request, model_name) # ========================= # Embedding Vector Size # ========================= @app.get( "/embedding-vector-size/{model_name:path}", openapi_extra={ "responses": { "200": { "description": "Vector size information", "content": { "application/json": { "example": { "model_name": "google/siglip-so400m-patch14-384", "vector_size": 1152, "config_attribute_used": "hidden_size" } } } } } } ) async def embedding_vector_size( model_name: str = Path( ..., description="The name of the embedding model. Supported models include: google/siglip-so400m-patch14-384, openai/clip-vit-large-patch14, openai/clip-vit-base-patch16, laion/CLIP-ViT-bigG-14-laion2B-39B-b160k, Salesforce/blip-itm-large-flickr", example="google/siglip-so400m-patch14-384" ) ): """ Get the vector size of embeddings for a given model. This endpoint returns the dimensionality of the embedding vectors that the model produces. Useful for understanding the output format before generating embeddings. Returns: dict: Information about the vector size including model name, vector size, and configuration attribute used. """ model_name = model_name.rstrip("/") # We can use either ImageEmbeddingTaskService or TextEmbeddingTaskService as they inherit from the same base class embeddingTask = ImageEmbeddingTaskService(logger) return await embeddingTask.get_embedding_vector_size(model_name)