# app/embed_and_store.py
"""Embed images and store the resulting vectors in a ChromaDB collection."""
import os

from app.embed import get_image_embedding
from app.chroma_utils import add_embedding

# Create absolute paths for data directories.
# base_dir is the parent of the directory that contains this file.
base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
IMAGE_DIR = os.path.join(base_dir, "data", "images")
COLLECTION_NAME = "pdf_images"

# File extensions picked up by the directory scan (compared case-insensitively).
IMAGE_EXTENSIONS = (".png", ".jpg")


def _embed_and_store(path):
    """Embed the image at *path* and add it to COLLECTION_NAME.

    Prints a progress line before embedding and a confirmation line after a
    successful store.

    Args:
        path: Path of the image file to embed.

    Returns:
        True if an embedding was produced and stored, False if
        get_image_embedding returned None (the caller reports the failure).
    """
    name = os.path.basename(path)
    print(f"🔍 Embedding: {name}")
    emb = get_image_embedding(path)
    if emb is None:
        return False
    add_embedding(COLLECTION_NAME, path, emb)
    print(f"✅ Stored {name} in ChromaDB.")
    return True


def embed_all_images(image_path=None):
    """
    Embed and store images in ChromaDB.

    Args:
        image_path: Optional path to a specific image. If None (or any other
            falsy value, such as an empty string), all images in IMAGE_DIR
            will be processed.

    Raises:
        FileNotFoundError: If no image_path is given and IMAGE_DIR does not
            exist (raised by os.listdir).
    """
    if image_path:
        if not _embed_and_store(image_path):
            print("❌ Embedding failed.")
        return

    # Sort the listing so batch runs process files in a reproducible order;
    # os.listdir itself returns entries in arbitrary order.
    for fname in sorted(os.listdir(IMAGE_DIR)):
        # Lower-case before matching so "SCAN.PNG" / "photo.JPG" are not skipped.
        if not fname.lower().endswith(IMAGE_EXTENSIONS):
            continue
        if not _embed_and_store(os.path.join(IMAGE_DIR, fname)):
            print(f"❌ Failed to embed {fname}")


if __name__ == "__main__":
    embed_all_images()