Spaces:
Sleeping
Sleeping
# app/embed_and_store.py
import os

from app.embed import get_image_embedding
from app.chroma_utils import add_embedding
# Build absolute paths from this file's own location so they do not depend
# on the current working directory.
_package_dir = os.path.dirname(os.path.abspath(__file__))
# base_dir is the parent of the directory that contains this file.
base_dir = os.path.dirname(_package_dir)
IMAGE_DIR = os.path.join(base_dir, "data", "images")

# Collection name passed to add_embedding() for every stored image.
COLLECTION_NAME = "pdf_images"
# File extensions treated as images; compared against the lower-cased name.
_IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")


def _embed_and_store(path):
    """Embed one image and store the result; return True on success.

    Prints the progress message, calls ``get_image_embedding(path)`` and,
    when it returns something other than ``None``, passes the embedding to
    ``add_embedding`` under ``COLLECTION_NAME`` and prints the success
    message. On ``None`` it returns False without printing, so the caller
    can emit its own failure message.
    """
    name = os.path.basename(path)
    print(f"🔍 Embedding: {name}")
    embedding = get_image_embedding(path)
    if embedding is None:
        return False
    add_embedding(COLLECTION_NAME, path, embedding)
    print(f"✅ Stored {name} in ChromaDB.")
    return True


def embed_all_images(image_path=None):
    """Embed and store images in ChromaDB.

    Args:
        image_path: Optional path to a specific image. If falsy (``None`` or
            an empty string), every image file in ``IMAGE_DIR`` is processed.

    Returns:
        None. Progress and failures are reported with ``print``.

    Raises:
        OSError: If ``IMAGE_DIR`` cannot be listed (e.g. it does not exist).
    """
    if image_path:
        if not _embed_and_store(image_path):
            print("❌ Embedding failed.")
        return

    # Sort the listing so the processing order is deterministic;
    # os.listdir() returns entries in arbitrary order.
    for fname in sorted(os.listdir(IMAGE_DIR)):
        # Case-insensitive match so names like "PAGE1.PNG" are not skipped.
        if not fname.lower().endswith(_IMAGE_EXTENSIONS):
            continue
        if not _embed_and_store(os.path.join(IMAGE_DIR, fname)):
            print(f"❌ Failed to embed {fname}")
if __name__ == "__main__":
    # Run as a script: no path is given, so the whole image directory
    # is processed.
    embed_all_images()