Spaces:
Sleeping
Sleeping
File size: 1,453 Bytes
9089af6 c7a656d 9089af6 2376bae 9089af6 2376bae 9089af6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
# app/embed_and_store.py
import os
from app.embed import get_image_embedding
from app.chroma_utils import add_embedding
# Build absolute data paths from this file's location so they do not depend
# on the current working directory.
_module_path = os.path.abspath(__file__)
_module_dir = os.path.dirname(_module_path)
# base_dir is the directory one level above the one containing this file.
base_dir = os.path.dirname(_module_dir)

# Directory scanned for images when no explicit image path is given.
IMAGE_DIR = os.path.join(base_dir, "data", "images")

# Collection name passed to add_embedding for every stored image.
COLLECTION_NAME = "pdf_images"
def _embed_and_store(path):
    """Embed the image at ``path`` and store it; return True on success.

    Returns False, and stores nothing, when ``get_image_embedding`` returns
    None for the image.
    """
    emb = get_image_embedding(path)
    if emb is None:
        return False
    add_embedding(COLLECTION_NAME, path, emb)
    return True


def embed_all_images(image_path=None):
    """
    Embed and store images in ChromaDB.

    Each image is passed to ``get_image_embedding``; a non-None result is
    stored with ``add_embedding`` under ``COLLECTION_NAME``. Progress and
    failures are reported with ``print``; nothing is returned.

    Args:
        image_path: Optional path to a specific image. If None (or any other
            falsy value), every file in IMAGE_DIR whose name ends in .png,
            .jpg or .jpeg (matched case-insensitively) is processed, in
            sorted filename order.

    Raises:
        OSError: Propagated from ``os.listdir`` when IMAGE_DIR cannot be
            listed (for example, when it does not exist).
    """
    # Single-image mode: handle the one file and stop.
    if image_path:
        name = os.path.basename(image_path)
        print(f"🔍 Embedding: {name}")
        if _embed_and_store(image_path):
            print(f"✅ Stored {name} in ChromaDB.")
        else:
            print("❌ Embedding failed.")
        return

    image_suffixes = (".png", ".jpg", ".jpeg")
    # os.listdir returns entries in arbitrary order; sort so that runs are
    # reproducible and the log output is stable.
    for fname in sorted(os.listdir(IMAGE_DIR)):
        # Compare on the lower-cased name so files such as "SCAN.PNG" are
        # not silently skipped.
        if not fname.lower().endswith(image_suffixes):
            continue
        path = os.path.join(IMAGE_DIR, fname)
        print(f"🔍 Embedding: {fname}")
        if _embed_and_store(path):
            print(f"✅ Stored {fname} in ChromaDB.")
        else:
            print(f"❌ Failed to embed {fname}")
# Script entry point: when run directly (not imported), process the images
# found in IMAGE_DIR rather than a single file.
if __name__ == "__main__":
    embed_all_images(image_path=None)
|