"""Document embedding using EmbeddingGemma.

This script handles document embedding using EmbeddingGemma. It is the
entry point for indexing documents.

TODO: Wire this to FAISS.
"""

import os

from sentence_transformers import SentenceTransformer


def _load_model(model_path: str) -> SentenceTransformer:
    """Load the SentenceTransformer at *model_path*, retrying once on failure.

    The first failure is reported on stdout; a second failure propagates
    to the caller unchanged.
    """
    try:
        return SentenceTransformer(model_path)
    except Exception as e:
        print(f"Error loading model: {e}")
        # NOTE(review): the retry is identical to the first attempt, so it
        # only helps with transient failures -- confirm this is intended.
        return SentenceTransformer(model_path)


def embed_documents(path: str, config: dict):
    """Embed every regular file directly inside *path*.

    Args:
        path: Directory whose files are read as UTF-8 text and embedded.
            Subdirectories and other non-file entries are skipped.
        config: Configuration mapping; the model location is read from
            ``config["embedding"]["model_path"]``.

    Returns:
        A list of ``(filename, embedding)`` tuples, ordered by filename.
        Files that cannot be read or embedded are reported on stdout and
        left out of the result.

    Raises:
        KeyError: If the model path is missing from *config*.
        OSError: If *path* itself cannot be listed.
        Exception: Whatever ``SentenceTransformer`` raises when the model
            fails to load on both attempts.
    """
    model = _load_model(config["embedding"]["model_path"])

    embeddings = []
    # os.listdir() returns entries in arbitrary order; sort so that the
    # result is reproducible from run to run.
    for fname in sorted(os.listdir(path)):
        full_path = os.path.join(path, fname)

        # Directories (and other non-regular entries) cannot be opened as
        # text; skip them instead of aborting the whole indexing run.
        if not os.path.isfile(full_path):
            continue

        try:
            with open(full_path, "r", encoding="utf-8") as f:
                text = f.read()
        except (OSError, UnicodeDecodeError) as e:
            # One unreadable or non-UTF-8 file should not discard the
            # embeddings already computed for the other documents.
            print(f"Embedding failed for {fname}: {e}")
            continue

        emb = model.encode(text)
        if emb is not None:
            embeddings.append((fname, emb))
        else:
            print(f"Embedding failed for {fname}.")

    print(f"Total embeddings created: {len(embeddings)}")
    return embeddings


# TODO: Save embeddings to disk or vector store