import os
from pathlib import Path

from haystack import Pipeline
from haystack.components.converters import TextFileToDocument
from haystack.components.writers import DocumentWriter
from haystack_integrations.document_stores.chroma import ChromaDocumentStore


def load_data(data_dir="data") -> ChromaDocumentStore:
    """Index every regular file in *data_dir* into a new Chroma document store.

    Each file is converted with ``TextFileToDocument`` and written to the
    store with ``DocumentWriter`` through a two-step indexing pipeline.

    Args:
        data_dir: Directory whose direct children are indexed. Accepts a
            string or path-like object; relative paths are resolved against
            the current working directory. Defaults to ``"data"``.

    Returns:
        The ``ChromaDocumentStore`` instance the documents were written to.

    Raises:
        FileNotFoundError: If *data_dir* does not exist (raised by
            ``os.listdir``).
    """
    root = Path(data_dir)

    # os.listdir returns names in arbitrary order and includes subdirectories:
    # sort for a reproducible indexing order and keep only regular files, since
    # the converter can only read files.
    candidates = (root / name for name in sorted(os.listdir(root)))
    file_paths = [path for path in candidates if path.is_file()]

    # Chroma is used in-memory here, so callers must reuse this exact store
    # instance in any later pipeline that needs the indexed documents.
    document_store = ChromaDocumentStore()

    indexing = Pipeline()
    indexing.add_component("converter", TextFileToDocument())
    indexing.add_component("writer", DocumentWriter(document_store))
    indexing.connect("converter", "writer")
    indexing.run({"converter": {"sources": file_paths}})

    return document_store