{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a3e1c7d2",
   "metadata": {},
   "source": [
    "# Classification knowledge base: Chroma vector index\n",
    "\n",
    "Embeds the documents under `knowledge_base/raw/classification` into a persistent Chroma collection (`classification_db`) stored at `knowledge_base/vector/classification`, then loads that collection as a LlamaIndex `VectorStoreIndex` and builds a retriever on top of it.\n",
    "\n",
    "Both paths are relative to the kernel's working directory."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f5a0d75d",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import chromadb\n",
    "from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex\n",
    "from llama_index.core.retrievers import VectorIndexRetriever\n",
    "from llama_index.vector_stores.chroma import ChromaVectorStore\n",
    "# from llama_index.embeddings.fastembed import FastEmbedEmbedding\n",
    "\n",
    "# Optional FastEmbed embedding model (currently disabled). When enabling it,\n",
    "# also uncomment the `embed_model=embed_model` arguments in the cells below.\n",
    "# embed_model = FastEmbedEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\n",
    "\n",
    "# Paths are assembled with pathlib so the separators are right on every OS;\n",
    "# backslash-separated string literals only resolve as directories on Windows.\n",
    "data_dir = Path(\"knowledge_base\") / \"raw\" / \"classification\"\n",
    "data_path = Path(\"knowledge_base\") / \"vector\" / \"classification\"\n",
    "\n",
    "# One persistent Chroma client, collection and vector store, shared by the\n",
    "# ingestion cell and the loading cell.\n",
    "db = chromadb.PersistentClient(path=str(data_path))\n",
    "chroma_collection = db.get_or_create_collection(\"classification_db\")\n",
    "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b52b6ba8",
   "metadata": {},
   "source": [
    "### Storing the data locally\n",
    "\n",
    "The raw documents are read and embedded only when the collection is empty, so re-running this cell does not insert the same documents a second time. To force a rebuild, delete the `classification_db` collection (or the persistence directory) first."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "348df588",
   "metadata": {},
   "outputs": [],
   "source": [
    "if chroma_collection.count() == 0:\n",
    "    # Empty collection: read the raw documents and write their embeddings to Chroma.\n",
    "    documents = SimpleDirectoryReader(str(data_dir)).load_data()\n",
    "    storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
    "    index = VectorStoreIndex.from_documents(\n",
    "        documents=documents,\n",
    "        storage_context=storage_context,\n",
    "        show_progress=True,\n",
    "        # embed_model=embed_model\n",
    "    )\n",
    "else:\n",
    "    # Already populated: ingesting again would add every document once more.\n",
    "    print(f\"Collection already holds {chroma_collection.count()} embeddings; skipping ingestion.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f7411c03",
   "metadata": {},
   "source": [
    "### Loading the locally stored vector index\n",
    "\n",
    "Creates the index object from the persisted collection (this cell does not read the raw documents) and wraps it in a retriever."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d9cbd1b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# `from_vector_store` builds its storage context from the vector store it is\n",
    "# given, so no separate `storage_context` argument is passed.\n",
    "index = VectorStoreIndex.from_vector_store(vector_store)\n",
    "retriever = VectorIndexRetriever(\n",
    "    index,\n",
    "    # embed_model=embed_model\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dev",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}