architojha commited on
Commit
be07415
·
1 Parent(s): 6c6841f
Files changed (1) hide show
  1. prepare_kb.ipynb +0 -118
prepare_kb.ipynb DELETED
@@ -1,118 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "id": "f5a0d75d",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "from pathlib import Path\n",
11
- "\n",
12
- "import chromadb\n",
13
- "from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex\n",
14
- "# from llama_index.embeddings.fastembed import FastEmbedEmbedding\n",
15
- "from llama_index.vector_stores.chroma import ChromaVectorStore\n",
16
- "\n",
17
- "# embed_model = FastEmbedEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\n",
18
- "# pathlib joins with the platform separator, so these paths work on Windows and POSIX alike.\n",
19
- "data_dir = Path(\"knowledge_base\") / \"raw\" / \"classification\"\n",
20
- "documents = SimpleDirectoryReader(str(data_dir)).load_data()\n",
21
- "data_path = Path(\"knowledge_base\") / \"vector\" / \"classification\"\n",
22
- "db = chromadb.PersistentClient(path=str(data_path))"
23
- ]
24
- },
25
- {
26
- "cell_type": "markdown",
27
- "id": "b52b6ba8",
28
- "metadata": {},
29
- "source": [
30
- "### Storing the data locally"
31
- ]
32
- },
33
- {
34
- "cell_type": "code",
35
- "execution_count": null,
36
- "id": "348df588",
37
- "metadata": {},
38
- "outputs": [],
39
- "source": [
40
- "# Build the index from the loaded documents on top of the Chroma collection.\n",
41
- "chroma_collection = db.get_or_create_collection(name=\"classification_db\")\n",
42
- "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
43
- "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
44
- "\n",
45
- "# Optional: add embed_model=embed_model to the call below to use a custom embedding model.\n",
46
- "index = VectorStoreIndex.from_documents(\n",
47
- "    documents, storage_context=storage_context, show_progress=True\n",
48
- ")"
49
- ]
50
- },
51
- {
52
- "cell_type": "markdown",
53
- "id": "f7411c03",
54
- "metadata": {},
55
- "source": [
56
- "### Loading the locally stored vector index"
57
- ]
58
- },
59
- {
60
- "cell_type": "code",
61
- "execution_count": 6,
62
- "id": "4d9cbd1b",
63
- "metadata": {},
64
- "outputs": [],
65
- "source": [
66
- "from pathlib import Path\n",
67
- "\n",
68
- "import chromadb\n",
69
- "from llama_index.core import StorageContext, VectorStoreIndex\n",
70
- "from llama_index.core.retrievers import VectorIndexRetriever\n",
71
- "# from llama_index.embeddings.fastembed import FastEmbedEmbedding\n",
72
- "from llama_index.vector_stores.chroma import ChromaVectorStore\n",
73
- "\n",
74
- "# embed_model = FastEmbedEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\n",
75
- "\n",
76
- "# pathlib joins with the platform separator, so the store location works on Windows and POSIX alike.\n",
77
- "data_path = Path(\"knowledge_base\") / \"vector\" / \"classification\"\n",
78
- "db = chromadb.PersistentClient(path=str(data_path))\n",
79
- "chroma_collection = db.get_or_create_collection(\"classification_db\")\n",
80
- "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
81
- "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
82
- "\n",
83
- "index = VectorStoreIndex.from_vector_store(vector_store, storage_context=storage_context)\n",
84
- "# Optional: pass embed_model=embed_model as well to use a custom embedding model.\n",
85
- "retriever = VectorIndexRetriever(index)"
86
- ]
87
- },
88
- {
89
- "cell_type": "code",
90
- "execution_count": null,
91
- "id": "05804310",
92
- "metadata": {},
93
- "outputs": [],
94
- "source": []
95
- }
96
- ],
97
- "metadata": {
98
- "kernelspec": {
99
- "display_name": "dev",
100
- "language": "python",
101
- "name": "python3"
102
- },
103
- "language_info": {
104
- "codemirror_mode": {
105
- "name": "ipython",
106
- "version": 3
107
- },
108
- "file_extension": ".py",
109
- "mimetype": "text/x-python",
110
- "name": "python",
111
- "nbconvert_exporter": "python",
112
- "pygments_lexer": "ipython3",
113
- "version": "3.11.4"
114
- }
115
- },
116
- "nbformat": 4,
117
- "nbformat_minor": 5
118
- }