refactor: Remove unused code and dependencies, update embeddings and text splitter modules
Browse files- law-bot.ipynb +8 -7
law-bot.ipynb
CHANGED
|
@@ -11,8 +11,6 @@
|
|
| 11 |
"from langchain.schema import Document\n",
|
| 12 |
"from langchain_community.vectorstores import FAISS\n",
|
| 13 |
"from langchain.schema import Document\n",
|
| 14 |
-
"from langchain_community.embeddings import HuggingFaceBgeEmbeddings\n",
|
| 15 |
-
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
| 16 |
"from langchain_community.retrievers import BM25Retriever\n",
|
| 17 |
"from langchain.retrievers import EnsembleRetriever"
|
| 18 |
]
|
|
@@ -35,29 +33,32 @@
|
|
| 35 |
},
|
| 36 |
{
|
| 37 |
"cell_type": "code",
|
| 38 |
-
"execution_count":
|
| 39 |
"metadata": {},
|
| 40 |
"outputs": [
|
| 41 |
{
|
| 42 |
"name": "stderr",
|
| 43 |
"output_type": "stream",
|
| 44 |
"text": [
|
| 45 |
-
"/Users/
|
| 46 |
" from tqdm.autonotebook import tqdm, trange\n"
|
| 47 |
]
|
| 48 |
}
|
| 49 |
],
|
| 50 |
"source": [
|
| 51 |
-
"
|
|
|
|
| 52 |
"embeddings = HuggingFaceBgeEmbeddings(model_name=\"BAAI/bge-m3\")"
|
| 53 |
]
|
| 54 |
},
|
| 55 |
{
|
| 56 |
"cell_type": "code",
|
| 57 |
-
"execution_count":
|
| 58 |
"metadata": {},
|
| 59 |
"outputs": [],
|
| 60 |
"source": [
|
|
|
|
|
|
|
| 61 |
"# 텍스트 분할기 설정\n",
|
| 62 |
"text_splitter = RecursiveCharacterTextSplitter(\n",
|
| 63 |
" chunk_size=2000,\n",
|
|
@@ -413,7 +414,7 @@
|
|
| 413 |
"name": "python",
|
| 414 |
"nbconvert_exporter": "python",
|
| 415 |
"pygments_lexer": "ipython3",
|
| 416 |
-
"version": "3.
|
| 417 |
}
|
| 418 |
},
|
| 419 |
"nbformat": 4,
|
|
|
|
| 11 |
"from langchain.schema import Document\n",
|
| 12 |
"from langchain_community.vectorstores import FAISS\n",
|
| 13 |
"from langchain.schema import Document\n",
|
|
|
|
|
|
|
| 14 |
"from langchain_community.retrievers import BM25Retriever\n",
|
| 15 |
"from langchain.retrievers import EnsembleRetriever"
|
| 16 |
]
|
|
|
|
| 33 |
},
|
| 34 |
{
|
| 35 |
"cell_type": "code",
|
| 36 |
+
"execution_count": 2,
|
| 37 |
"metadata": {},
|
| 38 |
"outputs": [
|
| 39 |
{
|
| 40 |
"name": "stderr",
|
| 41 |
"output_type": "stream",
|
| 42 |
"text": [
|
| 43 |
+
"/Users/anpigon/Library/Caches/pypoetry/virtualenvs/law-bot-C3zMZhS7-py3.11/lib/python3.11/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:11: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 44 |
" from tqdm.autonotebook import tqdm, trange\n"
|
| 45 |
]
|
| 46 |
}
|
| 47 |
],
|
| 48 |
"source": [
|
| 49 |
+
"from langchain_community.embeddings import HuggingFaceBgeEmbeddings\n",
|
| 50 |
+
"\n",
|
| 51 |
"embeddings = HuggingFaceBgeEmbeddings(model_name=\"BAAI/bge-m3\")"
|
| 52 |
]
|
| 53 |
},
|
| 54 |
{
|
| 55 |
"cell_type": "code",
|
| 56 |
+
"execution_count": 3,
|
| 57 |
"metadata": {},
|
| 58 |
"outputs": [],
|
| 59 |
"source": [
|
| 60 |
+
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
| 61 |
+
"\n",
|
| 62 |
"# 텍스트 분할기 설정\n",
|
| 63 |
"text_splitter = RecursiveCharacterTextSplitter(\n",
|
| 64 |
" chunk_size=2000,\n",
|
|
|
|
| 414 |
"name": "python",
|
| 415 |
"nbconvert_exporter": "python",
|
| 416 |
"pygments_lexer": "ipython3",
|
| 417 |
+
"version": "3.11.9"
|
| 418 |
}
|
| 419 |
},
|
| 420 |
"nbformat": 4,
|