{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Semantic search over a Chroma vector store\n",
    "\n",
    "- Write a Python notebook that does semantic search on the vector database and returns the top k results (use LangChain). Comment on what you observe."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sentence_transformers import SentenceTransformer\n",
    "import numpy as np\n",
    "from tqdm import tqdm\n",
    "import os\n",
    "from langchain.vectorstores import Chroma"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Settings a reader may want to tune\n",
    "MODEL_NAME = 'bkai-foundation-models/vietnamese-bi-encoder'  # SentenceTransformer model used for embeddings\n",
    "PERSIST_DIR = \"chroma_db_new\"  # directory of the persisted Chroma database\n",
    "TOP_K = 5  # number of results requested per query\n",
    "PREVIEW_CHARS = 50  # characters of each chunk shown in the preview"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Embedding model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wrapper with embed_documents and embed_query\n",
    "class SentenceTransformerWrapper:\n",
    "    \"\"\"Adapter exposing a SentenceTransformer through `embed_documents` and `embed_query`.\n",
    "\n",
    "    An instance is handed to the Chroma vector store as its embedding function.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, model_name):\n",
    "        \"\"\"Load the SentenceTransformer model identified by `model_name`.\"\"\"\n",
    "        self.model = SentenceTransformer(model_name)\n",
    "\n",
    "    def embed_documents(self, texts):\n",
    "        \"\"\"Encode `texts` (a list of strings) and return the embeddings converted with `.tolist()`.\n",
    "\n",
    "        A progress bar is shown while encoding.\n",
    "        \"\"\"\n",
    "        return self.model.encode(texts, show_progress_bar=True).tolist()\n",
    "\n",
    "    def embed_query(self, text):\n",
    "        \"\"\"Encode a single query string and return its embedding converted with `.tolist()`.\"\"\"\n",
    "        return self.model.encode(text).tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Instantiate wrapper with model\n",
    "# NOTE(review): presumably this is the same model that produced the stored vectors -- confirm against the indexing step\n",
    "embedding_model = SentenceTransformerWrapper(MODEL_NAME)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Open the vector store"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chroma database\n",
    "# The constructor's keyword for the embedder is `embedding_function`;\n",
    "# `embedding` is the keyword used by the `from_documents` / `from_texts` factories.\n",
    "vector_db = Chroma(\n",
    "    persist_directory=PERSIST_DIR,\n",
    "    embedding_function=embedding_model,  # SentenceTransformerWrapper instance\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run a similarity search for a user-supplied query\n",
    "query = input(\"Enter your query: \")\n",
    "results = vector_db.similarity_search(query, k=TOP_K)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the results\n",
    "print(f\"\\nTop {TOP_K} results for query: '{query}'\\n\")\n",
    "if not results:\n",
    "    print(\"No matching documents found.\")\n",
    "for rank, doc in enumerate(results, start=1):\n",
    "    print(f\"Result {rank}:\")\n",
    "    print(f\"Metadata: {doc.metadata}\")\n",
    "    print(f\"Content: {doc.page_content[:PREVIEW_CHARS]}...\")  # Display a preview of the chunk\n",
    "    print(\"-\" * 50)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Observations\n",
    "\n",
    "**TODO:** record what you observe about the returned results here."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "phapdienvv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}