Spaces:
Sleeping
Sleeping
| import qdrant_client | |
| from langchain.vectorstores import Qdrant | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.schema import Document | |
| import os | |
| import json | |
| import re | |
| from config import EMBEDDING_MODEL, QDRANT_HOST, QDRANT_API_KEY, QDRANT_COLECTION_NAME, EMBEDDING_SIZE | |
# Shared embedding model, instantiated once at import time from the
# model name supplied by the project config.
embedding_model = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL,
)
def load_json(file_path):
    """Parse a UTF-8 encoded JSON file and return the decoded object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file content.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    if os.path.exists(file_path):
        with open(file_path, "r", encoding="utf-8") as handle:
            parsed = json.load(handle)
        return parsed
    raise FileNotFoundError(f"File not found: {file_path}")
def load_data(filepath):
    """Turn the product records stored in a JSON file into ``Document`` objects.

    Each record is rendered as a single-line Vietnamese description (URL,
    name, product id, model, prices, discount, description, specifications)
    that becomes the document's ``page_content``. Missing fields fall back to
    placeholder strings. A few record fields are copied into the metadata.

    Args:
        filepath: Path of the JSON file; it is read through ``load_json``.

    Returns:
        A list with one ``Document`` per record, in file order.
    """
    metadata_keys = ("L1", "L2", "L3", "L4", "SKU")
    documents = []
    for product in load_json(filepath):
        specs_json = json.dumps(
            product.get("specifications", {}), ensure_ascii=False
        )
        lines = [
            f'URL: {product.get("url", "Không có thông tin")},',
            f'Tên: {product.get("product_name", "Không có tên")},',
            f'Mã sản phẩm: {product.get("product_id", "Không có mã")},',
            f'Model: {product.get("model", "Không có model")},',
            f'Giá gốc: {product.get("old_price", "Không có giá")} VND,',
            f'Giá KM: {product.get("price", "Không có giá")} VND,',
            f'Giảm giá: {product.get("discount_rate", "Không có thông tin")},',
            f'Mô tả: {product.get("description", "Không có mô tả")},',
            f'Thông số kỹ thuật: {specs_json}',
        ]
        # Leading and trailing newlines are kept on purpose: the substitution
        # below turns each newline (plus any whitespace after it) into one
        # space, so the final text starts and ends with a single space. The
        # substitution also flattens newlines inside the field values.
        multiline = "\n" + "\n".join(lines) + "\n"
        flattened = re.sub(r'\n\s*', ' ', multiline)

        metadata = {key: product.get(key, "N/A") for key in metadata_keys}
        metadata["Price"] = product.get("price", "N/A")
        documents.append(Document(page_content=flattened, metadata=metadata))
    return documents
# Module-level Qdrant client used by load_vectordb; the host and API key
# are taken from the project config.
client = qdrant_client.QdrantClient(QDRANT_HOST, api_key=QDRANT_API_KEY)
def load_vectordb(data_path="data/rangdong.json"):
    """Return a ``Qdrant`` vector store bound to the configured collection.

    If the server does not list a collection named ``QDRANT_COLECTION_NAME``,
    the collection is created (cosine distance, ``EMBEDDING_SIZE`` dimensions)
    and filled with the documents built from ``data_path``. If the collection
    is already listed, it is reused as-is and nothing is uploaded.

    Args:
        data_path: JSON product file ingested when the collection has to be
            created. Defaults to the path that used to be hard-coded here.

    Returns:
        The ``Qdrant`` vector store wrapping the module-level ``client``.

    Raises:
        FileNotFoundError: If the collection has to be created and
            ``data_path`` does not exist (raised by ``load_data``).
    """
    existing = client.get_collections().collections
    is_new = all(col.name != QDRANT_COLECTION_NAME for col in existing)

    if is_new:
        # Build the documents BEFORE creating the collection: if the data
        # file is missing or malformed we fail here, instead of leaving an
        # empty collection that every later call would treat as populated.
        documents = load_data(data_path)
        client.create_collection(
            collection_name=QDRANT_COLECTION_NAME,
            vectors_config=qdrant_client.http.models.VectorParams(
                size=EMBEDDING_SIZE,
                distance=qdrant_client.http.models.Distance.COSINE,
            ),
        )

    # The store is configured identically whether or not the collection
    # was just created, so it is built once for both paths.
    vector_store = Qdrant(
        client=client,
        collection_name=QDRANT_COLECTION_NAME,
        embeddings=embedding_model,
        content_payload_key="page_content",
        metadata_payload_key="metadata",
    )

    if is_new:
        # NOTE(review): a failure during this upload still leaves a partially
        # filled collection behind; later calls will not re-ingest it.
        vector_store.add_documents(documents)
    return vector_store