Update config.py
Browse files
config.py
CHANGED
@@ -14,10 +14,12 @@ class Config:
|
|
14 |
# QDrant Configuration
|
15 |
QDRANT_URL = os.getenv("QDRANT_URL")
|
16 |
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
|
17 |
-
|
18 |
-
|
19 |
# Embedding configuration
|
20 |
-
EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
|
|
|
|
|
|
|
21 |
|
22 |
# Text Processing Configuration
|
23 |
CHUNK_SIZE = 512
|
@@ -29,14 +31,16 @@ class Config:
|
|
29 |
STOPWORDS_PATH = "stopwords.txt"
|
30 |
|
31 |
# RAG Configuration
|
32 |
-
TOP_K_RETRIEVAL =
|
33 |
BM25_TOP_K = 20
|
|
|
|
|
34 |
SIMILARITY_THRESHOLD = 0.25
|
35 |
|
36 |
# Reranker Configuration
|
37 |
ENABLE_RERANKING = True
|
38 |
RERANKER_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
|
39 |
-
RERANKER_TOP_K =
|
40 |
RERANK_BEFORE_RETRIEVAL_TOP_K = 25
|
41 |
USE_SCORE_FUSION = True
|
42 |
RERANKER_FUSION_ALPHA = 0.8
|
@@ -94,4 +98,4 @@ Thông tin tham khảo:
|
|
94 |
|
95 |
Câu hỏi: {question}
|
96 |
|
97 |
-
Trả lời:"""
|
|
|
14 |
# QDrant Configuration
|
15 |
QDRANT_URL = os.getenv("QDRANT_URL")
|
16 |
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
|
17 |
+
|
|
|
18 |
# Embedding configuration
|
19 |
+
# EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
|
20 |
+
EMBEDDING_MODEL = "bkai-foundation-models/vietnamese-bi-encoder"
|
21 |
+
|
22 |
+
COLLECTION_NAME = "final_vietnamese_legal_corpus" if EMBEDDING_MODEL == "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" else "bkai_biencoder_vietnamese_legal_corpus"
|
23 |
|
24 |
# Text Processing Configuration
|
25 |
CHUNK_SIZE = 512
|
|
|
31 |
STOPWORDS_PATH = "stopwords.txt"
|
32 |
|
33 |
# RAG Configuration
|
34 |
+
TOP_K_RETRIEVAL = 20
|
35 |
BM25_TOP_K = 20
|
36 |
+
BM25_B = 0.65
|
37 |
+
BM25_K1 = 1.2
|
38 |
SIMILARITY_THRESHOLD = 0.25
|
39 |
|
40 |
# Reranker Configuration
|
41 |
ENABLE_RERANKING = True
|
42 |
RERANKER_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
|
43 |
+
RERANKER_TOP_K = 20
|
44 |
RERANK_BEFORE_RETRIEVAL_TOP_K = 25
|
45 |
USE_SCORE_FUSION = True
|
46 |
RERANKER_FUSION_ALPHA = 0.8
|
|
|
98 |
|
99 |
Câu hỏi: {question}
|
100 |
|
101 |
+
Trả lời:"""
|